程序在运行过程中如果出现coredump等异常,为了尽快确定异常代码位置,可以使用gdb调试工具,通过bt命令可以查看异常位置的backtrace/callstack,通过此信息有助于快速排查问题。
然而,通常情况下,程序在运行过程中不能一直开着gdb,或者程序已经挂掉了,此时也无法再使用gdb工具,这种情况下还有办法快速定位异常吗?
答案是有的,操作系统提供一种信号处理机制,可以在程序挂掉之前做一些用户自定义的操作,借助此功能,如果在程序挂掉前获取到堆栈信息,就可以找到异常代码位置。
首先写一个示例程序:
// Restores the default disposition for `signal_number` and re-sends the signal
// to this process, so that after our handler returns the process terminates
// through the system's default action for that signal.
void InvokeDefaultSignalHandler(int signal_number) {
  struct sigaction sig_action;
  memset(&sig_action, 0, sizeof(sig_action));
  sigemptyset(&sig_action.sa_mask);
  sig_action.sa_handler = SIG_DFL;
  sigaction(signal_number, &sig_action, nullptr);
  kill(getpid(), signal_number);
}

// SA_SIGINFO-style handler: prints the current call stack (one frame per line,
// "<address> <symbol string>") to std::cout and then hands the signal over to
// the default handler.
//
// NOTE(review): backtrace_symbols() and iostream are not async-signal-safe.
// This is tolerated here because the process is about to terminate anyway;
// backtrace_symbols_fd() would be the strictly safe alternative.
void FailureSignalHandler(int signal_number, siginfo_t* /*signal_info*/, void* /*ucontext*/) {
  const int kMaxFrames = 32;
  void* buffer[kMaxFrames];
  const int32_t nptrs = backtrace(buffer, kMaxFrames);

  // backtrace_symbols() returns NULL when it cannot allocate its result; in
  // that case still print the raw addresses instead of dereferencing NULL.
  // The returned array is intentionally not released: the process is
  // terminated right below, and releasing heap memory inside a signal handler
  // is not async-signal-safe either.
  char** strings = backtrace_symbols(buffer, nptrs);
  for (int32_t i = 0; i < nptrs; ++i) {
    std::cout << std::hex << buffer[i] << " "
              << (strings != nullptr ? strings[i] : "??") << std::endl;
  }

  InvokeDefaultSignalHandler(signal_number);
}

// Installs FailureSignalHandler for the fatal signals we want a backtrace for.
void InstallFailureSignalHandler() {
  struct sigaction sig_action;
  memset(&sig_action, 0, sizeof(sig_action));
  sigemptyset(&sig_action.sa_mask);
  sig_action.sa_flags |= SA_SIGINFO;
  sig_action.sa_sigaction = &FailureSignalHandler;

  const int kHandledSignals[] = {SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, SIGTERM};
  for (int handled_signal : kHandledSignals) {
    sigaction(handled_signal, &sig_action, nullptr);
  }
}

// Demo entry point: installs the handlers, then runs Test() (defined
// elsewhere), which is expected to trigger the crash.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;
  InstallFailureSignalHandler();
  Test();
  return 0;
}
编译完成后,运行结果如下:
0x55e8d271b309 ./backtrace_test(_Z20FailureSignalHandleriP9siginfo_tPv+0x65) [0x55e8d271b309]
0x7fe4c7455f10 /lib/x86_64-linux-gnu/libc.so.6(+0x3ef10) [0x7fe4c7455f10]
0x55e8d271992e ./backtrace_test(_Z4Testv+0x34) [0x55e8d271992e]
0x55e8d271b552 ./backtrace_test(main+0x19) [0x55e8d271b552]
0x7fe4c7438c87 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7) [0x7fe4c7438c87]
0x55e8d271981a ./backtrace_test(_start+0x2a) [0x55e8d271981a]
Segmentation fault (core dumped)
在上面堆栈中,可以看到堆栈对应的函数,但是看不到堆栈对应的行号,通过addr2line命令输出为??:0或??:?
addr2line -e ./backtrace_test 0x55e8d271b309 -Cf
??
??:0
addr2line -e /lib/x86_64-linux-gnu/libc.so.6 0x7fe4c7455f10 -Cf
??
??:0
这是因为:addr2line只能通过地址偏移量来查找,而打印出的地址是绝对地址。由于共享库加载到内存的位置是不确定的,为了计算地址偏移量,需要获取到进程的内存映射文件(/proc/pid/maps):
在上述程序基础上增加以下代码:
// One parsed line of /proc/self/maps.
struct LibraryBacktrace {
  void* offset_start;      // first address of the mapping
  void* offset_end;        // end address of the mapping as printed in the maps file
  char property[256];      // permission string, e.g. "r-xp"
  char not_care1[128];     // 4th column (file offset) - parsed but not used
  char not_care2[128];     // 5th column (device) - parsed but not used
  char not_care3[128];     // 6th column (inode) - parsed but not used
  char library_path[256];  // 7th column (backing path); empty when the line has none
};

// Reads /proc/self/maps, prints every parsed mapping to std::cout and appends
// it to `*res`.
//
// @param res  Output vector; entries are appended, existing content is kept.
// @return true if at least one mapping was appended; false if `res` is null,
//         the maps file cannot be opened, or no line could be parsed.
bool GetBacktraceLibrary(std::vector<LibraryBacktrace>* res) {
  if (res == nullptr) {
    return false;
  }

  FILE* fd_maps = fopen("/proc/self/maps", "r");
  if (fd_maps == nullptr) {
    return false;
  }

  bool appended = false;
  char maps_line[1024];
  while (fgets(maps_line, sizeof(maps_line), fd_maps) != nullptr) {
    // Zero-initialise so that columns missing from the line (typically the
    // path of an anonymous mapping) read as empty strings instead of stale
    // stack contents.
    LibraryBacktrace d = {};

    // Field widths are one less than the destination sizes so an overlong
    // column can never overflow the fixed-size buffers.
    const int fields = sscanf(maps_line, "%p-%p %255s %127s %127s %127s %255s",
                              &d.offset_start, &d.offset_end, d.property,
                              d.not_care1, d.not_care2, d.not_care3,
                              d.library_path);
    if (fields < 2) {
      // Without an address range the entry is useless for address lookup.
      continue;
    }

    std::cout << d.offset_start << ", " << d.offset_end << ", "
              << d.property << ", " << d.not_care1 << ", " << d.not_care2
              << ", " << d.not_care3 << ", " << d.library_path << std::endl;
    res->push_back(d);
    appended = true;
  }

  fclose(fd_maps);
  return appended;
}
通过上述接口获取到本进程的内存映射信息,然后遍历backtrace接口得到的调用堆栈地址(绝对地址),找到该地址在内存映射信息中的位置,从而可以得到该地址所在的动态库是哪个,以及该地址相对于该动态库起始地址的偏移量:
内存映射信息如下:
0x5644860d9000, 0x5644860eb000, r-xp, 00000000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test0x5644862eb000, 0x5644862ec000, r--p, 00012000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test0x5644862ec000, 0x5644862ed000, rw-p, 00013000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test0x564486ab6000, 0x564486ad7000, rw-p, 00000000, 00:00, 0, [heap]0x7f94c4bce000, 0x7f94c4d6b000, r-xp, 00000000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so0x7f94c4d6b000, 0x7f94c4f6a000, ---p, 0019d000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so0x7f94c4f6a000, 0x7f94c4f6b000, r--p, 0019c000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so0x7f94c4f6b000, 0x7f94c4f6c000, rw-p, 0019d000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so0x7f94c4f6c000, 0x7f94c5153000, r-xp, 00000000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so0x7f94c5153000, 0x7f94c5353000, ---p, 001e7000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so0x7f94c5353000, 0x7f94c5357000, r--p, 001e7000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so0x7f94c5357000, 0x7f94c5359000, rw-p, 001eb000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so0x7f94c5359000, 0x7f94c535d000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/libc-2.27.so0x7f94c535d000, 0x7f94c54d6000, r-xp, 00000000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.250x7f94c54d6000, 0x7f94c56d6000, ---p, 00179000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.250x7f94c56d6000, 0x7f94c56e0000, r--p, 00179000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.250x7f94c56e0000, 0x7f94c56e2000, rw-p, 00183000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.250x7f94c56e2000, 0x7f94c56e6000, rw-p, 00000000, 00:00, 0, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.250x7f94c56e6000, 0x7f94c56fd000, r-xp, 00000000, 08:02, 14029994, /lib/x86_64-linux-gnu/libgcc_s.so.10x7f94c56fd000, 0x7f94c58fc000, ---p, 00017000, 08:02, 14029994, 
/lib/x86_64-linux-gnu/libgcc_s.so.10x7f94c58fc000, 0x7f94c58fd000, r--p, 00016000, 08:02, 14029994, /lib/x86_64-linux-gnu/libgcc_s.so.10x7f94c58fd000, 0x7f94c58fe000, rw-p, 00017000, 08:02, 14029994, /lib/x86_64-linux-gnu/libgcc_s.so.10x7f94c58fe000, 0x7f94c5927000, r-xp, 00000000, 08:02, 14024720, /lib/x86_64-linux-gnu/ld-2.27.so0x7f94c5af5000, 0x7f94c5af9000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/ld-2.27.so0x7f94c5b25000, 0x7f94c5b27000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/ld-2.27.so0x7f94c5b27000, 0x7f94c5b28000, r--p, 00029000, 08:02, 14024720, /lib/x86_64-linux-gnu/ld-2.27.so0x7f94c5b28000, 0x7f94c5b29000, rw-p, 0002a000, 08:02, 14024720, /lib/x86_64-linux-gnu/ld-2.27.so0x7f94c5b29000, 0x7f94c5b2a000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/ld-2.27.so0x7ffd64b68000, 0x7ffd64b89000, rw-p, 00000000, 00:00, 0, [stack]0x7ffd64bb3000, 0x7ffd64bb6000, r--p, 00000000, 00:00, 0, [vvar]0x7ffd64bb6000, 0x7ffd64bb8000, r-xp, 00000000, 00:00, 0, [vdso]0xffffffffff600000, 0xffffffffff601000, --xp, 00000000, 00:00, 0, [vsyscall]
堆栈信息如下:
0x5644860e48f9 ./backtrace_test(_Z20FailureSignalHandleriP9siginfo_tPv+0x65) [0x5644860e48f9]
0x7f94c4faaf10 /lib/x86_64-linux-gnu/libc.so.6(+0x3ef10) [0x7f94c4faaf10]
0x5644860e2f1e ./backtrace_test(_Z4Testv+0x34) [0x5644860e2f1e]
0x5644860e4c68 ./backtrace_test(main+0x19) [0x5644860e4c68]
0x7f94c4f8dc87 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7) [0x7f94c4f8dc87]
0x5644860e2e0a ./backtrace_test(_start+0x2a) [0x5644860e2e0a]
比如0x5644860e2f1e ./backtrace_test(_Z4Testv+0x34) [0x5644860e2f1e]:
在上述内存映射信息中找0x5644860e2f1e所在的位置为:
0x5644860d9000, 0x5644860eb000, r-xp, 00000000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test
对应的模块(此处为可执行文件本身)是:/mnt/test/backtrace/build/backtrace_test
该模块的起始地址是:0x5644860d9000
得到偏移地址:0x5644860e2f1e - 0x5644860d9000 = 0x9F1E
再次使用addr2line命令:addr2line -e /mnt/test/backtrace/build/backtrace_test 0x9F1E
结果为:/mnt/test/backtrace/backtrace_test_main.cc:15
std::string DumpSymbol(std::vector<SymbolData>* vec) {std::vector<LibraryBacktrace> library_backtrace;GetBacktraceLibrary(&library_backtrace);std::map<std::string, void*> m;for (const auto& it : library_backtrace) {std::string library = std::string(it.library_path);auto d = m.find(library);if (d == m.end()) {m.insert(std::make_pair(library, it.offset_start));} else {if (d->second >= it.offset_start) {d->second = it.offset_start;}}}for (auto& it : *vec) {if (!it.func1.empty() || it.addr1.empty() || it.file.empty()) {it.func2 = "(" + it.func1 + "+" + it.addr1 + ")";it.line = "??:?";// continue;}void* bt = 0;sscanf(it.addr2.c_str(), "%p", &bt);std::string library = "";for (const auto& it : library_backtrace) {if (it.offset_start <= bt && it.offset_end >= bt) {library = it.library_path;break;}}uint64_t offset = 0;auto it3 = m.find(library);if (it3 == m.end()) {std::cout << "library not found error" << std::endl;} else {offset = (char*)bt - (char*)it3->second;}char addrstr[128] = {0};snprintf(addrstr, sizeof(addrstr), "0x%lx", offset);std::string cmd = "addr2line " + std::string(addrstr) + " -e " + library + " -f -C";std::cout << "++++ cmd: " << std::hex << it.addr2 << ": " << cmd << std::endl;FILE* p = popen(cmd.c_str(), "r");if (!p) {continue;}char buff[1024] = { 0 };int32_t res_line_count = 0;while (fgets(buff, sizeof(buff), p) != nullptr) {std::string str(buff);if (!str.empty() && str.back() == '\n') {str = str.substr(0, str.length() - 1);}if (res_line_count == 0) {it.func2 = str;} else if (res_line_count == 1) {it.line = str;}++res_line_count;}pclose(p);}int32_t index = 0;std::stringstream ss;for (const auto& it : *vec) {ss << " #" << index << " " << it.addr2<< " " << it.func2 << " " << it.line << std::endl;++index;}return ss.str();}