程序在运行过程中如果出现coredump等异常,为了尽快确定异常代码位置,可以使用gdb调试工具,通过bt命令可以查看异常位置的backtrace/callstack,通过此信息有助于快速排查问题。
然而,通常情况下,程序在运行过程中不能一直开着gdb,或者程序已经挂掉了,此时也无法再使用gdb工具,这种情况下有没有办法快速定位异常呢?
答案是有的,操作系统提供一种信号处理机制,可以在程序挂掉之前做一些用户自定义的操作,借助此功能,如果在程序挂掉前获取到堆栈信息,就可以找到异常代码位置。
首先写一个示例程序:
// Resets the disposition of `signal_number` to SIG_DFL and then sends that
// same signal to the current process with kill().
void InvokeDefaultSignalHandler(int signal_number) {
  struct sigaction default_action;
  memset(&default_action, 0, sizeof(default_action));
  default_action.sa_handler = SIG_DFL;
  sigemptyset(&default_action.sa_mask);
  sigaction(signal_number, &default_action, nullptr);

  // Re-raise against our own pid now that the default disposition is in place.
  const pid_t self = getpid();
  kill(self, signal_number);
}
// Signal handler for fatal signals: prints the current call stack to stdout
// (one line per frame: "<address> <symbol text>") and then forwards the signal
// to InvokeDefaultSignalHandler().
//
// signal_number: the signal being handled.
// signal_info, ucontext: part of the SA_SIGINFO handler signature; not used.
//
// NOTE(review): backtrace_symbols() allocates and std::cout is buffered I/O;
// neither is async-signal-safe. That is tolerable for a last-gasp diagnostic,
// but backtrace_symbols_fd() would be the safer choice — confirm before
// production use. std::free() requires <cstdlib>.
void FailureSignalHandler(int signal_number, siginfo_t *signal_info, void *ucontext) {
  (void)signal_info;
  (void)ucontext;

  constexpr int32_t kMaxFrames = 32;
  void *buffer[kMaxFrames];
  const int32_t nptrs = backtrace(buffer, kMaxFrames);

  // backtrace_symbols() returns nullptr when it cannot allocate; still print
  // the raw addresses in that case instead of dereferencing a null pointer.
  char **strings = backtrace_symbols(buffer, nptrs);
  for (int32_t i = 0; i < nptrs; ++i) {
    // std::endl on purpose: flush every line, the process is about to die.
    std::cout << buffer[i] << " " << (strings != nullptr ? strings[i] : "??") << std::endl;
  }
  // The array (not the individual strings) is owned by the caller.
  std::free(strings);

  InvokeDefaultSignalHandler(signal_number);
}
// Registers FailureSignalHandler as the SA_SIGINFO-style handler for SIGSEGV,
// SIGILL, SIGFPE, SIGABRT, SIGBUS and SIGTERM, in that order.
void InstallFailureSignalHandler() {
  const int handled_signals[] = {SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, SIGTERM};

  struct sigaction handler_action;
  memset(&handler_action, 0, sizeof(handler_action));
  handler_action.sa_sigaction = &FailureSignalHandler;
  handler_action.sa_flags |= SA_SIGINFO;
  sigemptyset(&handler_action.sa_mask);

  // The same action struct is reused for every signal in the table.
  for (const int signo : handled_signals) {
    sigaction(signo, &handler_action, nullptr);
  }
}
// Demo entry point: installs the failure signal handlers, then calls Test().
// NOTE(review): Test() is not part of this listing; judging by the sample
// backtrace it is the function in which the crash is triggered.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;
  InstallFailureSignalHandler();
  Test();
  return 0;
}
编译完成后,运行结果如下:
0x55e8d271b309 ./backtrace_test(_Z20FailureSignalHandleriP9siginfo_tPv+0x65) [0x55e8d271b309]
0x7fe4c7455f10 /lib/x86_64-linux-gnu/libc.so.6(+0x3ef10) [0x7fe4c7455f10]
0x55e8d271992e ./backtrace_test(_Z4Testv+0x34) [0x55e8d271992e]
0x55e8d271b552 ./backtrace_test(main+0x19) [0x55e8d271b552]
0x7fe4c7438c87 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7) [0x7fe4c7438c87]
0x55e8d271981a ./backtrace_test(_start+0x2a) [0x55e8d271981a]
Segmentation fault (core dumped)
在上面的堆栈中,可以看到每一帧对应的函数,但是看不到对应的行号;直接把堆栈中的地址交给addr2line命令,输出的也只是??:0或??:?:
addr2line -e ./backtrace_test 0x55e8d271b309 -Cf
??
??:0
addr2line -e /lib/x86_64-linux-gnu/libc.so.6 0x7fe4c7455f10 -Cf
??
??:0
这是因为:addr2line只能通过地址偏移量来查找,而打印出的地址是绝对地址。由于共享库加载到内存的位置是不确定的,为了计算地址偏移量,需要获取到进程的内存映射文件(/proc/pid/maps):
在上述程序基础上增加以下代码:
// One parsed line of /proc/self/maps. Every field is zero-initialised so that
// a field missing from a line (e.g. the path of an anonymous mapping) reads as
// empty instead of leftover stack contents.
struct LibraryBacktrace {
  void* offset_start = nullptr;  // start address of the mapping
  void* offset_end = nullptr;    // end address of the mapping
  char property[256] = {};       // 3rd field, e.g. "r-xp"
  char not_care1[128] = {};      // 4th field, e.g. "00012000"
  char not_care2[128] = {};      // 5th field, e.g. "08:11"
  char not_care3[128] = {};      // 6th field, e.g. "108659273"
  char library_path[256] = {};   // rest of the line; empty when the line has none
};

// Reads /proc/self/maps, echoes every parsed mapping to stdout and appends it
// to `*res`.
//
// res: output vector; entries are appended, existing content is kept.
// Returns true when the file was read, false when `res` is null or the file
// could not be opened.
bool GetBacktraceLibrary(std::vector<LibraryBacktrace>* res) {
  if (res == nullptr) {
    return false;
  }
  FILE* fd_maps = fopen("/proc/self/maps", "r");
  if (fd_maps == nullptr) {
    return false;
  }

  char maps_line[1024];
  while (fgets(maps_line, sizeof(maps_line), fd_maps) != nullptr) {
    LibraryBacktrace d;
    unsigned long start = 0;
    unsigned long end = 0;
    // Field widths are bounded to the buffer sizes; the path is taken up to the
    // end of the line so that paths containing spaces are not cut short.
    const int parsed = sscanf(maps_line, "%lx-%lx %255s %127s %127s %127s %255[^\n]",
                              &start, &end, d.property, d.not_care1, d.not_care2,
                              d.not_care3, d.library_path);
    if (parsed < 2) {
      // No address range: malformed line or the tail of an over-long line.
      continue;
    }
    d.offset_start = reinterpret_cast<void*>(start);
    d.offset_end = reinterpret_cast<void*>(end);

    std::cout << d.offset_start << ", " << d.offset_end << ", "
              << d.property << ", " << d.not_care1 << ", " << d.not_care2
              << ", " << d.not_care3 << ", " << d.library_path << std::endl;
    res->push_back(d);
  }
  fclose(fd_maps);
  return true;
}
通过上述接口获取到本进程的内存映射信息,然后遍历backtrace接口得到的调用堆栈地址(绝对地址),找到该地址在内存映射信息中的位置,从而可以得到该地址所在的动态库是哪个,以及该地址相对于该动态库起始地址的偏移量:
内存映射信息如下:
0x5644860d9000, 0x5644860eb000, r-xp, 00000000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test
0x5644862eb000, 0x5644862ec000, r--p, 00012000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test
0x5644862ec000, 0x5644862ed000, rw-p, 00013000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test
0x564486ab6000, 0x564486ad7000, rw-p, 00000000, 00:00, 0, [heap]
0x7f94c4bce000, 0x7f94c4d6b000, r-xp, 00000000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so
0x7f94c4d6b000, 0x7f94c4f6a000, ---p, 0019d000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so
0x7f94c4f6a000, 0x7f94c4f6b000, r--p, 0019c000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so
0x7f94c4f6b000, 0x7f94c4f6c000, rw-p, 0019d000, 08:02, 14024800, /lib/x86_64-linux-gnu/libm-2.27.so
0x7f94c4f6c000, 0x7f94c5153000, r-xp, 00000000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so
0x7f94c5153000, 0x7f94c5353000, ---p, 001e7000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so
0x7f94c5353000, 0x7f94c5357000, r--p, 001e7000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so
0x7f94c5357000, 0x7f94c5359000, rw-p, 001eb000, 08:02, 14024791, /lib/x86_64-linux-gnu/libc-2.27.so
0x7f94c5359000, 0x7f94c535d000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/libc-2.27.so
0x7f94c535d000, 0x7f94c54d6000, r-xp, 00000000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25
0x7f94c54d6000, 0x7f94c56d6000, ---p, 00179000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25
0x7f94c56d6000, 0x7f94c56e0000, r--p, 00179000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25
0x7f94c56e0000, 0x7f94c56e2000, rw-p, 00183000, 08:02, 5776090, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25
0x7f94c56e2000, 0x7f94c56e6000, rw-p, 00000000, 00:00, 0, /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25
0x7f94c56e6000, 0x7f94c56fd000, r-xp, 00000000, 08:02, 14029994, /lib/x86_64-linux-gnu/libgcc_s.so.1
0x7f94c56fd000, 0x7f94c58fc000, ---p, 00017000, 08:02, 14029994, /lib/x86_64-linux-gnu/libgcc_s.so.1
0x7f94c58fc000, 0x7f94c58fd000, r--p, 00016000, 08:02, 14029994, /lib/x86_64-linux-gnu/libgcc_s.so.1
0x7f94c58fd000, 0x7f94c58fe000, rw-p, 00017000, 08:02, 14029994, /lib/x86_64-linux-gnu/libgcc_s.so.1
0x7f94c58fe000, 0x7f94c5927000, r-xp, 00000000, 08:02, 14024720, /lib/x86_64-linux-gnu/ld-2.27.so
0x7f94c5af5000, 0x7f94c5af9000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/ld-2.27.so
0x7f94c5b25000, 0x7f94c5b27000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/ld-2.27.so
0x7f94c5b27000, 0x7f94c5b28000, r--p, 00029000, 08:02, 14024720, /lib/x86_64-linux-gnu/ld-2.27.so
0x7f94c5b28000, 0x7f94c5b29000, rw-p, 0002a000, 08:02, 14024720, /lib/x86_64-linux-gnu/ld-2.27.so
0x7f94c5b29000, 0x7f94c5b2a000, rw-p, 00000000, 00:00, 0, /lib/x86_64-linux-gnu/ld-2.27.so
0x7ffd64b68000, 0x7ffd64b89000, rw-p, 00000000, 00:00, 0, [stack]
0x7ffd64bb3000, 0x7ffd64bb6000, r--p, 00000000, 00:00, 0, [vvar]
0x7ffd64bb6000, 0x7ffd64bb8000, r-xp, 00000000, 00:00, 0, [vdso]
0xffffffffff600000, 0xffffffffff601000, --xp, 00000000, 00:00, 0, [vsyscall]
堆栈信息如下:
0x5644860e48f9 ./backtrace_test(_Z20FailureSignalHandleriP9siginfo_tPv+0x65) [0x5644860e48f9]
0x7f94c4faaf10 /lib/x86_64-linux-gnu/libc.so.6(+0x3ef10) [0x7f94c4faaf10]
0x5644860e2f1e ./backtrace_test(_Z4Testv+0x34) [0x5644860e2f1e]
0x5644860e4c68 ./backtrace_test(main+0x19) [0x5644860e4c68]
0x7f94c4f8dc87 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7) [0x7f94c4f8dc87]
0x5644860e2e0a ./backtrace_test(_start+0x2a) [0x5644860e2e0a]
比如0x5644860e2f1e ./backtrace_test(_Z4Testv+0x34) [0x5644860e2f1e]:
在上述内存映射信息中找到0x5644860e2f1e所在的位置为:
0x5644860d9000, 0x5644860eb000, r-xp, 00000000, 08:11, 108659273, /mnt/test/backtrace/build/backtrace_test
对应的动态库是:/mnt/test/backtrace/build/backtrace_test
该动态库的起始地址是:0x5644860d9000
得到偏移地址:0x5644860e2f1e - 0x5644860d9000 = 0x9F1E
再次使用addr2line命令:
addr2line -e /mnt/test/backtrace/build/backtrace_test 0x9F1E
结果为:/mnt/test/backtrace/backtrace_test_main.cc:15
将上述查找和计算过程用代码实现如下(SymbolData的定义此处未给出):
// Resolves every stack frame in `vec` to a function name and source location
// with the external `addr2line` tool and returns the formatted stack.
//
// For each entry, `addr2` (parsed with "%p") is looked up in the memory map
// obtained from GetBacktraceLibrary(). The offset of the address from the
// lowest mapping of the owning file is passed to
// `addr2line <offset> -e <file> -f -C`; the first output line is stored in
// `func2`, the second in `line`. Entries that cannot be resolved keep
// whatever `func2` / `line` they had (see the fallback below).
//
// vec: frames to resolve; updated in place. A null pointer yields "".
// Returns one line per entry: " #<index> <addr2> <func2> <line>".
//
// NOTE(review): subtracting the load base assumes a position-independent
// image; a non-PIE executable would presumably need the absolute address —
// confirm against the build flags in use.
std::string DumpSymbol(std::vector<SymbolData>* vec) {
  if (vec == nullptr) {
    return std::string();
  }

  std::vector<LibraryBacktrace> library_backtrace;
  GetBacktraceLibrary(&library_backtrace);

  // Lowest mapped start address per path, used as that file's load base.
  std::map<std::string, void*> base_by_path;
  for (const auto& mapping : library_backtrace) {
    const std::string path(mapping.library_path);
    auto known = base_by_path.find(path);
    if (known == base_by_path.end()) {
      base_by_path.insert(std::make_pair(path, mapping.offset_start));
    } else if (reinterpret_cast<uintptr_t>(mapping.offset_start) <
               reinterpret_cast<uintptr_t>(known->second)) {
      known->second = mapping.offset_start;
    }
  }

  // Single-quotes a path for the shell so spaces or metacharacters in it
  // cannot split or alter the command line.
  const auto shell_quote = [](const std::string& text) {
    std::string quoted = "'";
    for (const char c : text) {
      if (c == '\'') {
        quoted += "'\\''";
      } else {
        quoted += c;
      }
    }
    quoted += "'";
    return quoted;
  };

  for (auto& frame : *vec) {
    if (!frame.func1.empty() || frame.addr1.empty() || frame.file.empty()) {
      // Fallback text; overwritten below when addr2line produces output.
      frame.func2 = "(" + frame.func1 + "+" + frame.addr1 + ")";
      frame.line = "??:?";
    }

    void* bt = nullptr;
    if (sscanf(frame.addr2.c_str(), "%p", &bt) != 1) {
      continue;  // not an address, nothing to look up
    }
    const uintptr_t address = reinterpret_cast<uintptr_t>(bt);

    // Find the mapping that contains the address. The end address of a
    // /proc/<pid>/maps range is exclusive.
    std::string library;
    for (const auto& mapping : library_backtrace) {
      const uintptr_t range_start = reinterpret_cast<uintptr_t>(mapping.offset_start);
      const uintptr_t range_end = reinterpret_cast<uintptr_t>(mapping.offset_end);
      if (range_start <= address && address < range_end) {
        library = mapping.library_path;
        break;
      }
    }

    const auto base = base_by_path.find(library);
    if (library.empty() || base == base_by_path.end()) {
      // Without a backing file there is nothing addr2line could open.
      std::cout << "library not found error" << std::endl;
      continue;
    }
    const uintptr_t offset = address - reinterpret_cast<uintptr_t>(base->second);

    // std::hex is applied to a local stream only, so std::cout keeps its
    // formatting state.
    std::stringstream cmd_builder;
    cmd_builder << "addr2line 0x" << std::hex << offset << " -e " << shell_quote(library)
                << " -f -C";
    const std::string cmd = cmd_builder.str();
    std::cout << "++++ cmd: " << frame.addr2 << ": " << cmd << std::endl;

    FILE* p = popen(cmd.c_str(), "r");
    if (!p) {
      continue;
    }
    char buff[1024] = {0};
    int32_t res_line_count = 0;
    while (fgets(buff, sizeof(buff), p) != nullptr) {
      std::string str(buff);
      if (!str.empty() && str.back() == '\n') {
        str.pop_back();
      }
      if (res_line_count == 0) {
        frame.func2 = str;  // first line: function name
      } else if (res_line_count == 1) {
        frame.line = str;   // second line: file:line
      }
      ++res_line_count;
    }
    pclose(p);
  }

  int32_t index = 0;
  std::stringstream ss;
  for (const auto& frame : *vec) {
    ss << " #" << index << " " << frame.addr2
       << " " << frame.func2 << " " << frame.line << '\n';
    ++index;
  }
  return ss.str();
}