The attached code works correctly if I compile it without any -O option. If, however, I compile it with -O2, it fails to print the intermediate functions in the traceback. Originally, I thought that the intermediate functions had been optimized away entirely, so I put a call to printf into each of the routines to rule that out. The output was still the same.
Expected results: gcc -rdynamic -g test.c -o test -L/usr/local/lib -lexecinfo
./test
DEPTH=11
./test: f0 (0x40d952)
./test: f1 (0x40da0e)
./test: f2 (0x40da1e)
./test: f3 (0x40da2e)
./test: f4 (0x40da3e)
./test: f5 (0x40da4e)
./test: f6 (0x40da5e)
./test: f7 (0x40da6e)
./test: main (0x40da89)
./test: _start (0x40080e)
Unexpected results: gcc -O2 -rdynamic -g test.c -o test -L/usr/local/lib -lexecinfo
./test
DEPTH=2
./test: f0 (0x40794b)
#include <stdio.h>
#include <dlfcn.h>
#define CALLSTACK_MAXLEN 64
//
// BT(X) expands to one `case X:` arm of the switch in backtrace().
//
// The frame levels are written out one case at a time, instead of passing
// the loop variable, because the documentation says the level argument of
// __builtin_frame_address() / __builtin_return_address() has to be a
// constant, not a variable.
//
// For level X the arm does the following:
//   - if __builtin_frame_address(X) is null (presumably the end of the call
//     chain), it makes the enclosing function return X and leaves trace[X]
//     untouched;
//   - otherwise it stores __builtin_return_address(X) in trace[X].address
//     and breaks out of the enclosing switch.
//
// Because the expansion contains `case`, `break` and `return X`, it is only
// usable directly inside a switch statement in a function that returns an
// integer.
//
#define BT(X) { \
case X: \
if (!__builtin_frame_address(X)) { \
return X; \
} \
\
trace[X].address = __builtin_return_address(X); \
break; \
}
// One recorded level of the call stack.
struct call {
// Return address stored by BT() for this level.
const void *address;
// Symbol name (dli_sname) that dladdr() reported for `address`.
const char *function;
// Object file name (dli_fname) that dladdr() reported for `address`.
const char *object;
};
// Call stack storage: written by backtrace(), read by f0().
// f0() stops printing at the first entry whose `object` is NULL.
struct call trace[CALLSTACK_MAXLEN];
/*
 * Record the current call stack in the global trace[] array.
 *
 * For each level i below `depth`, the matching BT(i) case stores the return
 * address of that level in trace[i].address; the address is then resolved
 * with dladdr() to fill in trace[i].function and trace[i].object.
 *
 * depth: maximum number of levels to record. Only levels 0..19 have a BT()
 *        case, so at most 20 levels are ever recorded.
 *
 * Returns the number of levels walked, which is the first of:
 *   - the level whose frame address was null (BT() returns from here),
 *   - the first level that has no BT() case,
 *   - `depth`.
 */
int
backtrace(int depth) {
    int i;
    Dl_info dlinfo;

    for (i = 0; i < depth; i++) {
        /* The level has to be a constant, hence one case per level. */
        switch (i) {
        BT( 0);
        BT( 1);
        BT( 2);
        BT( 3);
        BT( 4);
        BT( 5);
        BT( 6);
        BT( 7);
        BT( 8);
        BT( 9);
        BT( 10);
        BT( 11);
        BT( 12);
        BT( 13);
        BT( 14);
        BT( 15);
        BT( 16);
        BT( 17);
        BT( 18);
        BT( 19);
        default: return i;
        }

        if (dladdr(trace[i].address, &dlinfo) != 0) {
            trace[i].function = dlinfo.dli_sname;
            trace[i].object = dlinfo.dli_fname;
        } else {
            /*
             * Lookup failed: clear the names explicitly so that an entry
             * left over from an earlier call cannot be mistaken for a
             * resolved frame. Readers treat object == NULL as the end of
             * the usable trace.
             */
            trace[i].function = NULL;
            trace[i].object = NULL;
        }
    }
    return i;
}
/*
 * Innermost function of the test call chain: captures the call stack with
 * backtrace() and prints it.
 *
 * Prints "DEPTH=<n>" with the value returned by backtrace(), then one line
 * per recorded level in the form "<object>: <function> (<address>)".
 * Printing stops at the first level that was not resolved to an object, at
 * the depth returned by backtrace(), or at the end of trace[], whichever
 * comes first.
 */
void
f0() {
    int i;
    int depth;

    depth = backtrace(CALLSTACK_MAXLEN);
    printf("DEPTH=%d\n", depth);

    /*
     * Entries at or beyond `depth` were not written by this capture, so do
     * not look at them; the CALLSTACK_MAXLEN check keeps the index inside
     * trace[] regardless of what backtrace() returned.
     */
    for (i = 0; i < depth && i < CALLSTACK_MAXLEN && trace[i].object != NULL; i++) {
        /* dladdr() may find the object but no matching symbol. */
        const char *name = trace[i].function != NULL ? trace[i].function : "(unknown)";

        /* %p expects a void *, so drop the const qualifier explicitly. */
        printf("%s: %s (%p)\n", trace[i].object, name, (void *)trace[i].address);
    }
}
/*
 * Test call chain: main -> f7 -> f6 -> ... -> f1 -> f0.
 * Each function only forwards to the next one, so every level should show
 * up as a separate line in the trace printed by f0().
 */
void
f1() {
    f0();
}

void
f2() {
    f1();
}

void
f3() {
    f2();
}

void
f4() {
    f3();
}

void
f5() {
    f4();
}

void
f6() {
    f5();
}

void
f7() {
    f6();
}
/*
 * Program entry point: starts the f7 -> ... -> f0 call chain whose innermost
 * function prints the captured call stack. Command-line arguments are not
 * used.
 */
int
main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    f7();
    return 0;
}
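The reason is tail-call (sibling-call) optimization. Even if inlining is switched off, the compiler turns a call in tail position into a jump, so the calling function leaves no frame of its own on the stack. For example: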
f6:
.LFB29:
.cfi_startproc
xorl %eax, %eax
jmp f5
So you must:
Prevent the functions from being inlined:
/*
 * Test call chain f7 -> f6 -> ... -> f1 -> f0, with every link marked
 * noinline so that the compiler keeps each function as a separate,
 * out-of-line definition. This attribute only prevents inlining; the
 * tail-call-to-jump transformation is disabled separately with
 * -fno-optimize-sibling-calls.
 */
__attribute__((noinline)) void
f1() {
    f0();
}

__attribute__((noinline)) void
f2() {
    f1();
}

__attribute__((noinline)) void
f3() {
    f2();
}

__attribute__((noinline)) void
f4() {
    f3();
}

__attribute__((noinline)) void
f5() {
    f4();
}

__attribute__((noinline)) void
f6() {
    f5();
}

__attribute__((noinline)) void
f7() {
    f6();
}
Compile with -fno-optimize-sibling-calls and preserve the frame pointer (-fno-omit-frame-pointer):
gcc -O2 -rdynamic -g -o bfa bfa.c -ldl -fno-optimize-sibling-calls -fno-omit-frame-pointer
Output is:
$ ./bfa
DEPTH=10
./bfa: f0 (0x400f23)
./bfa: f1 (0x400f8b)
./bfa: f2 (0x400f9b)
./bfa: f3 (0x400fab)
./bfa: f4 (0x400fbb)
./bfa: f5 (0x400fcb)
./bfa: f6 (0x400fdb)
./bfa: f7 (0x400feb)
./bfa: main (0x400ffb)
/lib/libc.so.6: __libc_start_main (0x7fdfbae51c4d)
As desired.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With