Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Why can't C (nor C++) optimize this recursive experiment?

I was thinking about functional programming techniques, recursions, and constness, and linked lists, so I made two experiments to test my compiler. In theory the compiler should be capable of optimizing away tail recursion (I now realize the functions provided here are NOT tail recursive and I apologize for trying to sneak them by as if they are, the actual tail recursive variants can be found in an answer I posted below) and produce a result at compile time without even building the structures in the first place at runtime.

It works as expected on the array variant below:

/**
 * @file test1.c
 */

static inline int my_array_sum(const int n, const int * const xs) {
    if (n == 0) {
        return n;
    } else {
        return n + my_array_sum(n - 1, xs + 1);
    }
}

int main(int argc, char **argv)
{    
    const int xs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    const int n = 8;    
    const int sum = my_array_sum(n, xs); 

    return sum;
}

producing the following object code (gcc test1.c -o test1.obj -c -O3, objdump -D test1.obj):

In MinGW/MSys 64bit:

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

In Linux Mint 64 bit:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq   

Whereas the linked list (which is a dirty word, but it is not malloced, and all the info about the list is available to the compiler) version below (sentence finished after code):

/**
 * @file test2.c
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

static inline int cons_sum(const Cons c) {
    if (c.next == 0) {
        return c.x;
    } else {
        return c.x + cons_sum(*(c.next));
    }
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    const Cons s0 = {8, 0};
    const Cons s1 = {7, &s0};
    const Cons s2 = {6, &s1};
    const Cons s3 = {5, &s2};
    const Cons s4 = {4, &s3};
    const Cons s5 = {3, &s4};
    const Cons s6 = {2, &s5};
    const Cons h = {1, &s6};    
    
    // | To produce a return value (expect 36).
    const int sum = cons_sum(h); 

    return sum;
}

produces the following object code (gcc test2.c -o test2.obj -c -O3, objdump -D test2.obj):

In MinGW/MSys 64bit:

0000000000000000 <main>:
   0:   48 81 ec 88 00 00 00    sub    $0x88,%rsp
   7:   e8 00 00 00 00          callq  c <main+0xc>
   c:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  11:   48 8d 54 24 70          lea    0x70(%rsp),%rdx
  16:   b9 02 00 00 00          mov    $0x2,%ecx
  1b:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  20:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  25:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  2a:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  2f:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  34:   48 8d 44 24 50          lea    0x50(%rsp),%rax
  39:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  3e:   48 8d 44 24 60          lea    0x60(%rsp),%rax
  43:   c7 44 24 20 08 00 00    movl   $0x8,0x20(%rsp)
  4a:   00
  4b:   48 c7 44 24 28 00 00    movq   $0x0,0x28(%rsp)
  52:   00 00
  54:   c7 44 24 30 07 00 00    movl   $0x7,0x30(%rsp)
  5b:   00
  5c:   c7 44 24 40 06 00 00    movl   $0x6,0x40(%rsp)
  63:   00
  64:   c7 44 24 50 05 00 00    movl   $0x5,0x50(%rsp)
  6b:   00
  6c:   c7 44 24 60 04 00 00    movl   $0x4,0x60(%rsp)
  73:   00
  74:   c7 44 24 70 03 00 00    movl   $0x3,0x70(%rsp)
  7b:   00
  7c:   48 89 44 24 78          mov    %rax,0x78(%rsp)
  81:   e8 00 00 00 00          callq  86 <main+0x86>
  86:   83 c0 01                add    $0x1,%eax
  89:   48 81 c4 88 00 00 00    add    $0x88,%rsp
  90:   c3                      retq

In Linux Mint 64 bit:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   48 83 ec 78             sub    $0x78,%rsp
   8:   b9 01 00 00 00          mov    $0x1,%ecx
   d:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  14:   00 00 
  16:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  1b:   31 c0                   xor    %eax,%eax
  1d:   48 89 e0                mov    %rsp,%rax
  20:   48 8d 54 24 50          lea    0x50(%rsp),%rdx
  25:   c7 04 24 08 00 00 00    movl   $0x8,(%rsp)
  2c:   48 89 44 24 18          mov    %rax,0x18(%rsp)
  31:   48 8d 44 24 10          lea    0x10(%rsp),%rax
  36:   48 89 44 24 28          mov    %rax,0x28(%rsp)
  3b:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  40:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  45:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  4a:   48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
  51:   00 00 
  53:   c7 44 24 10 07 00 00    movl   $0x7,0x10(%rsp)
  5a:   00 
  5b:   c7 44 24 20 06 00 00    movl   $0x6,0x20(%rsp)
  62:   00 
  63:   c7 44 24 30 05 00 00    movl   $0x5,0x30(%rsp)
  6a:   00 
  6b:   c7 44 24 40 04 00 00    movl   $0x4,0x40(%rsp)
  72:   00 
  73:   c7 44 24 50 03 00 00    movl   $0x3,0x50(%rsp)
  7a:   00 
  7b:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  80:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  85:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  8a:   b8 02 00 00 00          mov    $0x2,%eax
  8f:   90                      nop
  90:   89 c6                   mov    %eax,%esi
  92:   8b 02                   mov    (%rdx),%eax
  94:   48 8b 52 08             mov    0x8(%rdx),%rdx
  98:   01 f1                   add    %esi,%ecx
  9a:   48 85 d2                test   %rdx,%rdx
  9d:   75 f1                   jne    90 <main+0x90>
  9f:   01 c8                   add    %ecx,%eax
  a1:   48 8b 7c 24 68          mov    0x68(%rsp),%rdi
  a6:   64 48 33 3c 25 28 00    xor    %fs:0x28,%rdi
  ad:   00 00 
  af:   75 05                   jne    b6 <main+0xb6>
  b1:   48 83 c4 78             add    $0x78,%rsp
  b5:   c3                      retq   
  b6:   e8 00 00 00 00          callq  bb <main+0xbb>

I tried compiling the above in both C and C++, and both gave more or less the same disassembly.

Does the standard guarantee even strictly constant linked list variant (on the stack, as I can see reasons for a global linked list not to get optimized) does not get optimized to the extent it would with simple arrays, or is it just the current state of the compiler and it may get optimized in the future? Or am I forgetting some kind of a flag to enable this kind of optimization?

like image 913
Dmitry Avatar asked Oct 27 '25 05:10

Dmitry


1 Answers

Ignore this answer if you do not care about the tail recursion

As noted, the question initially talked about tail recursion, and I have provided regular recursion rather than tail recursion.

For completeness, the actual tail recursive functions are below.

The array version:

/**
 * @file test1_tail.c
 */

int my_array_sum_tail(const int n, const int * const xs, const int accumulator) {
    if (n == 1) {
        return accumulator + xs[n - 1];
    } else {
        return my_array_sum_tail(n - 1, xs, accumulator + xs[n - 1]);
    }
}

int my_array_sum(const int n, const int * const xs) {
    return my_array_sum_tail(n, xs, 0);
}

int main(int argc, char **argv)
{    
    const int xs[] = {1, 2, 3, 4, 5, 6, 7, 8};
    const int n = 8;    
    const int sum = my_array_sum(n, xs); 

    return sum;
}

producing the following object code (gcc test1_tail.c -o test1_tail.obj -c -O3, objdump -D test1_tail.obj):

In MinGW/MSys 64bit:

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp
  12:   c3                      retq

In Linux Mint 64 bit:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq 

The list version:

/**
 * @file test2_tail.c
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

static inline int cons_sum_tail(const Cons c, const int accumulator) {
    if (c.next == 0) {
        return accumulator + c.x;
    } else {
        return cons_sum_tail(*(c.next), accumulator + c.x);
    }
}

static inline int cons_sum(const Cons c) {
    return cons_sum_tail(c, 0);
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    const Cons s0 = {8, 0};
    const Cons s1 = {7, &s0};
    const Cons s2 = {6, &s1};
    const Cons s3 = {5, &s2};
    const Cons s4 = {4, &s3};
    const Cons s5 = {3, &s4};
    const Cons s6 = {2, &s5};
    const Cons h = {1, &s6};    
    
    // | To produce a return value(expect 36).
    const int sum = cons_sum(h); 

    return sum;
}

producing the following object code (gcc test2_tail.c -o test2_tail.obj -c -O3, objdump -D test2_tail.obj):

In MinGW/MSys 64bit:

0000000000000000 <main>:
   0:   48 81 ec 88 00 00 00    sub    $0x88,%rsp
   7:   e8 00 00 00 00          callq  c <main+0xc>
   c:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  11:   48 8d 54 24 70          lea    0x70(%rsp),%rdx
  16:   41 b8 01 00 00 00       mov    $0x1,%r8d
  1c:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  21:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  26:   b9 02 00 00 00          mov    $0x2,%ecx
  2b:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  30:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  35:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  3a:   48 8d 44 24 50          lea    0x50(%rsp),%rax
  3f:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  44:   48 8d 44 24 60          lea    0x60(%rsp),%rax
  49:   c7 44 24 20 08 00 00    movl   $0x8,0x20(%rsp)
  50:   00
  51:   48 c7 44 24 28 00 00    movq   $0x0,0x28(%rsp)
  58:   00 00
  5a:   48 c7 44 24 30 07 00    movq   $0x7,0x30(%rsp)
  61:   00 00
  63:   48 c7 44 24 40 06 00    movq   $0x6,0x40(%rsp)
  6a:   00 00
  6c:   48 c7 44 24 50 05 00    movq   $0x5,0x50(%rsp)
  73:   00 00
  75:   48 c7 44 24 60 04 00    movq   $0x4,0x60(%rsp)
  7c:   00 00
  7e:   48 c7 44 24 70 03 00    movq   $0x3,0x70(%rsp)
  85:   00 00
  87:   48 89 44 24 78          mov    %rax,0x78(%rsp)
  8c:   e8 00 00 00 00          callq  91 <main+0x91>
  91:   48 81 c4 88 00 00 00    add    $0x88,%rsp
  98:   c3                      retq

In Linux Mint 64 bit:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   48 83 ec 78             sub    $0x78,%rsp
   8:   b9 01 00 00 00          mov    $0x1,%ecx
   d:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
  14:   00 00 
  16:   48 89 44 24 68          mov    %rax,0x68(%rsp)
  1b:   31 c0                   xor    %eax,%eax
  1d:   48 89 e0                mov    %rsp,%rax
  20:   48 8d 54 24 50          lea    0x50(%rsp),%rdx
  25:   c7 04 24 08 00 00 00    movl   $0x8,(%rsp)
  2c:   48 89 44 24 18          mov    %rax,0x18(%rsp)
  31:   48 8d 44 24 10          lea    0x10(%rsp),%rax
  36:   48 89 44 24 28          mov    %rax,0x28(%rsp)
  3b:   48 8d 44 24 20          lea    0x20(%rsp),%rax
  40:   48 89 44 24 38          mov    %rax,0x38(%rsp)
  45:   48 8d 44 24 30          lea    0x30(%rsp),%rax
  4a:   48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
  51:   00 00 
  53:   c7 44 24 14 00 00 00    movl   $0x0,0x14(%rsp)
  5a:   00 
  5b:   c7 44 24 10 07 00 00    movl   $0x7,0x10(%rsp)
  62:   00 
  63:   c7 44 24 24 00 00 00    movl   $0x0,0x24(%rsp)
  6a:   00 
  6b:   c7 44 24 20 06 00 00    movl   $0x6,0x20(%rsp)
  72:   00 
  73:   c7 44 24 34 00 00 00    movl   $0x0,0x34(%rsp)
  7a:   00 
  7b:   c7 44 24 30 05 00 00    movl   $0x5,0x30(%rsp)
  82:   00 
  83:   c7 44 24 44 00 00 00    movl   $0x0,0x44(%rsp)
  8a:   00 
  8b:   c7 44 24 40 04 00 00    movl   $0x4,0x40(%rsp)
  92:   00 
  93:   c7 44 24 54 00 00 00    movl   $0x0,0x54(%rsp)
  9a:   00 
  9b:   c7 44 24 50 03 00 00    movl   $0x3,0x50(%rsp)
  a2:   00 
  a3:   48 89 44 24 48          mov    %rax,0x48(%rsp)
  a8:   48 8d 44 24 40          lea    0x40(%rsp),%rax
  ad:   48 89 44 24 58          mov    %rax,0x58(%rsp)
  b2:   b8 02 00 00 00          mov    $0x2,%eax
  b7:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  be:   00 00 
  c0:   89 c6                   mov    %eax,%esi
  c2:   8b 02                   mov    (%rdx),%eax
  c4:   48 8b 52 08             mov    0x8(%rdx),%rdx
  c8:   01 f1                   add    %esi,%ecx
  ca:   48 85 d2                test   %rdx,%rdx
  cd:   75 f1                   jne    c0 <main+0xc0>
  cf:   01 c8                   add    %ecx,%eax
  d1:   48 8b 7c 24 68          mov    0x68(%rsp),%rdi
  d6:   64 48 33 3c 25 28 00    xor    %fs:0x28,%rdi
  dd:   00 00 
  df:   75 05                   jne    e6 <main+0xe6>
  e1:   48 83 c4 78             add    $0x78,%rsp
  e5:   c3                      retq   
  e6:   e8 00 00 00 00          callq  eb <main+0xeb>

The tail recursive version for the other answer using c++/constexpr:

/**
 * @file test3_tail.cpp
 */

typedef struct Cons {
    const int x;
    const struct Cons * const next;
} Cons;

constexpr int cons_sum_tail(const Cons c, const int accumulator) {
    if (c.next == 0) {
        return accumulator + c.x;
    } else {
        return cons_sum_tail(*(c.next), accumulator + c.x);
    }
}

constexpr int cons_sum(const Cons c) {
    return cons_sum_tail(c, 0);
}

int main(int argc, char **argv)
{    
    // | To build the stack-local linked list, with tail s0 and head h.
    static constexpr const Cons s0 {8, 0};
    static constexpr const Cons s1 {7, &s0};
    static constexpr const Cons s2 {6, &s1};
    static constexpr const Cons s3 {5, &s2};
    static constexpr const Cons s4 {4, &s3};
    static constexpr const Cons s5 {3, &s4};
    static constexpr const Cons s6 {2, &s5};
    static constexpr const Cons h {1, &s6};    
    
    // | To produce a return value(expect 36).
    constexpr int sum = cons_sum(h); 

    return sum;
}

producing the following object code (g++ test3_tail.c -o test3_tail.obj -c -O3, objdump -D test3_tail.obj):

In MinGW/MSys 64bit:

0000000000000000 <main>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   e8 00 00 00 00          callq  9 <main+0x9>
   9:   b8 24 00 00 00          mov    $0x24,%eax
   e:   48 83 c4 28             add    $0x28,%rsp

In Linux Mint 64 bit:

0000000000000000 <main>:
   0:   f3 0f 1e fa             endbr64 
   4:   b8 24 00 00 00          mov    $0x24,%eax
   9:   c3                      retq  
like image 95
Dmitry Avatar answered Oct 28 '25 18:10

Dmitry