Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Why does GCC -Ofast make the program print a wrong result, but only when it prints the result twice?

Tags:

c

gcc

Recompiling an old program made it output the wrong result. I'd like to know why.

I know that -Ofast may "disregard strict standards compliance" but I'm curious about what happens under the hood.

I reduced the program to this minimal example foo1.c:

#include <stdio.h>

double my_pow(double x, unsigned n)
{ /* returns x^n */
        double y = 1;

        while(n--) y *= x;
        return y;
}

void foo(double small)
{ /* prints small^19 */
        double x = my_pow(small,19);

        printf("%E\n",x);
        printf("%E\n",x);

}

int main(void)
{
        foo(1-0.8-0.2);

        return 0;
}

When compiled with -Ofast it gives a different output than with any other optimization level.

gcc -Ofast foo1.c && ./a.out:

-0.000000E+00
-0.000000E+00

gcc foo1.c && ./a.out:

-1.390671E-309
-1.390671E-309

A strange fact is that when one of the printf calls is commented out (file foo2.c), this behavior is not reproduced, making it a sort of heisenbug.

gcc -Ofast foo2.c && ./a.out:

-1.390671E-309

gcc foo2.c && ./a.out:

-1.390671E-309

Information that might be useful:

gcc -v:

Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/lto-wrapper
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto --enable-plugin --enable-initfini-array --disable-libgcj --with-isl=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/isl-install --with-cloog=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/cloog-install --enable-gnu-indirect-function --with-tune=generic --with-arch_32=x86-64 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC)

gcc -Ofast foo1.c -S -o -:

    .file   "foo1.c"
    .text
    .p2align 4,,15
    .globl  my_pow
    .type   my_pow, @function
my_pow:
.LFB11:
    .cfi_startproc
    testl   %edi, %edi
    leal    -1(%rdi), %edx
    je  .L10
    movl    %edi, %ecx
    shrl    %ecx
    movl    %ecx, %esi
    addl    %esi, %esi
    je  .L11
    cmpl    $9, %edi
    jbe .L11
    movapd  %xmm0, %xmm1
    movapd  .LC0(%rip), %xmm2
    xorl    %eax, %eax
    unpcklpd    %xmm1, %xmm1
.L9:
    addl    $1, %eax
    mulpd   %xmm1, %xmm2
    cmpl    %eax, %ecx
    ja  .L9
    movapd  %xmm2, -24(%rsp)
    subl    %esi, %edx
    cmpl    %esi, %edi
    movsd   -16(%rsp), %xmm1
    mulsd   %xmm2, %xmm1
    je  .L2
    testl   %edx, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
.L35:
    cmpl    $1, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $2, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $3, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $4, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $5, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $6, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $7, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    mulsd   %xmm0, %xmm1
    .p2align 4,,10
    .p2align 3
.L2:
    movapd  %xmm1, %xmm0
    ret
    .p2align 4,,10
    .p2align 3
.L11:
    movsd   .LC1(%rip), %xmm1
    testl   %edx, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    jmp .L35
    .p2align 4,,10
    .p2align 3
.L10:
    movsd   .LC1(%rip), %xmm1
    jmp .L2
    .cfi_endproc
.LFE11:
    .size   my_pow, .-my_pow
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC2:
    .string "%E\n"
    .text
    .p2align 4,,15
    .globl  foo
    .type   foo, @function
foo:
.LFB12:
    .cfi_startproc
    movapd  %xmm0, %xmm2
    subq    $24, %rsp
    .cfi_def_cfa_offset 32
    movl    $.LC2, %edi
    movl    $1, %eax
    unpcklpd    %xmm2, %xmm2
    movapd  %xmm2, %xmm1
    mulpd   %xmm2, %xmm1
    mulpd   %xmm1, %xmm1
    mulpd   %xmm1, %xmm1
    mulpd   %xmm2, %xmm1
    movapd  %xmm1, %xmm2
    unpckhpd    %xmm1, %xmm1
    mulsd   %xmm1, %xmm2
    mulsd   %xmm0, %xmm2
    movapd  %xmm2, %xmm0
    movsd   %xmm2, 8(%rsp)
    call    printf
    movsd   8(%rsp), %xmm2
    movl    $.LC2, %edi
    movl    $1, %eax
    addq    $24, %rsp
    .cfi_def_cfa_offset 8
    movapd  %xmm2, %xmm0
    jmp printf
    .cfi_endproc
.LFE12:
    .size   foo, .-foo
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    .globl  main
    .type   main, @function
main:
.LFB13:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    movsd   .LC3(%rip), %xmm0
    call    foo
    xorl    %eax, %eax
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE13:
    .size   main, .-main
    .section    .rodata.cst16,"aM",@progbits,16
    .align 16
.LC0:
    .long   0
    .long   1072693248
    .long   0
    .long   1072693248
    .section    .rodata.cst8,"aM",@progbits,8
    .align 8
.LC1:
    .long   0
    .long   1072693248
    .align 8
.LC3:
    .long   0
    .long   -1131413504
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits

gcc foo1.c -S -o -:

    .file   "foo1.c"
    .text
    .globl  my_pow
    .type   my_pow, @function
my_pow:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movsd   %xmm0, -24(%rbp)
    movl    %edi, -28(%rbp)
    movabsq $4607182418800017408, %rax
    movq    %rax, -8(%rbp)
    jmp .L2
.L3:
    movsd   -8(%rbp), %xmm0
    mulsd   -24(%rbp), %xmm0
    movsd   %xmm0, -8(%rbp)
.L2:
    movl    -28(%rbp), %eax
    leal    -1(%rax), %edx
    movl    %edx, -28(%rbp)
    testl   %eax, %eax
    jne .L3
    movq    -8(%rbp), %rax
    movq    %rax, -40(%rbp)
    movsd   -40(%rbp), %xmm0
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   my_pow, .-my_pow
    .section    .rodata
.LC1:
    .string "%E\n"
    .text
    .globl  foo
    .type   foo, @function
foo:
.LFB1:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movsd   %xmm0, -24(%rbp)
    movq    -24(%rbp), %rax
    movl    $19, %edi
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0
    call    my_pow
    movsd   %xmm0, -32(%rbp)
    movq    -32(%rbp), %rax
    movq    %rax, -8(%rbp)
    movq    -8(%rbp), %rax
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0
    movl    $.LC1, %edi
    movl    $1, %eax
    call    printf
    movq    -8(%rbp), %rax
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0
    movl    $.LC1, %edi
    movl    $1, %eax
    call    printf
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   foo, .-foo
    .globl  main
    .type   main, @function
main:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movabsq $-4859383997932765184, %rax
    movq    %rax, -8(%rbp)
    movsd   -8(%rbp), %xmm0
    call    foo
    movl    $0, %eax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   main, .-main
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits

gcc -Ofast foo2.c -S -o -:

    .file   "foo2.c"
    .text
    .p2align 4,,15
    .globl  my_pow
    .type   my_pow, @function
my_pow:
.LFB11:
    .cfi_startproc
    testl   %edi, %edi
    leal    -1(%rdi), %edx
    je  .L10
    movl    %edi, %ecx
    shrl    %ecx
    movl    %ecx, %esi
    addl    %esi, %esi
    je  .L11
    cmpl    $9, %edi
    jbe .L11
    movapd  %xmm0, %xmm1
    movapd  .LC0(%rip), %xmm2
    xorl    %eax, %eax
    unpcklpd    %xmm1, %xmm1
.L9:
    addl    $1, %eax
    mulpd   %xmm1, %xmm2
    cmpl    %eax, %ecx
    ja  .L9
    movapd  %xmm2, -24(%rsp)
    subl    %esi, %edx
    cmpl    %esi, %edi
    movsd   -16(%rsp), %xmm1
    mulsd   %xmm2, %xmm1
    je  .L2
    testl   %edx, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
.L35:
    cmpl    $1, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $2, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $3, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $4, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $5, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $6, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $7, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    mulsd   %xmm0, %xmm1
    .p2align 4,,10
    .p2align 3
.L2:
    movapd  %xmm1, %xmm0
    ret
    .p2align 4,,10
    .p2align 3
.L11:
    movsd   .LC1(%rip), %xmm1
    testl   %edx, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    jmp .L35
    .p2align 4,,10
    .p2align 3
.L10:
    movsd   .LC1(%rip), %xmm1
    jmp .L2
    .cfi_endproc
.LFE11:
    .size   my_pow, .-my_pow
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC2:
    .string "%E\n"
    .text
    .p2align 4,,15
    .globl  foo
    .type   foo, @function
foo:
.LFB12:
    .cfi_startproc
    movapd  %xmm0, %xmm2
    movl    $.LC2, %edi
    movl    $1, %eax
    unpcklpd    %xmm2, %xmm2
    movapd  %xmm2, %xmm1
    mulpd   %xmm2, %xmm1
    mulpd   %xmm1, %xmm1
    mulpd   %xmm1, %xmm1
    mulpd   %xmm2, %xmm1
    movapd  %xmm1, %xmm2
    unpckhpd    %xmm1, %xmm1
    mulsd   %xmm1, %xmm2
    mulsd   %xmm0, %xmm2
    movapd  %xmm2, %xmm0
    jmp printf
    .cfi_endproc
.LFE12:
    .size   foo, .-foo
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    .globl  main
    .type   main, @function
main:
.LFB13:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    movl    $.LC2, %edi
    movl    $1, %eax
    movsd   .LC3(%rip), %xmm0
    call    printf
    xorl    %eax, %eax
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE13:
    .size   main, .-main
    .section    .rodata.cst16,"aM",@progbits,16
    .align 16
.LC0:
    .long   0
    .long   1072693248
    .long   0
    .long   1072693248
    .section    .rodata.cst8,"aM",@progbits,8
    .align 8
.LC1:
    .long   0
    .long   1072693248
    .align 8
.LC3:
    .long   0
    .long   -2147418112
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits

gcc foo2.c -S -o -:

    .file   "foo2.c"
    .text
    .globl  my_pow
    .type   my_pow, @function
my_pow:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movsd   %xmm0, -24(%rbp)
    movl    %edi, -28(%rbp)
    movabsq $4607182418800017408, %rax
    movq    %rax, -8(%rbp)
    jmp .L2
.L3:
    movsd   -8(%rbp), %xmm0
    mulsd   -24(%rbp), %xmm0
    movsd   %xmm0, -8(%rbp)
.L2:
    movl    -28(%rbp), %eax
    leal    -1(%rax), %edx
    movl    %edx, -28(%rbp)
    testl   %eax, %eax
    jne .L3
    movq    -8(%rbp), %rax
    movq    %rax, -40(%rbp)
    movsd   -40(%rbp), %xmm0
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   my_pow, .-my_pow
    .section    .rodata
.LC1:
    .string "%E\n"
    .text
    .globl  foo
    .type   foo, @function
foo:
.LFB1:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movsd   %xmm0, -24(%rbp)
    movq    -24(%rbp), %rax
    movl    $19, %edi
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0
    call    my_pow
    movsd   %xmm0, -32(%rbp)
    movq    -32(%rbp), %rax
    movq    %rax, -8(%rbp)
    movq    -8(%rbp), %rax
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0
    movl    $.LC1, %edi
    movl    $1, %eax
    call    printf
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   foo, .-foo
    .globl  main
    .type   main, @function
main:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movabsq $-4859383997932765184, %rax
    movq    %rax, -8(%rbp)
    movsd   -8(%rbp), %xmm0
    call    foo
    movl    $0, %eax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   main, .-main
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits
like image 485
Trit Setun Avatar asked Oct 15 '22 09:10

Trit Setun


1 Answers

Under -ffast-math (and its siblings such as -Ofast), gcc links your program with special startup code from crtfastmath.c, which sets the flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags in the MXCSR register:

static void __attribute__((constructor))
set_fast_math (void)
{
#ifndef __x86_64__
...
#else
  unsigned int mxcsr = __builtin_ia32_stmxcsr ();
  mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
  __builtin_ia32_ldmxcsr (mxcsr);
#endif
}

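(from here).

With these flags set, an SSE operation that would produce a denormal (subnormal) result returns zero instead. The expected value here is (-2^-54)^19 = -2^-1026 ≈ -1.390671E-309, which is below the smallest normal double (2^-1022 ≈ 2.2E-308), so when the multiplications are executed at run time the final product is flushed to -0.

This also explains why the number of printf calls matters in the -Ofast listings above. In foo1.c, main still calls foo, and foo performs the mulpd/mulsd chain at run time, so the result is flushed to zero. In foo2.c, foo was inlined into main and the whole computation was folded at compile time: main simply passes the precomputed constant .LC3 (high word 0x80010000, i.e. -2^-1026) to printf. No floating-point arithmetic happens at run time, so the denormal value is printed unchanged.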

like image 63
yugr Avatar answered Oct 19 '22 02:10

yugr