Recompiling an old program made it output the wrong result. I'd like to know why.
I know that -Ofast
may "disregard strict standards compliance" but I'm curious about what happens under the hood.
I reduced the program to this minimal example foo1.c
:
#include <stdio.h>
double my_pow(double x, unsigned n)
{ /* returns x^n */
double y = 1;
while(n--) y *= x;
return y;
}
void foo(double small)
{ /* prints small^19 */
double x = my_pow(small,19);
printf("%E\n",x);
printf("%E\n",x);
}
int main(void)
{
foo(1-0.8-0.2);
return 0;
}
When compiled with -Ofast
it gives a different output than with any other optimization level.
gcc -Ofast foo1.c && ./a.out
:
-0.000000E+00
-0.000000E+00
gcc foo1.c && ./a.out
:
-1.390671E-309
-1.390671E-309
A strange fact is that when one of the printf
is commented out (file foo2.c
) this behavior doesn't replicate making it a sort of heisenbug.
gcc -Ofast foo2.c && ./a.out
:
-1.390671E-309
gcc foo2.c && ./a.out
:
-1.390671E-309
gcc -v
:
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/lto-wrapper
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto --enable-plugin --enable-initfini-array --disable-libgcj --with-isl=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/isl-install --with-cloog=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/cloog-install --enable-gnu-indirect-function --with-tune=generic --with-arch_32=x86-64 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC)
gcc -Ofast foo1.c -S -o -
:
.file "foo1.c"
.text
.p2align 4,,15
.globl my_pow
.type my_pow, @function
my_pow:
.LFB11:
.cfi_startproc
testl %edi, %edi
leal -1(%rdi), %edx
je .L10
movl %edi, %ecx
shrl %ecx
movl %ecx, %esi
addl %esi, %esi
je .L11
cmpl $9, %edi
jbe .L11
movapd %xmm0, %xmm1
movapd .LC0(%rip), %xmm2
xorl %eax, %eax
unpcklpd %xmm1, %xmm1
.L9:
addl $1, %eax
mulpd %xmm1, %xmm2
cmpl %eax, %ecx
ja .L9
movapd %xmm2, -24(%rsp)
subl %esi, %edx
cmpl %esi, %edi
movsd -16(%rsp), %xmm1
mulsd %xmm2, %xmm1
je .L2
testl %edx, %edx
mulsd %xmm0, %xmm1
je .L2
.L35:
cmpl $1, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $2, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $3, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $4, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $5, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $6, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $7, %edx
mulsd %xmm0, %xmm1
je .L2
mulsd %xmm0, %xmm1
.p2align 4,,10
.p2align 3
.L2:
movapd %xmm1, %xmm0
ret
.p2align 4,,10
.p2align 3
.L11:
movsd .LC1(%rip), %xmm1
testl %edx, %edx
mulsd %xmm0, %xmm1
je .L2
jmp .L35
.p2align 4,,10
.p2align 3
.L10:
movsd .LC1(%rip), %xmm1
jmp .L2
.cfi_endproc
.LFE11:
.size my_pow, .-my_pow
.section .rodata.str1.1,"aMS",@progbits,1
.LC2:
.string "%E\n"
.text
.p2align 4,,15
.globl foo
.type foo, @function
foo:
.LFB12:
.cfi_startproc
movapd %xmm0, %xmm2
subq $24, %rsp
.cfi_def_cfa_offset 32
movl $.LC2, %edi
movl $1, %eax
unpcklpd %xmm2, %xmm2
movapd %xmm2, %xmm1
mulpd %xmm2, %xmm1
mulpd %xmm1, %xmm1
mulpd %xmm1, %xmm1
mulpd %xmm2, %xmm1
movapd %xmm1, %xmm2
unpckhpd %xmm1, %xmm1
mulsd %xmm1, %xmm2
mulsd %xmm0, %xmm2
movapd %xmm2, %xmm0
movsd %xmm2, 8(%rsp)
call printf
movsd 8(%rsp), %xmm2
movl $.LC2, %edi
movl $1, %eax
addq $24, %rsp
.cfi_def_cfa_offset 8
movapd %xmm2, %xmm0
jmp printf
.cfi_endproc
.LFE12:
.size foo, .-foo
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB13:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
movsd .LC3(%rip), %xmm0
call foo
xorl %eax, %eax
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE13:
.size main, .-main
.section .rodata.cst16,"aM",@progbits,16
.align 16
.LC0:
.long 0
.long 1072693248
.long 0
.long 1072693248
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC1:
.long 0
.long 1072693248
.align 8
.LC3:
.long 0
.long -1131413504
.ident "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
.section .note.GNU-stack,"",@progbits
gcc foo1.c -S -o -
:
.file "foo1.c"
.text
.globl my_pow
.type my_pow, @function
my_pow:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movsd %xmm0, -24(%rbp)
movl %edi, -28(%rbp)
movabsq $4607182418800017408, %rax
movq %rax, -8(%rbp)
jmp .L2
.L3:
movsd -8(%rbp), %xmm0
mulsd -24(%rbp), %xmm0
movsd %xmm0, -8(%rbp)
.L2:
movl -28(%rbp), %eax
leal -1(%rax), %edx
movl %edx, -28(%rbp)
testl %eax, %eax
jne .L3
movq -8(%rbp), %rax
movq %rax, -40(%rbp)
movsd -40(%rbp), %xmm0
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size my_pow, .-my_pow
.section .rodata
.LC1:
.string "%E\n"
.text
.globl foo
.type foo, @function
foo:
.LFB1:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movsd %xmm0, -24(%rbp)
movq -24(%rbp), %rax
movl $19, %edi
movq %rax, -32(%rbp)
movsd -32(%rbp), %xmm0
call my_pow
movsd %xmm0, -32(%rbp)
movq -32(%rbp), %rax
movq %rax, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, -32(%rbp)
movsd -32(%rbp), %xmm0
movl $.LC1, %edi
movl $1, %eax
call printf
movq -8(%rbp), %rax
movq %rax, -32(%rbp)
movsd -32(%rbp), %xmm0
movl $.LC1, %edi
movl $1, %eax
call printf
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size foo, .-foo
.globl main
.type main, @function
main:
.LFB2:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movabsq $-4859383997932765184, %rax
movq %rax, -8(%rbp)
movsd -8(%rbp), %xmm0
call foo
movl $0, %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size main, .-main
.ident "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
.section .note.GNU-stack,"",@progbits
gcc -Ofast foo2.c -S -o -
:
.file "foo2.c"
.text
.p2align 4,,15
.globl my_pow
.type my_pow, @function
my_pow:
.LFB11:
.cfi_startproc
testl %edi, %edi
leal -1(%rdi), %edx
je .L10
movl %edi, %ecx
shrl %ecx
movl %ecx, %esi
addl %esi, %esi
je .L11
cmpl $9, %edi
jbe .L11
movapd %xmm0, %xmm1
movapd .LC0(%rip), %xmm2
xorl %eax, %eax
unpcklpd %xmm1, %xmm1
.L9:
addl $1, %eax
mulpd %xmm1, %xmm2
cmpl %eax, %ecx
ja .L9
movapd %xmm2, -24(%rsp)
subl %esi, %edx
cmpl %esi, %edi
movsd -16(%rsp), %xmm1
mulsd %xmm2, %xmm1
je .L2
testl %edx, %edx
mulsd %xmm0, %xmm1
je .L2
.L35:
cmpl $1, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $2, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $3, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $4, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $5, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $6, %edx
mulsd %xmm0, %xmm1
je .L2
cmpl $7, %edx
mulsd %xmm0, %xmm1
je .L2
mulsd %xmm0, %xmm1
.p2align 4,,10
.p2align 3
.L2:
movapd %xmm1, %xmm0
ret
.p2align 4,,10
.p2align 3
.L11:
movsd .LC1(%rip), %xmm1
testl %edx, %edx
mulsd %xmm0, %xmm1
je .L2
jmp .L35
.p2align 4,,10
.p2align 3
.L10:
movsd .LC1(%rip), %xmm1
jmp .L2
.cfi_endproc
.LFE11:
.size my_pow, .-my_pow
.section .rodata.str1.1,"aMS",@progbits,1
.LC2:
.string "%E\n"
.text
.p2align 4,,15
.globl foo
.type foo, @function
foo:
.LFB12:
.cfi_startproc
movapd %xmm0, %xmm2
movl $.LC2, %edi
movl $1, %eax
unpcklpd %xmm2, %xmm2
movapd %xmm2, %xmm1
mulpd %xmm2, %xmm1
mulpd %xmm1, %xmm1
mulpd %xmm1, %xmm1
mulpd %xmm2, %xmm1
movapd %xmm1, %xmm2
unpckhpd %xmm1, %xmm1
mulsd %xmm1, %xmm2
mulsd %xmm0, %xmm2
movapd %xmm2, %xmm0
jmp printf
.cfi_endproc
.LFE12:
.size foo, .-foo
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB13:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
movl $.LC2, %edi
movl $1, %eax
movsd .LC3(%rip), %xmm0
call printf
xorl %eax, %eax
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE13:
.size main, .-main
.section .rodata.cst16,"aM",@progbits,16
.align 16
.LC0:
.long 0
.long 1072693248
.long 0
.long 1072693248
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC1:
.long 0
.long 1072693248
.align 8
.LC3:
.long 0
.long -2147418112
.ident "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
.section .note.GNU-stack,"",@progbits
gcc foo2.c -S -o -
:
.file "foo2.c"
.text
.globl my_pow
.type my_pow, @function
my_pow:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movsd %xmm0, -24(%rbp)
movl %edi, -28(%rbp)
movabsq $4607182418800017408, %rax
movq %rax, -8(%rbp)
jmp .L2
.L3:
movsd -8(%rbp), %xmm0
mulsd -24(%rbp), %xmm0
movsd %xmm0, -8(%rbp)
.L2:
movl -28(%rbp), %eax
leal -1(%rax), %edx
movl %edx, -28(%rbp)
testl %eax, %eax
jne .L3
movq -8(%rbp), %rax
movq %rax, -40(%rbp)
movsd -40(%rbp), %xmm0
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size my_pow, .-my_pow
.section .rodata
.LC1:
.string "%E\n"
.text
.globl foo
.type foo, @function
foo:
.LFB1:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movsd %xmm0, -24(%rbp)
movq -24(%rbp), %rax
movl $19, %edi
movq %rax, -32(%rbp)
movsd -32(%rbp), %xmm0
call my_pow
movsd %xmm0, -32(%rbp)
movq -32(%rbp), %rax
movq %rax, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, -32(%rbp)
movsd -32(%rbp), %xmm0
movl $.LC1, %edi
movl $1, %eax
call printf
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size foo, .-foo
.globl main
.type main, @function
main:
.LFB2:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movabsq $-4859383997932765184, %rax
movq %rax, -8(%rbp)
movsd -8(%rbp), %xmm0
call foo
movl $0, %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size main, .-main
.ident "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
.section .note.GNU-stack,"",@progbits
Under -ffast-math
(and it's siblings like -Ofast
) gcc links your app with a special startup code in crtfastmath.c
which sets flush-to-zero flag:
static void __attribute__((constructor))
set_fast_math (void)
{
#ifndef __x86_64__
...
#else
unsigned int mxcsr = __builtin_ia32_stmxcsr ();
mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
__builtin_ia32_ldmxcsr (mxcsr);
#endif
}
(from here).
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With