The following C++ program gives numerically different results depending on whether it is compiled with -O0 or with -O1.
#include <iostream>
#include <array>
#include <cmath>
#include <iomanip>
// Minimal reproduction: rotates the 2-vector v by the angle theta and prints
// both resulting components in hexadecimal floating-point notation.
int main()
{
std::array<double, 2> v;
// Zero both components; only the first one is overwritten below.
v.fill(0);
// Hex-float literals specify the inputs exactly, with no decimal rounding.
std::get<0>(v) = 0x1.5b4d3afe1f7d2p-1;
double theta = 0x1.1aef12f82caf9p+2;
double c = std::cos(theta);
double s = std::sin(theta);
// 2-D rotation of v by theta. std::get<1>(v) is still 0 at this point, so
// the terms multiplied by it are zero and the results reduce to
// c * std::get<0>(v) and s * std::get<0>(v).
double vi = c * std::get<0>(v) - s * std::get<1>(v);
double vj = s * std::get<0>(v) + c * std::get<1>(v);
// Both new components are computed from the old values before either is
// written back.
std::get<0>(v) = vi;
std::get<1>(v) = vj;
// Switch the stream to hex-float output so the doubles are printed without
// any decimal rounding.
std::cout << std::hexfloat;
for (const auto& x : v)
std::cout << x << " ";
std::cout << std::endl;
return 0;
}
I am using gcc 9.2.0. I use std::hexfloat so that double variables are printed with full precision.
Compiling with
g++ -std=c++17 -Wall -pedantic -O0 -o test test.cpp
gives the following result (bold mine; the digit that differs between the two builds is the last hexadecimal digit of the first number)
-0x1.8f4e436eb5371p-3 -0x1.4ca54aa5d4e1ep-1
With
g++ -std=c++17 -Wall -pedantic -O1 -o test test.cpp
the output is
-0x1.8f4e436eb5372p-3 -0x1.4ca54aa5d4e1ep-1
The differences between the optimization flags enabled by -O0 and -O1, which I can extract with -Q --help=optimizers, are as follows:
$ g++ -O0 -Q --help=optimizers >optO0.txt
$ g++ -O1 -Q --help=optimizers >optO1.txt
$ diff optO0.txt optO1.txt|grep ^'>'
> -fbranch-count-reg [enabled]
> -fcombine-stack-adjustments [enabled]
> -fcompare-elim [enabled]
> -fcprop-registers [enabled]
> -fdefer-pop [enabled]
> -fforward-propagate [enabled]
> -fguess-branch-probability [enabled]
> -fif-conversion [enabled]
> -fif-conversion2 [enabled]
> -finline-functions-called-once [enabled]
> -fipa-profile [enabled]
> -fipa-pure-const [enabled]
> -fipa-reference [enabled]
> -fipa-reference-addressable [enabled]
> -fmove-loop-invariants [enabled]
> -fomit-frame-pointer [enabled]
> -freorder-blocks [enabled]
> -fshrink-wrap [enabled]
> -fsplit-wide-types [enabled]
> -fssa-phiopt [enabled]
> -ftree-bit-ccp [enabled]
> -ftree-builtin-call-dce [enabled]
> -ftree-ccp [enabled]
> -ftree-ch [enabled]
> -ftree-coalesce-vars [enabled]
> -ftree-copy-prop [enabled]
> -ftree-dce [enabled]
> -ftree-dominator-opts [enabled]
> -ftree-dse [enabled]
> -ftree-fre [enabled]
> -ftree-pta [enabled]
> -ftree-sink [enabled]
> -ftree-slsr [enabled]
> -ftree-sra [enabled]
> -ftree-ter [enabled]
According to this list, as well as to the man page of gcc, -O1 does not enable any optimization flag that alters floating-point math, such as -ffast-math, so why does the output differ?
Edit:
Using the flag -ffloat-store
as suggested in the answer to this question does not change the results.
C library version is
$ ldd --version
ldd (GNU libc) 2.30
The library is installed from the package glibc 2.30-1
of Arch Linux. Architecture is x86_64.
This is the assembler dump for the -O0
case (obtained with g++ -std=c++17 -Wall -pedantic -O0 -S -o test test.cpp
)
.file "test.cpp"
.text
.section .text._ZStanSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStanSt13_Ios_FmtflagsS_,comdat
.weak _ZStanSt13_Ios_FmtflagsS_
.type _ZStanSt13_Ios_FmtflagsS_, @function
_ZStanSt13_Ios_FmtflagsS_:
.LFB1415:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl -4(%rbp), %eax
andl -8(%rbp), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1415:
.size _ZStanSt13_Ios_FmtflagsS_, .-_ZStanSt13_Ios_FmtflagsS_
.section .text._ZStorSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStorSt13_Ios_FmtflagsS_,comdat
.weak _ZStorSt13_Ios_FmtflagsS_
.type _ZStorSt13_Ios_FmtflagsS_, @function
_ZStorSt13_Ios_FmtflagsS_:
.LFB1416:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl -4(%rbp), %eax
orl -8(%rbp), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1416:
.size _ZStorSt13_Ios_FmtflagsS_, .-_ZStorSt13_Ios_FmtflagsS_
.section .text._ZStcoSt13_Ios_Fmtflags,"axG",@progbits,_ZStcoSt13_Ios_Fmtflags,comdat
.weak _ZStcoSt13_Ios_Fmtflags
.type _ZStcoSt13_Ios_Fmtflags, @function
_ZStcoSt13_Ios_Fmtflags:
.LFB1418:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
notl %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1418:
.size _ZStcoSt13_Ios_Fmtflags, .-_ZStcoSt13_Ios_Fmtflags
.section .text._ZStoRRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStoRRSt13_Ios_FmtflagsS_,comdat
.weak _ZStoRRSt13_Ios_FmtflagsS_
.type _ZStoRRSt13_Ios_FmtflagsS_, @function
_ZStoRRSt13_Ios_FmtflagsS_:
.LFB1419:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movq -8(%rbp), %rax
movl (%rax), %eax
movl -12(%rbp), %edx
movl %edx, %esi
movl %eax, %edi
call _ZStorSt13_Ios_FmtflagsS_
movq -8(%rbp), %rdx
movl %eax, (%rdx)
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1419:
.size _ZStoRRSt13_Ios_FmtflagsS_, .-_ZStoRRSt13_Ios_FmtflagsS_
.section .text._ZStaNRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStaNRSt13_Ios_FmtflagsS_,comdat
.weak _ZStaNRSt13_Ios_FmtflagsS_
.type _ZStaNRSt13_Ios_FmtflagsS_, @function
_ZStaNRSt13_Ios_FmtflagsS_:
.LFB1420:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movq -8(%rbp), %rax
movl (%rax), %eax
movl -12(%rbp), %edx
movl %edx, %esi
movl %eax, %edi
call _ZStanSt13_Ios_FmtflagsS_
movq -8(%rbp), %rdx
movl %eax, (%rdx)
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1420:
.size _ZStaNRSt13_Ios_FmtflagsS_, .-_ZStaNRSt13_Ios_FmtflagsS_
.section .text._ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,"axG",@progbits,_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,comdat
.align 2
.weak _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
.type _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, @function
_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_:
.LFB1449:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movq %rdi, -24(%rbp)
movl %esi, -28(%rbp)
movl %edx, -32(%rbp)
movq -24(%rbp), %rax
movl 24(%rax), %eax
movl %eax, -4(%rbp)
movl -32(%rbp), %eax
movl %eax, %edi
call _ZStcoSt13_Ios_Fmtflags
movl %eax, %edx
movq -24(%rbp), %rax
addq $24, %rax
movl %edx, %esi
movq %rax, %rdi
call _ZStaNRSt13_Ios_FmtflagsS_
movl -32(%rbp), %edx
movl -28(%rbp), %eax
movl %edx, %esi
movl %eax, %edi
call _ZStanSt13_Ios_FmtflagsS_
movl %eax, %edx
movq -24(%rbp), %rax
addq $24, %rax
movl %edx, %esi
movq %rax, %rdi
call _ZStoRRSt13_Ios_FmtflagsS_
movl -4(%rbp), %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1449:
.size _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, .-_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
.section .text._ZSt8hexfloatRSt8ios_base,"axG",@progbits,_ZSt8hexfloatRSt8ios_base,comdat
.weak _ZSt8hexfloatRSt8ios_base
.type _ZSt8hexfloatRSt8ios_base, @function
_ZSt8hexfloatRSt8ios_base:
.LFB1481:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movl $256, %esi
movl $4, %edi
call _ZStorSt13_Ios_FmtflagsS_
movl %eax, %ecx
movq -8(%rbp), %rax
movl $260, %edx
movl %ecx, %esi
movq %rax, %rdi
call _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
movq -8(%rbp), %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1481:
.size _ZSt8hexfloatRSt8ios_base, .-_ZSt8hexfloatRSt8ios_base
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.section .rodata
.LC3:
.string " "
.text
.globl main
.type main, @function
# main at -O0: every source statement is translated literally, including
# out-of-line calls to the std::array helpers and to cos/sin.
main:
.LFB2816:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Reserve 128 bytes of locals and save the stack-protector value from %fs:40;
# it is compared again just before the function returns.
addq $-128, %rsp
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
# v.fill(0): a temporary 0.0 is stored at -112(%rbp) and passed by address;
# the array itself lives at -32(%rbp).
pxor %xmm0, %xmm0
movsd %xmm0, -112(%rbp)
leaq -112(%rbp), %rdx
leaq -32(%rbp), %rax
movq %rdx, %rsi
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4fillERKd
# std::get<0>(v) = constant .LC1
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd .LC1(%rip), %xmm0
movsd %xmm0, (%rax)
# theta = constant .LC2, kept at -96(%rbp)
movsd .LC2(%rip), %xmm0
movsd %xmm0, -96(%rbp)
# c = cos(theta): a real call into the math library at run time;
# result stored at -88(%rbp).
movq -96(%rbp), %rax
movq %rax, %xmm0
call cos@PLT
movq %xmm0, %rax
movq %rax, -88(%rbp)
# s = sin(theta): likewise a run-time library call; result at -80(%rbp).
movq -96(%rbp), %rax
movq %rax, %xmm0
call sin@PLT
movq %xmm0, %rax
movq %rax, -80(%rbp)
# vi = c * get<0>(v) - s * get<1>(v), using scalar SSE2 double arithmetic
# (mulsd/subsd); result stored at -72(%rbp).
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -88(%rbp), %xmm0
movsd %xmm0, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -80(%rbp), %xmm0
movsd -120(%rbp), %xmm1
subsd %xmm0, %xmm1
movapd %xmm1, %xmm0
movsd %xmm0, -72(%rbp)
# vj = s * get<0>(v) + c * get<1>(v); result stored at -64(%rbp).
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -80(%rbp), %xmm0
movsd %xmm0, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd (%rax), %xmm0
mulsd -88(%rbp), %xmm0
addsd -120(%rbp), %xmm0
movsd %xmm0, -64(%rbp)
# std::get<0>(v) = vi
movsd -72(%rbp), %xmm3
movsd %xmm3, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd -120(%rbp), %xmm3
movsd %xmm3, (%rax)
# std::get<1>(v) = vj
movsd -64(%rbp), %xmm4
movsd %xmm4, -120(%rbp)
leaq -32(%rbp), %rax
movq %rax, %rdi
call _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
movsd -120(%rbp), %xmm4
movsd %xmm4, (%rax)
# std::cout << std::hexfloat
leaq _ZSt8hexfloatRSt8ios_base(%rip), %rsi
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEPFRSt8ios_baseS0_E@PLT
# Range-for setup: begin() is kept at -104(%rbp), end() at -48(%rbp).
leaq -32(%rbp), %rax
movq %rax, -56(%rbp)
movq -56(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE5beginEv
movq %rax, -104(%rbp)
movq -56(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE3endEv
movq %rax, -48(%rbp)
# Loop body: print the current element followed by the string at .LC3,
# then advance the iterator by 8 bytes (one double).
.L17:
movq -104(%rbp), %rax
cmpq -48(%rbp), %rax
je .L16
movq -104(%rbp), %rax
movq %rax, -40(%rbp)
movq -40(%rbp), %rax
movq (%rax), %rax
movq %rax, %xmm0
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEd@PLT
leaq .LC3(%rip), %rsi
movq %rax, %rdi
call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@PLT
addq $8, -104(%rbp)
jmp .L17
# std::cout << std::endl
.L16:
movq _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@GOTPCREL(%rip), %rax
movq %rax, %rsi
leaq _ZSt4cout(%rip), %rdi
call _ZNSolsEPFRSoS_E@PLT
# return 0, after checking that the saved stack-protector value is unchanged.
movl $0, %eax
movq -8(%rbp), %rcx
xorq %fs:40, %rcx
je .L19
call __stack_chk_fail@PLT
.L19:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2816:
.size main, .-main
.section .text._ZNSt5arrayIdLm2EE4fillERKd,"axG",@progbits,_ZNSt5arrayIdLm2EE4fillERKd,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE4fillERKd
.type _ZNSt5arrayIdLm2EE4fillERKd, @function
_ZNSt5arrayIdLm2EE4fillERKd:
.LFB3128:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %rbx
subq $24, %rsp
.cfi_offset 3, -24
movq %rdi, -24(%rbp)
movq %rsi, -32(%rbp)
movq -24(%rbp), %rax
movq %rax, %rdi
call _ZNKSt5arrayIdLm2EE4sizeEv
movq %rax, %rbx
movq -24(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE5beginEv
movq %rax, %rcx
movq -32(%rbp), %rax
movq %rax, %rdx
movq %rbx, %rsi
movq %rcx, %rdi
call _ZSt6fill_nIPdmdET_S1_T0_RKT1_
nop
addq $24, %rsp
popq %rbx
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3128:
.size _ZNSt5arrayIdLm2EE4fillERKd, .-_ZNSt5arrayIdLm2EE4fillERKd
.section .text._ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
.weak _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
.type _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3129:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $0, %esi
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3129:
.size _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
.section .text._ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
.weak _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
.type _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3130:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $1, %esi
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3130:
.size _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
.section .text._ZNSt5arrayIdLm2EE5beginEv,"axG",@progbits,_ZNSt5arrayIdLm2EE5beginEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE5beginEv
.type _ZNSt5arrayIdLm2EE5beginEv, @function
_ZNSt5arrayIdLm2EE5beginEv:
.LFB3132:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4dataEv
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3132:
.size _ZNSt5arrayIdLm2EE5beginEv, .-_ZNSt5arrayIdLm2EE5beginEv
.section .text._ZNSt5arrayIdLm2EE3endEv,"axG",@progbits,_ZNSt5arrayIdLm2EE3endEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE3endEv
.type _ZNSt5arrayIdLm2EE3endEv, @function
_ZNSt5arrayIdLm2EE3endEv:
.LFB3133:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt5arrayIdLm2EE4dataEv
addq $16, %rax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3133:
.size _ZNSt5arrayIdLm2EE3endEv, .-_ZNSt5arrayIdLm2EE3endEv
.section .text._ZNKSt5arrayIdLm2EE4sizeEv,"axG",@progbits,_ZNKSt5arrayIdLm2EE4sizeEv,comdat
.align 2
.weak _ZNKSt5arrayIdLm2EE4sizeEv
.type _ZNKSt5arrayIdLm2EE4sizeEv, @function
_ZNKSt5arrayIdLm2EE4sizeEv:
.LFB3247:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movl $2, %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3247:
.size _ZNKSt5arrayIdLm2EE4sizeEv, .-_ZNKSt5arrayIdLm2EE4sizeEv
.section .text._ZSt6fill_nIPdmdET_S1_T0_RKT1_,"axG",@progbits,_ZSt6fill_nIPdmdET_S1_T0_RKT1_,comdat
.weak _ZSt6fill_nIPdmdET_S1_T0_RKT1_
.type _ZSt6fill_nIPdmdET_S1_T0_RKT1_, @function
_ZSt6fill_nIPdmdET_S1_T0_RKT1_:
.LFB3248:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq %rdx, -24(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZSt12__niter_baseIPdET_S1_
movq %rax, %rcx
movq -24(%rbp), %rdx
movq -16(%rbp), %rax
movq %rax, %rsi
movq %rcx, %rdi
call _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
movq %rax, %rdx
leaq -8(%rbp), %rax
movq %rdx, %rsi
movq %rax, %rdi
call _ZSt12__niter_wrapIPdET_RKS1_S1_
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3248:
.size _ZSt6fill_nIPdmdET_S1_T0_RKT1_, .-_ZSt6fill_nIPdmdET_S1_T0_RKT1_
.section .text._ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,comdat
.weak _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
.type _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, @function
_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm:
.LFB3249:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq -16(%rbp), %rax
leaq 0(,%rax,8), %rdx
movq -8(%rbp), %rax
addq %rdx, %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3249:
.size _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, .-_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
.section .text._ZNSt5arrayIdLm2EE4dataEv,"axG",@progbits,_ZNSt5arrayIdLm2EE4dataEv,comdat
.align 2
.weak _ZNSt5arrayIdLm2EE4dataEv
.type _ZNSt5arrayIdLm2EE4dataEv, @function
_ZNSt5arrayIdLm2EE4dataEv:
.LFB3250:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3250:
.size _ZNSt5arrayIdLm2EE4dataEv, .-_ZNSt5arrayIdLm2EE4dataEv
.section .text._ZSt12__niter_baseIPdET_S1_,"axG",@progbits,_ZSt12__niter_baseIPdET_S1_,comdat
.weak _ZSt12__niter_baseIPdET_S1_
.type _ZSt12__niter_baseIPdET_S1_, @function
_ZSt12__niter_baseIPdET_S1_:
.LFB3318:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3318:
.size _ZSt12__niter_baseIPdET_S1_, .-_ZSt12__niter_baseIPdET_S1_
.section .text._ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,"axG",@progbits,_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,comdat
.weak _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
.type _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, @function
_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_:
.LFB3319:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -24(%rbp)
movq %rsi, -32(%rbp)
movq %rdx, -40(%rbp)
movq -40(%rbp), %rax
movsd (%rax), %xmm0
movsd %xmm0, -8(%rbp)
movq -32(%rbp), %rax
movq %rax, -16(%rbp)
.L41:
cmpq $0, -16(%rbp)
je .L40
movq -24(%rbp), %rax
movsd -8(%rbp), %xmm0
movsd %xmm0, (%rax)
subq $1, -16(%rbp)
addq $8, -24(%rbp)
jmp .L41
.L40:
movq -24(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3319:
.size _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, .-_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
.section .text._ZSt12__niter_wrapIPdET_RKS1_S1_,"axG",@progbits,_ZSt12__niter_wrapIPdET_RKS1_S1_,comdat
.weak _ZSt12__niter_wrapIPdET_RKS1_S1_
.type _ZSt12__niter_wrapIPdET_RKS1_S1_, @function
_ZSt12__niter_wrapIPdET_RKS1_S1_:
.LFB3320:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movq -16(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3320:
.size _ZSt12__niter_wrapIPdET_RKS1_S1_, .-_ZSt12__niter_wrapIPdET_RKS1_S1_
.section .text._ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,comdat
.weak _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
.type _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, @function
_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd:
.LFB3321:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3321:
.size _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, .-_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
.text
.type _Z41__static_initialization_and_destruction_0ii, @function
_Z41__static_initialization_and_destruction_0ii:
.LFB3455:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
cmpl $1, -4(%rbp)
jne .L49
cmpl $65535, -8(%rbp)
jne .L49
leaq _ZStL8__ioinit(%rip), %rdi
call _ZNSt8ios_base4InitC1Ev@PLT
leaq __dso_handle(%rip), %rdx
leaq _ZStL8__ioinit(%rip), %rsi
movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rax
movq %rax, %rdi
call __cxa_atexit@PLT
.L49:
nop
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3455:
.size _Z41__static_initialization_and_destruction_0ii, .-_Z41__static_initialization_and_destruction_0ii
.type _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3456:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl $65535, %esi
movl $1, %edi
call _Z41__static_initialization_and_destruction_0ii
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3456:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.section .rodata
.align 8
.LC1:
.long 2950821842
.long 1072018643
.align 8
.LC2:
.long 797100793
.long 1074900721
.hidden __dso_handle
.ident "GCC: (GNU) 9.2.0"
.section .note.GNU-stack,"",@progbits
The assembler dump when using -O1
(g++ -std=c++17 -Wall -pedantic -O1 -S -o test test.cpp
) is
.file "test.cpp"
.text
.section .rodata.str1.1,"aMS",@progbits,1
.LC1:
.string " "
.text
.globl main
.type main, @function
# main at -O1: this function contains no call to cos or sin and no
# floating-point arithmetic; the two values that get printed are loaded
# directly from the constants .LC0 and .LC2.
main:
.LFB2853:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
# ORs 260 into a 32-bit field at offset 24 of an object located relative to
# std::cout; this appears to be std::cout << std::hexfloat inlined (260 is
# the same mask the hexfloat helper passes to setf in the -O0 listing).
leaq _ZSt4cout(%rip), %rbx
movq _ZSt4cout(%rip), %rax
movq %rbx, %rcx
addq -24(%rax), %rcx
orl $260, 24(%rcx)
# Print the first value: a precomputed constant, followed by the
# one-character string at .LC1.
movsd .LC0(%rip), %xmm0
movq %rbx, %rdi
call _ZNSo9_M_insertIdEERSoT_@PLT
movq %rax, %rdi
movl $1, %edx
leaq .LC1(%rip), %rsi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
# Print the second value: again a precomputed constant plus the separator.
movsd .LC2(%rip), %xmm0
movq %rbx, %rdi
call _ZNSo9_M_insertIdEERSoT_@PLT
movq %rax, %rdi
movl $1, %edx
leaq .LC1(%rip), %rsi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
# std::endl on std::cout, then return 0.
movq %rbx, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@PLT
movl $0, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE2853:
.size main, .-main
.type _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3477:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
leaq _ZStL8__ioinit(%rip), %rdi
call _ZNSt8ios_base4InitC1Ev@PLT
leaq __dso_handle(%rip), %rdx
leaq _ZStL8__ioinit(%rip), %rsi
movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rdi
call __cxa_atexit@PLT
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE3477:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC0:
.long 921391986
.long -1077349148
.align 8
.LC2:
.long 2858241566
.long -1075525036
.hidden __dso_handle
.ident "GCC: (GNU) 9.2.0"
.section .note.GNU-stack,"",@progbits
Edit II
As requested, the output of objdump -d on the executable compiled with -O0 can be found here (I cannot post it here because of the length constraint). The output of ldd indicates that the executable is linked against /usr/lib/libm.so.6. A part of the output of objdump -d /bin/libm.so.6 (sections .init, .plt, sin and cos) can be found here.
With -O1, the floating-point computation happens at compile time, using the GNU MPFR library. MPFR is expected to give a correctly rounded result even for functions such as sin and cos. Your math library likely has different accuracy goals for these functions, which is why the run-time computation (at the -O0 optimization level) sometimes gives different results. For example, the GNU C library has a general accuracy goal of a few ulp.
Reportedly, IEEE 754 only has accuracy requirements for a subset of the math library functions (sqrt
, apparently), which enables math libraries to choose different trade-offs between speed and accuracy for the transcendental functions. (I do not have access to IEEE 754 because IEEE is opposed to the open dissemination of knowledge unfortunately.)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With