-O1 alters floating point math

Question

The following c++ program gives a numerically different result when compiled with -O0 or -O1.

#include <iostream>
#include <array>
#include <cmath>
#include <iomanip>

int main()
{
  std::array<double, 2> v;
  v.fill(0);

  std::get<0>(v) = 0x1.5b4d3afe1f7d2p-1;
  double theta = 0x1.1aef12f82caf9p+2;

  double c = std::cos(theta);
  double s = std::sin(theta);
  double vi = c * std::get<0>(v) - s * std::get<1>(v);
  double vj = s * std::get<0>(v) + c * std::get<1>(v);
  std::get<0>(v) = vi;
  std::get<1>(v) = vj;

  std::cout << std::hexfloat;
  for (const auto& x : v)
    std::cout << x << " ";
  std::cout << std::endl;

  return 0;
}

I am using gcc 9.2.0. I employ std::hexfloat to be sure to print out double variables with full precision.

Compiling with

g++ -std=c++17 -Wall -pedantic -O0 -o test test.cpp

gives the following result (bold mine)

-0x1.8f4e436eb5371p-3 -0x1.4ca54aa5d4e1ep-1

With

 g++ -std=c++17 -Wall -pedantic -O1 -o test test.cpp

the output is

-0x1.8f4e436eb5372p-3 -0x1.4ca54aa5d4e1ep-1

The difference between the optimizations flags enabled by -O0 and -O1 that I can extract with -Q --help=optimizers are as follows

$ g++ -O0 -Q --help=optimizers >optO0.txt
$ g++ -O1 -Q --help=optimizers >optO1.txt
$ diff optO0.txt optO1.txt|grep ^'>'
>   -fbranch-count-reg                          [enabled]
>   -fcombine-stack-adjustments                 [enabled]
>   -fcompare-elim                              [enabled]
>   -fcprop-registers                           [enabled]
>   -fdefer-pop                                 [enabled]
>   -fforward-propagate                         [enabled]
>   -fguess-branch-probability                  [enabled]
>   -fif-conversion                             [enabled]
>   -fif-conversion2                            [enabled]
>   -finline-functions-called-once      [enabled]
>   -fipa-profile                               [enabled]
>   -fipa-pure-const                            [enabled]
>   -fipa-reference                             [enabled]
>   -fipa-reference-addressable                 [enabled]
>   -fmove-loop-invariants                      [enabled]
>   -fomit-frame-pointer                        [enabled]
>   -freorder-blocks                            [enabled]
>   -fshrink-wrap                               [enabled]
>   -fsplit-wide-types                          [enabled]
>   -fssa-phiopt                                [enabled]
>   -ftree-bit-ccp                              [enabled]
>   -ftree-builtin-call-dce                     [enabled]
>   -ftree-ccp                                  [enabled]
>   -ftree-ch                                   [enabled]
>   -ftree-coalesce-vars                        [enabled]
>   -ftree-copy-prop                            [enabled]
>   -ftree-dce                                  [enabled]
>   -ftree-dominator-opts                       [enabled]
>   -ftree-dse                                  [enabled]
>   -ftree-fre                                  [enabled]
>   -ftree-pta                                  [enabled]
>   -ftree-sink                                 [enabled]
>   -ftree-slsr                                 [enabled]
>   -ftree-sra                                  [enabled]
>   -ftree-ter                                  [enabled]

According to this list, as well as to the man page of gcc, -O1 does not enable optimization flags that alters floating point math, like -ffast-math , so why does the output differ?

Edit:

Using the flag -ffloat-store as suggested in the answer to this question does not change the results.

C library version is

$ ldd --version
ldd (GNU libc) 2.30

The library is installed from the package glibc 2.30-1 of Arch Linux. Architecture is x86_64.

This is the assembler dump for the -O0 case (obtained with g++ -std=c++17 -Wall -pedantic -O0 -S -o test test.cpp)

.file   "test.cpp"
    .text
    .section    .text._ZStanSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStanSt13_Ios_FmtflagsS_,comdat
    .weak   _ZStanSt13_Ios_FmtflagsS_
    .type   _ZStanSt13_Ios_FmtflagsS_, @function
_ZStanSt13_Ios_FmtflagsS_:
.LFB1415:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movl    %edi, -4(%rbp)
    movl    %esi, -8(%rbp)
    movl    -4(%rbp), %eax
    andl    -8(%rbp), %eax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1415:
    .size   _ZStanSt13_Ios_FmtflagsS_, .-_ZStanSt13_Ios_FmtflagsS_
    .section    .text._ZStorSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStorSt13_Ios_FmtflagsS_,comdat
    .weak   _ZStorSt13_Ios_FmtflagsS_
    .type   _ZStorSt13_Ios_FmtflagsS_, @function
_ZStorSt13_Ios_FmtflagsS_:
.LFB1416:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movl    %edi, -4(%rbp)
    movl    %esi, -8(%rbp)
    movl    -4(%rbp), %eax
    orl -8(%rbp), %eax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1416:
    .size   _ZStorSt13_Ios_FmtflagsS_, .-_ZStorSt13_Ios_FmtflagsS_
    .section    .text._ZStcoSt13_Ios_Fmtflags,"axG",@progbits,_ZStcoSt13_Ios_Fmtflags,comdat
    .weak   _ZStcoSt13_Ios_Fmtflags
    .type   _ZStcoSt13_Ios_Fmtflags, @function
_ZStcoSt13_Ios_Fmtflags:
.LFB1418:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movl    %edi, -4(%rbp)
    movl    -4(%rbp), %eax
    notl    %eax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1418:
    .size   _ZStcoSt13_Ios_Fmtflags, .-_ZStcoSt13_Ios_Fmtflags
    .section    .text._ZStoRRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStoRRSt13_Ios_FmtflagsS_,comdat
    .weak   _ZStoRRSt13_Ios_FmtflagsS_
    .type   _ZStoRRSt13_Ios_FmtflagsS_, @function
_ZStoRRSt13_Ios_FmtflagsS_:
.LFB1419:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movl    %esi, -12(%rbp)
    movq    -8(%rbp), %rax
    movl    (%rax), %eax
    movl    -12(%rbp), %edx
    movl    %edx, %esi
    movl    %eax, %edi
    call    _ZStorSt13_Ios_FmtflagsS_
    movq    -8(%rbp), %rdx
    movl    %eax, (%rdx)
    movq    -8(%rbp), %rax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1419:
    .size   _ZStoRRSt13_Ios_FmtflagsS_, .-_ZStoRRSt13_Ios_FmtflagsS_
    .section    .text._ZStaNRSt13_Ios_FmtflagsS_,"axG",@progbits,_ZStaNRSt13_Ios_FmtflagsS_,comdat
    .weak   _ZStaNRSt13_Ios_FmtflagsS_
    .type   _ZStaNRSt13_Ios_FmtflagsS_, @function
_ZStaNRSt13_Ios_FmtflagsS_:
.LFB1420:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movl    %esi, -12(%rbp)
    movq    -8(%rbp), %rax
    movl    (%rax), %eax
    movl    -12(%rbp), %edx
    movl    %edx, %esi
    movl    %eax, %edi
    call    _ZStanSt13_Ios_FmtflagsS_
    movq    -8(%rbp), %rdx
    movl    %eax, (%rdx)
    movq    -8(%rbp), %rax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1420:
    .size   _ZStaNRSt13_Ios_FmtflagsS_, .-_ZStaNRSt13_Ios_FmtflagsS_
    .section    .text._ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,"axG",@progbits,_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_,comdat
    .align 2
    .weak   _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
    .type   _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, @function
_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_:
.LFB1449:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movq    %rdi, -24(%rbp)
    movl    %esi, -28(%rbp)
    movl    %edx, -32(%rbp)
    movq    -24(%rbp), %rax
    movl    24(%rax), %eax
    movl    %eax, -4(%rbp)
    movl    -32(%rbp), %eax
    movl    %eax, %edi
    call    _ZStcoSt13_Ios_Fmtflags
    movl    %eax, %edx
    movq    -24(%rbp), %rax
    addq    $24, %rax
    movl    %edx, %esi
    movq    %rax, %rdi
    call    _ZStaNRSt13_Ios_FmtflagsS_
    movl    -32(%rbp), %edx
    movl    -28(%rbp), %eax
    movl    %edx, %esi
    movl    %eax, %edi
    call    _ZStanSt13_Ios_FmtflagsS_
    movl    %eax, %edx
    movq    -24(%rbp), %rax
    addq    $24, %rax
    movl    %edx, %esi
    movq    %rax, %rdi
    call    _ZStoRRSt13_Ios_FmtflagsS_
    movl    -4(%rbp), %eax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1449:
    .size   _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_, .-_ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
    .section    .text._ZSt8hexfloatRSt8ios_base,"axG",@progbits,_ZSt8hexfloatRSt8ios_base,comdat
    .weak   _ZSt8hexfloatRSt8ios_base
    .type   _ZSt8hexfloatRSt8ios_base, @function
_ZSt8hexfloatRSt8ios_base:
.LFB1481:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movl    $256, %esi
    movl    $4, %edi
    call    _ZStorSt13_Ios_FmtflagsS_
    movl    %eax, %ecx
    movq    -8(%rbp), %rax
    movl    $260, %edx
    movl    %ecx, %esi
    movq    %rax, %rdi
    call    _ZNSt8ios_base4setfESt13_Ios_FmtflagsS0_
    movq    -8(%rbp), %rax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1481:
    .size   _ZSt8hexfloatRSt8ios_base, .-_ZSt8hexfloatRSt8ios_base
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .section    .rodata
.LC3:
    .string " "
    .text
    .globl  main
    .type   main, @function
main:
.LFB2816:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    addq    $-128, %rsp
    movq    %fs:40, %rax
    movq    %rax, -8(%rbp)
    xorl    %eax, %eax
    pxor    %xmm0, %xmm0
    movsd   %xmm0, -112(%rbp)
    leaq    -112(%rbp), %rdx
    leaq    -32(%rbp), %rax
    movq    %rdx, %rsi
    movq    %rax, %rdi
    call    _ZNSt5arrayIdLm2EE4fillERKd
    leaq    -32(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
    movsd   .LC1(%rip), %xmm0
    movsd   %xmm0, (%rax)
    movsd   .LC2(%rip), %xmm0
    movsd   %xmm0, -96(%rbp)
    movq    -96(%rbp), %rax
    movq    %rax, %xmm0
    call    cos@PLT
    movq    %xmm0, %rax
    movq    %rax, -88(%rbp)
    movq    -96(%rbp), %rax
    movq    %rax, %xmm0
    call    sin@PLT
    movq    %xmm0, %rax
    movq    %rax, -80(%rbp)
    leaq    -32(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
    movsd   (%rax), %xmm0
    mulsd   -88(%rbp), %xmm0
    movsd   %xmm0, -120(%rbp)
    leaq    -32(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
    movsd   (%rax), %xmm0
    mulsd   -80(%rbp), %xmm0
    movsd   -120(%rbp), %xmm1
    subsd   %xmm0, %xmm1
    movapd  %xmm1, %xmm0
    movsd   %xmm0, -72(%rbp)
    leaq    -32(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
    movsd   (%rax), %xmm0
    mulsd   -80(%rbp), %xmm0
    movsd   %xmm0, -120(%rbp)
    leaq    -32(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
    movsd   (%rax), %xmm0
    mulsd   -88(%rbp), %xmm0
    addsd   -120(%rbp), %xmm0
    movsd   %xmm0, -64(%rbp)
    movsd   -72(%rbp), %xmm3
    movsd   %xmm3, -120(%rbp)
    leaq    -32(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
    movsd   -120(%rbp), %xmm3
    movsd   %xmm3, (%rax)
    movsd   -64(%rbp), %xmm4
    movsd   %xmm4, -120(%rbp)
    leaq    -32(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
    movsd   -120(%rbp), %xmm4
    movsd   %xmm4, (%rax)
    leaq    _ZSt8hexfloatRSt8ios_base(%rip), %rsi
    leaq    _ZSt4cout(%rip), %rdi
    call    _ZNSolsEPFRSt8ios_baseS0_E@PLT
    leaq    -32(%rbp), %rax
    movq    %rax, -56(%rbp)
    movq    -56(%rbp), %rax
    movq    %rax, %rdi
    call    _ZNSt5arrayIdLm2EE5beginEv
    movq    %rax, -104(%rbp)
    movq    -56(%rbp), %rax
    movq    %rax, %rdi
    call    _ZNSt5arrayIdLm2EE3endEv
    movq    %rax, -48(%rbp)
.L17:
    movq    -104(%rbp), %rax
    cmpq    -48(%rbp), %rax
    je  .L16
    movq    -104(%rbp), %rax
    movq    %rax, -40(%rbp)
    movq    -40(%rbp), %rax
    movq    (%rax), %rax
    movq    %rax, %xmm0
    leaq    _ZSt4cout(%rip), %rdi
    call    _ZNSolsEd@PLT
    leaq    .LC3(%rip), %rsi
    movq    %rax, %rdi
    call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@PLT
    addq    $8, -104(%rbp)
    jmp .L17
.L16:
    movq    _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@GOTPCREL(%rip), %rax
    movq    %rax, %rsi
    leaq    _ZSt4cout(%rip), %rdi
    call    _ZNSolsEPFRSoS_E@PLT
    movl    $0, %eax
    movq    -8(%rbp), %rcx
    xorq    %fs:40, %rcx
    je  .L19
    call    __stack_chk_fail@PLT
.L19:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2816:
    .size   main, .-main
    .section    .text._ZNSt5arrayIdLm2EE4fillERKd,"axG",@progbits,_ZNSt5arrayIdLm2EE4fillERKd,comdat
    .align 2
    .weak   _ZNSt5arrayIdLm2EE4fillERKd
    .type   _ZNSt5arrayIdLm2EE4fillERKd, @function
_ZNSt5arrayIdLm2EE4fillERKd:
.LFB3128:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    pushq   %rbx
    subq    $24, %rsp
    .cfi_offset 3, -24
    movq    %rdi, -24(%rbp)
    movq    %rsi, -32(%rbp)
    movq    -24(%rbp), %rax
    movq    %rax, %rdi
    call    _ZNKSt5arrayIdLm2EE4sizeEv
    movq    %rax, %rbx
    movq    -24(%rbp), %rax
    movq    %rax, %rdi
    call    _ZNSt5arrayIdLm2EE5beginEv
    movq    %rax, %rcx
    movq    -32(%rbp), %rax
    movq    %rax, %rdx
    movq    %rbx, %rsi
    movq    %rcx, %rdi
    call    _ZSt6fill_nIPdmdET_S1_T0_RKT1_
    nop
    addq    $24, %rsp
    popq    %rbx
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3128:
    .size   _ZNSt5arrayIdLm2EE4fillERKd, .-_ZNSt5arrayIdLm2EE4fillERKd
    .section    .text._ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
    .weak   _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
    .type   _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3129:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movq    -8(%rbp), %rax
    movl    $0, %esi
    movq    %rax, %rdi
    call    _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3129:
    .size   _ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm0EdLm2EERT0_RSt5arrayIS0_XT1_EE
    .section    .text._ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,"axG",@progbits,_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE,comdat
    .weak   _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
    .type   _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, @function
_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE:
.LFB3130:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movq    -8(%rbp), %rax
    movl    $1, %esi
    movq    %rax, %rdi
    call    _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3130:
    .size   _ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE, .-_ZSt3getILm1EdLm2EERT0_RSt5arrayIS0_XT1_EE
    .section    .text._ZNSt5arrayIdLm2EE5beginEv,"axG",@progbits,_ZNSt5arrayIdLm2EE5beginEv,comdat
    .align 2
    .weak   _ZNSt5arrayIdLm2EE5beginEv
    .type   _ZNSt5arrayIdLm2EE5beginEv, @function
_ZNSt5arrayIdLm2EE5beginEv:
.LFB3132:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movq    -8(%rbp), %rax
    movq    %rax, %rdi
    call    _ZNSt5arrayIdLm2EE4dataEv
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3132:
    .size   _ZNSt5arrayIdLm2EE5beginEv, .-_ZNSt5arrayIdLm2EE5beginEv
    .section    .text._ZNSt5arrayIdLm2EE3endEv,"axG",@progbits,_ZNSt5arrayIdLm2EE3endEv,comdat
    .align 2
    .weak   _ZNSt5arrayIdLm2EE3endEv
    .type   _ZNSt5arrayIdLm2EE3endEv, @function
_ZNSt5arrayIdLm2EE3endEv:
.LFB3133:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movq    -8(%rbp), %rax
    movq    %rax, %rdi
    call    _ZNSt5arrayIdLm2EE4dataEv
    addq    $16, %rax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3133:
    .size   _ZNSt5arrayIdLm2EE3endEv, .-_ZNSt5arrayIdLm2EE3endEv
    .section    .text._ZNKSt5arrayIdLm2EE4sizeEv,"axG",@progbits,_ZNKSt5arrayIdLm2EE4sizeEv,comdat
    .align 2
    .weak   _ZNKSt5arrayIdLm2EE4sizeEv
    .type   _ZNKSt5arrayIdLm2EE4sizeEv, @function
_ZNKSt5arrayIdLm2EE4sizeEv:
.LFB3247:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movq    %rdi, -8(%rbp)
    movl    $2, %eax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3247:
    .size   _ZNKSt5arrayIdLm2EE4sizeEv, .-_ZNKSt5arrayIdLm2EE4sizeEv
    .section    .text._ZSt6fill_nIPdmdET_S1_T0_RKT1_,"axG",@progbits,_ZSt6fill_nIPdmdET_S1_T0_RKT1_,comdat
    .weak   _ZSt6fill_nIPdmdET_S1_T0_RKT1_
    .type   _ZSt6fill_nIPdmdET_S1_T0_RKT1_, @function
_ZSt6fill_nIPdmdET_S1_T0_RKT1_:
.LFB3248:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movq    %rdi, -8(%rbp)
    movq    %rsi, -16(%rbp)
    movq    %rdx, -24(%rbp)
    movq    -8(%rbp), %rax
    movq    %rax, %rdi
    call    _ZSt12__niter_baseIPdET_S1_
    movq    %rax, %rcx
    movq    -24(%rbp), %rdx
    movq    -16(%rbp), %rax
    movq    %rax, %rsi
    movq    %rcx, %rdi
    call    _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
    movq    %rax, %rdx
    leaq    -8(%rbp), %rax
    movq    %rdx, %rsi
    movq    %rax, %rdi
    call    _ZSt12__niter_wrapIPdET_RKS1_S1_
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3248:
    .size   _ZSt6fill_nIPdmdET_S1_T0_RKT1_, .-_ZSt6fill_nIPdmdET_S1_T0_RKT1_
    .section    .text._ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm,comdat
    .weak   _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
    .type   _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, @function
_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm:
.LFB3249:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movq    %rdi, -8(%rbp)
    movq    %rsi, -16(%rbp)
    movq    -16(%rbp), %rax
    leaq    0(,%rax,8), %rdx
    movq    -8(%rbp), %rax
    addq    %rdx, %rax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3249:
    .size   _ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm, .-_ZNSt14__array_traitsIdLm2EE6_S_refERA2_Kdm
    .section    .text._ZNSt5arrayIdLm2EE4dataEv,"axG",@progbits,_ZNSt5arrayIdLm2EE4dataEv,comdat
    .align 2
    .weak   _ZNSt5arrayIdLm2EE4dataEv
    .type   _ZNSt5arrayIdLm2EE4dataEv, @function
_ZNSt5arrayIdLm2EE4dataEv:
.LFB3250:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movq    %rdi, -8(%rbp)
    movq    -8(%rbp), %rax
    movq    %rax, %rdi
    call    _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3250:
    .size   _ZNSt5arrayIdLm2EE4dataEv, .-_ZNSt5arrayIdLm2EE4dataEv
    .section    .text._ZSt12__niter_baseIPdET_S1_,"axG",@progbits,_ZSt12__niter_baseIPdET_S1_,comdat
    .weak   _ZSt12__niter_baseIPdET_S1_
    .type   _ZSt12__niter_baseIPdET_S1_, @function
_ZSt12__niter_baseIPdET_S1_:
.LFB3318:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movq    %rdi, -8(%rbp)
    movq    -8(%rbp), %rax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3318:
    .size   _ZSt12__niter_baseIPdET_S1_, .-_ZSt12__niter_baseIPdET_S1_
    .section    .text._ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,"axG",@progbits,_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_,comdat
    .weak   _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
    .type   _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, @function
_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_:
.LFB3319:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movq    %rdi, -24(%rbp)
    movq    %rsi, -32(%rbp)
    movq    %rdx, -40(%rbp)
    movq    -40(%rbp), %rax
    movsd   (%rax), %xmm0
    movsd   %xmm0, -8(%rbp)
    movq    -32(%rbp), %rax
    movq    %rax, -16(%rbp)
.L41:
    cmpq    $0, -16(%rbp)
    je  .L40
    movq    -24(%rbp), %rax
    movsd   -8(%rbp), %xmm0
    movsd   %xmm0, (%rax)
    subq    $1, -16(%rbp)
    addq    $8, -24(%rbp)
    jmp .L41
.L40:
    movq    -24(%rbp), %rax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3319:
    .size   _ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_, .-_ZSt10__fill_n_aIPdmdEN9__gnu_cxx11__enable_ifIXsrSt11__is_scalarIT1_E7__valueET_E6__typeES6_T0_RKS4_
    .section    .text._ZSt12__niter_wrapIPdET_RKS1_S1_,"axG",@progbits,_ZSt12__niter_wrapIPdET_RKS1_S1_,comdat
    .weak   _ZSt12__niter_wrapIPdET_RKS1_S1_
    .type   _ZSt12__niter_wrapIPdET_RKS1_S1_, @function
_ZSt12__niter_wrapIPdET_RKS1_S1_:
.LFB3320:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movq    %rdi, -8(%rbp)
    movq    %rsi, -16(%rbp)
    movq    -16(%rbp), %rax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3320:
    .size   _ZSt12__niter_wrapIPdET_RKS1_S1_, .-_ZSt12__niter_wrapIPdET_RKS1_S1_
    .section    .text._ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,"axG",@progbits,_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd,comdat
    .weak   _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
    .type   _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, @function
_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd:
.LFB3321:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movq    %rdi, -8(%rbp)
    movq    -8(%rbp), %rax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3321:
    .size   _ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd, .-_ZNSt14__array_traitsIdLm2EE6_S_ptrERA2_Kd
    .text
    .type   _Z41__static_initialization_and_destruction_0ii, @function
_Z41__static_initialization_and_destruction_0ii:
.LFB3455:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movl    %edi, -4(%rbp)
    movl    %esi, -8(%rbp)
    cmpl    $1, -4(%rbp)
    jne .L49
    cmpl    $65535, -8(%rbp)
    jne .L49
    leaq    _ZStL8__ioinit(%rip), %rdi
    call    _ZNSt8ios_base4InitC1Ev@PLT
    leaq    __dso_handle(%rip), %rdx
    leaq    _ZStL8__ioinit(%rip), %rsi
    movq    _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rax
    movq    %rax, %rdi
    call    __cxa_atexit@PLT
.L49:
    nop
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3455:
    .size   _Z41__static_initialization_and_destruction_0ii, .-_Z41__static_initialization_and_destruction_0ii
    .type   _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3456:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movl    $65535, %esi
    movl    $1, %edi
    call    _Z41__static_initialization_and_destruction_0ii
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3456:
    .size   _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
    .section    .init_array,"aw"
    .align 8
    .quad   _GLOBAL__sub_I_main
    .section    .rodata
    .align 8
.LC1:
    .long   2950821842
    .long   1072018643
    .align 8
.LC2:
    .long   797100793
    .long   1074900721
    .hidden __dso_handle
    .ident  "GCC: (GNU) 9.2.0"
    .section    .note.GNU-stack,"",@progbits

The assembler dump when using -O1 (g++ -std=c++17 -Wall -pedantic -O1 -S -o test test.cpp) is

    .file   "test.cpp"
    .text
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string " "
    .text
    .globl  main
    .type   main, @function
main:
.LFB2853:
    .cfi_startproc
    pushq   %rbx
    .cfi_def_cfa_offset 16
    .cfi_offset 3, -16
    leaq    _ZSt4cout(%rip), %rbx
    movq    _ZSt4cout(%rip), %rax
    movq    %rbx, %rcx
    addq    -24(%rax), %rcx
    orl $260, 24(%rcx)
    movsd   .LC0(%rip), %xmm0
    movq    %rbx, %rdi
    call    _ZNSo9_M_insertIdEERSoT_@PLT
    movq    %rax, %rdi
    movl    $1, %edx
    leaq    .LC1(%rip), %rsi
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
    movsd   .LC2(%rip), %xmm0
    movq    %rbx, %rdi
    call    _ZNSo9_M_insertIdEERSoT_@PLT
    movq    %rax, %rdi
    movl    $1, %edx
    leaq    .LC1(%rip), %rsi
    call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
    movq    %rbx, %rdi
    call    _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@PLT
    movl    $0, %eax
    popq    %rbx
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE2853:
    .size   main, .-main
    .type   _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB3477:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    leaq    _ZStL8__ioinit(%rip), %rdi
    call    _ZNSt8ios_base4InitC1Ev@PLT
    leaq    __dso_handle(%rip), %rdx
    leaq    _ZStL8__ioinit(%rip), %rsi
    movq    _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rdi
    call    __cxa_atexit@PLT
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE3477:
    .size   _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
    .section    .init_array,"aw"
    .align 8
    .quad   _GLOBAL__sub_I_main
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .section    .rodata.cst8,"aM",@progbits,8
    .align 8
.LC0:
    .long   921391986
    .long   -1077349148
    .align 8
.LC2:
    .long   2858241566
    .long   -1075525036
    .hidden __dso_handle
    .ident  "GCC: (GNU) 9.2.0"
    .section    .note.GNU-stack,"",@progbits

Edit II

As requested, objdump -d on the executable compiled with -O0 can be found here (I cannot post it here because of length constraint). Output of ldd indicates that the executable is linked agains /usr/lib/libm.so.6. A part of the output of objdump -d /bin/libm.so.6 (sections .init, .plt, sin and cos) is found here.

Florian Weimer · Accepted Answer

With -O1, the floating computation happens at compile time, using the GNU MPFR library. MPFR is expected to give a correctly rounded result even for functions such as sin and cos. Your math library likely has different accuracy goals for these functions, which is why run-time computation (at the -O0 optimization level) sometimes gives different results. For example, the GNU C library has a general accuracy goal of a few ulp.

Reportedly, IEEE 754 only has accuracy requirements for a subset of the math library functions (sqrt, apparently), which enables math libraries to choose different trade-offs between speed and accuracy for the transcendental functions. (I do not have access to IEEE 754 because IEEE is opposed to the open dissemination of knowledge unfortunately.)

-O1 alters floating point math

Tags:

c++

floating-point

gcc

francesco

1 Answers

Florian Weimer

Recent Activity

Donate For Us

-O1 alters floating point math

Tags:

c++

floating-point

gcc

francesco

1 Answers

Florian Weimer

Related questions

Recent Activity

Donate For Us