Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Windows 7: overshoot C++ std::this_thread::sleep_for

Our code is written in C++ 11 (VS2012/Win 7-64bit). The C++ library provides a sleep_for function that we use. We observed that the C++ sleep_for sometimes shows a large overshoot. In other words we request to sleep for say 15 ms but the sleep turns out to be e.g. 100 ms. We see this when the load on the system is high.

My first reaction: “of course the sleeps "take longer" if there is a lot of load on the system and other threads are using the CPU”. However the “funny” thing is that if we replace the sleep_for by a Windows API “Sleep” call then we do not see this behavior. I also saw that the sleep_for function under water makes a call to the Window API Sleep method.

The documentation for sleep_for states:

The function blocks the calling thread for at least the time that's specified by Rel_time. This function does not throw any exceptions.

So technically the function is working. However we did not expect to see a difference between C++ sleep_for and the regular Sleep(Ex) function.

Can somebody explain this behavior?

like image 342
user3773903 Avatar asked Oct 02 '15 09:10

user3773903


1 Answers

There is quite a bit of additional code executed if using sleep_for vs SleepEx.

For example calling SleepEx(15) generates the following assembly in debug mode (Visual Studio 2015):

; 9    :    SleepEx(15, false);

    mov esi, esp
    push    0
    push    15                  ; 0000000fH
    call    DWORD PTR __imp__SleepEx@8
    cmp esi, esp
    call    __RTC_CheckEsp

By contrast this code

const std::chrono::milliseconds duration(15);
std::this_thread::sleep_for(duration);

Generates the following:

; 9    :    std::this_thread::sleep_for(std::chrono::milliseconds(15));

    mov DWORD PTR $T1[ebp], 15          ; 0000000fH
    lea eax, DWORD PTR $T1[ebp]
    push    eax
    lea ecx, DWORD PTR $T2[ebp]
    call    duration
    push    eax
    call    sleep_for
    add esp, 4

This calls into:

duration PROC ; std::chrono::duration<__int64,std::ratio<1,1000> >::duration<__int64,std::ratio<1,1000> ><int,void>, COMDAT
; _this$ = ecx

; 113  :        {   // construct from representation

    push    ebp
    mov ebp, esp
    sub esp, 204                ; 000000ccH
    push    ebx
    push    esi
    push    edi
    push    ecx
    lea edi, DWORD PTR [ebp-204]
    mov ecx, 51                 ; 00000033H
    mov eax, -858993460             ; ccccccccH
    rep stosd
    pop ecx
    mov DWORD PTR _this$[ebp], ecx

; 112  :            : _MyRep(static_cast<_Rep>(_Val))

    mov eax, DWORD PTR __Val$[ebp]
    mov eax, DWORD PTR [eax]
    cdq
    mov ecx, DWORD PTR _this$[ebp]
    mov DWORD PTR [ecx], eax
    mov DWORD PTR [ecx+4], edx

; 114  :        }

    mov eax, DWORD PTR _this$[ebp]
    pop edi
    pop esi
    pop ebx
    mov esp, ebp
    pop ebp
    ret 4
duration ENDP

And calls into

    sleep_for PROC ; std::this_thread::sleep_for<__int64,std::ratio<1,1000> >, COMDAT

    ; 151  :    {   // sleep for duration

        push    ebp
        mov ebp, esp
        sub esp, 268                ; 0000010cH
        push    ebx
        push    esi
        push    edi
        lea edi, DWORD PTR [ebp-268]
        mov ecx, 67                 ; 00000043H
        mov eax, -858993460             ; ccccccccH
        rep stosd
        mov eax, DWORD PTR ___security_cookie
        xor eax, ebp
        mov DWORD PTR __$ArrayPad$[ebp], eax

    ; 152  :    stdext::threads::xtime _Tgt = _To_xtime(_Rel_time);

        mov eax, DWORD PTR __Rel_time$[ebp]
        push    eax
        lea ecx, DWORD PTR $T1[ebp]
        push    ecx
        call    to_xtime
        add esp, 8
        mov edx, DWORD PTR [eax]
        mov DWORD PTR $T2[ebp], edx
        mov ecx, DWORD PTR [eax+4]
        mov DWORD PTR $T2[ebp+4], ecx
        mov edx, DWORD PTR [eax+8]
        mov DWORD PTR $T2[ebp+8], edx
        mov eax, DWORD PTR [eax+12]
        mov DWORD PTR $T2[ebp+12], eax
        mov ecx, DWORD PTR $T2[ebp]
        mov DWORD PTR __Tgt$[ebp], ecx
        mov edx, DWORD PTR $T2[ebp+4]
        mov DWORD PTR __Tgt$[ebp+4], edx
        mov eax, DWORD PTR $T2[ebp+8]
        mov DWORD PTR __Tgt$[ebp+8], eax
        mov ecx, DWORD PTR $T2[ebp+12]
        mov DWORD PTR __Tgt$[ebp+12], ecx

    ; 153  :    sleep_until(&_Tgt);

        lea eax, DWORD PTR __Tgt$[ebp]
        push    eax
        call    sleep_until
        add esp, 4

    ; 154  :    }

        push    edx
        mov ecx, ebp
        push    eax
        lea edx, DWORD PTR $LN5@sleep_for
        call    @_RTC_CheckStackVars@8
        pop eax
        pop edx
        pop edi
        pop esi
        pop ebx
        mov ecx, DWORD PTR __$ArrayPad$[ebp]
        xor ecx, ebp
        call    @__security_check_cookie@4
        add esp, 268                ; 0000010cH
        cmp ebp, esp
        call    __RTC_CheckEsp
        mov esp, ebp
        pop ebp
        ret 0
        npad    3
    $LN5@sleep_for:
        DD  1
        DD  $LN4@sleep_for
    $LN4@sleep_for:
        DD  -24                 ; ffffffe8H
        DD  16                  ; 00000010H
        DD  $LN3@sleep_for
    $LN3@sleep_for:
        DB  95                  ; 0000005fH
        DB  84                  ; 00000054H
        DB  103                 ; 00000067H
        DB  116                 ; 00000074H
        DB  0
    sleep_for ENDP

Some conversion happens:

to_xtime PROC ; std::_To_xtime<__int64,std::ratio<1,1000> >, COMDAT

; 758  :    {   // convert duration to xtime

    push    ebp
    mov ebp, esp
    sub esp, 348                ; 0000015cH
    push    ebx
    push    esi
    push    edi
    lea edi, DWORD PTR [ebp-348]
    mov ecx, 87                 ; 00000057H
    mov eax, -858993460             ; ccccccccH
    rep stosd
    mov eax, DWORD PTR ___security_cookie
    xor eax, ebp
    mov DWORD PTR __$ArrayPad$[ebp], eax

; 759  :    xtime _Xt;
; 760  :    if (_Rel_time <= chrono::duration<_Rep, _Period>::zero())

    lea eax, DWORD PTR $T7[ebp]
    push    eax
    call    duration_zero ; std::chrono::duration<__int64,std::ratio<1,1000> >::zero
    add esp, 4
    push    eax
    mov ecx, DWORD PTR __Rel_time$[ebp]
    push    ecx
    call    chronos_operator ; std::chrono::operator<=<__int64,std::ratio<1,1000>,__int64,std::ratio<1,1000> >
    add esp, 8
    movzx   edx, al
    test    edx, edx
    je  SHORT $LN2@To_xtime

; 761  :        {   // negative or zero relative time, return zero
; 762  :        _Xt.sec = 0;

    xorps   xmm0, xmm0
    movlpd  QWORD PTR __Xt$[ebp], xmm0

; 763  :        _Xt.nsec = 0;

    mov DWORD PTR __Xt$[ebp+8], 0

; 764  :        }
; 765  :    else

    jmp $LN3@To_xtime
$LN2@To_xtime:

; 766  :        {   // positive relative time, convert
; 767  :        chrono::nanoseconds _T0 =
; 768  :            chrono::system_clock::now().time_since_epoch();

    lea eax, DWORD PTR $T5[ebp]
    push    eax
    lea ecx, DWORD PTR $T6[ebp]
    push    ecx
    call    system_clock_now ; std::chrono::system_clock::now
    add esp, 4
    mov ecx, eax
    call    time_since_ephoch ; std::chrono::time_point<std::chrono::system_clock,std::chrono::duration<__int64,std::ratio<1,10000000> > >::time_since_epoch
    push    eax
    lea ecx, DWORD PTR __T0$8[ebp]
    call    duration ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,10000000>,void>

; 769  :        _T0 += _Rel_time;

    mov eax, DWORD PTR __Rel_time$[ebp]
    push    eax
    lea ecx, DWORD PTR $T4[ebp]
    call    duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1000>,void>
    lea ecx, DWORD PTR $T4[ebp]
    push    ecx
    lea ecx, DWORD PTR __T0$8[ebp]
    call    duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator+=

; 770  :        _Xt.sec = chrono::duration_cast<chrono::seconds>(_T0).count();

    lea eax, DWORD PTR __T0$8[ebp]
    push    eax
    lea ecx, DWORD PTR $T3[ebp]
    push    ecx
    call    duration_cast ; std::chrono::duration_cast<std::chrono::duration<__int64,std::ratio<1,1> >,__int64,std::ratio<1,1000000000> >
    add esp, 8
    mov ecx, eax
    call    duration_count ; std::chrono::duration<__int64,std::ratio<1,1> >::count
    mov DWORD PTR __Xt$[ebp], eax
    mov DWORD PTR __Xt$[ebp+4], edx

; 771  :        _T0 -= chrono::seconds(_Xt.sec);

    lea eax, DWORD PTR __Xt$[ebp]
    push    eax
    lea ecx, DWORD PTR $T1[ebp]
    call    duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1> >::duration<__int64,std::ratio<1,1> ><__int64,void>
    push    eax
    lea ecx, DWORD PTR $T2[ebp]
    call    duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1>,void>
    lea ecx, DWORD PTR $T2[ebp]
    push    ecx
    lea ecx, DWORD PTR __T0$8[ebp]
    call    duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator-=

; 772  :        _Xt.nsec = (long)_T0.count();

    lea ecx, DWORD PTR __T0$8[ebp]
    call    duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::count
    mov DWORD PTR __Xt$[ebp+8], eax
$LN3@To_xtime:

; 773  :        }
; 774  :    return (_Xt);

    mov eax, DWORD PTR $T9[ebp]
    mov ecx, DWORD PTR __Xt$[ebp]
    mov DWORD PTR [eax], ecx
    mov edx, DWORD PTR __Xt$[ebp+4]
    mov DWORD PTR [eax+4], edx
    mov ecx, DWORD PTR __Xt$[ebp+8]
    mov DWORD PTR [eax+8], ecx
    mov edx, DWORD PTR __Xt$[ebp+12]
    mov DWORD PTR [eax+12], edx
    mov eax, DWORD PTR $T9[ebp]

; 775  :    }

    push    edx
    mov ecx, ebp
    push    eax
    lea edx, DWORD PTR $LN8@To_xtime
    call    @_RTC_CheckStackVars@8
    pop eax
    pop edx
    pop edi
    pop esi
    pop ebx
    mov ecx, DWORD PTR __$ArrayPad$[ebp]
    xor ecx, ebp
    call    @__security_check_cookie@4
    add esp, 348                ; 0000015cH
    cmp ebp, esp
    call    __RTC_CheckEsp
    mov esp, ebp
    pop ebp
    ret 0
$LN8@To_xtime:
    DD  2
    DD  $LN7@To_xtime
$LN7@To_xtime:
    DD  -24                 ; ffffffe8H
    DD  16                  ; 00000010H
    DD  $LN5@To_xtime
    DD  -40                 ; ffffffd8H
    DD  8
    DD  $LN6@To_xtime
$LN6@To_xtime:
    DB  95                  ; 0000005fH
    DB  84                  ; 00000054H
    DB  48                  ; 00000030H
    DB  0
$LN5@To_xtime:
    DB  95                  ; 0000005fH
    DB  88                  ; 00000058H
    DB  116                 ; 00000074H
    DB  0
to_xtime ENDP 

Eventually the imported function gets called, the same one SleepEx has used.

sleep_until PROC    ; std::this_thread::sleep_until, COMDAT

; 131  :    {   // sleep until _Abs_time

    push    ebp
    mov ebp, esp
    sub esp, 192                ; 000000c0H
    push    ebx
    push    esi
    push    edi
    lea edi, DWORD PTR [ebp-192]
    mov ecx, 48                 ; 00000030H
    mov eax, -858993460             ; ccccccccH
    rep stosd

; 132  :    _Thrd_sleep(_Abs_time);

    mov esi, esp
    mov eax, DWORD PTR __Abs_time$[ebp]
    push    eax
    call    DWORD PTR __imp___Thrd_sleep
    add esp, 4
    cmp esi, esp
    call    __RTC_CheckEsp

; 133  :    }

    pop edi
    pop esi
    pop ebx
    add esp, 192                ; 000000c0H
    cmp ebp, esp
    call    __RTC_CheckEsp
    mov esp, ebp
    pop ebp
    ret 0
sleep_until ENDP

You should also be aware even SleepEx may not give 100% exact results as per the MSDN documentation https://msdn.microsoft.com/en-us/library/windows/desktop/ms686307(v=vs.85).aspx

This function causes a thread to relinquish the remainder of its time slice and become unrunnable for an interval based on the value of dwMilliseconds. The system clock "ticks" at a constant rate. If dwMilliseconds is less than the resolution of the system clock, the thread may sleep for less than the specified length of time. If dwMilliseconds is greater than one tick but less than two, the wait can be anywhere between one and two ticks, and so on. To increase the accuracy of the sleep interval, call the timeGetDevCaps function to determine the supported minimum timer resolution and the timeBeginPeriod function to set the timer resolution to its minimum. Use caution when calling timeBeginPeriod, as frequent calls can significantly affect the system clock, system power usage, and the scheduler. If you call timeBeginPeriod, call it one time early in the application and be sure to call the timeEndPeriod function at the very end of the application.

like image 77
Malcolm McCaffery Avatar answered Nov 13 '22 02:11

Malcolm McCaffery