Our code is written in C++ 11 (VS2012/Win 7-64bit). The C++ library provides a sleep_for
function that we use. We observed that the C++ sleep_for
sometimes shows a large overshoot. In other words we request to sleep for say 15 ms but the sleep turns out to be e.g. 100 ms. We see this when the load on the system is high.
My first reaction: “of course the sleeps "take longer" if there is a lot of load on the system and other threads are using the CPU”.
However the “funny” thing is that if we replace the sleep_for
by a Windows API “Sleep” call then we do not see this behavior. I also saw that the sleep_for
function under water makes a call to the Window API Sleep
method.
The documentation for sleep_for states:
The function blocks the calling thread for at least the time that's specified by Rel_time. This function does not throw any exceptions.
So technically the function is working. However we did not expect to see a difference between C++ sleep_for
and the regular Sleep(Ex)
function.
Can somebody explain this behavior?
There is quite a bit of additional code executed if using sleep_for vs SleepEx.
For example calling SleepEx(15) generates the following assembly in debug mode (Visual Studio 2015):
; 9 : SleepEx(15, false);
mov esi, esp
push 0
push 15 ; 0000000fH
call DWORD PTR __imp__SleepEx@8
cmp esi, esp
call __RTC_CheckEsp
By contrast this code
const std::chrono::milliseconds duration(15);
std::this_thread::sleep_for(duration);
Generates the following:
; 9 : std::this_thread::sleep_for(std::chrono::milliseconds(15));
mov DWORD PTR $T1[ebp], 15 ; 0000000fH
lea eax, DWORD PTR $T1[ebp]
push eax
lea ecx, DWORD PTR $T2[ebp]
call duration
push eax
call sleep_for
add esp, 4
This calls into:
duration PROC ; std::chrono::duration<__int64,std::ratio<1,1000> >::duration<__int64,std::ratio<1,1000> ><int,void>, COMDAT
; _this$ = ecx
; 113 : { // construct from representation
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
push ecx
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
pop ecx
mov DWORD PTR _this$[ebp], ecx
; 112 : : _MyRep(static_cast<_Rep>(_Val))
mov eax, DWORD PTR __Val$[ebp]
mov eax, DWORD PTR [eax]
cdq
mov ecx, DWORD PTR _this$[ebp]
mov DWORD PTR [ecx], eax
mov DWORD PTR [ecx+4], edx
; 114 : }
mov eax, DWORD PTR _this$[ebp]
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 4
duration ENDP
And calls into
sleep_for PROC ; std::this_thread::sleep_for<__int64,std::ratio<1,1000> >, COMDAT
; 151 : { // sleep for duration
push ebp
mov ebp, esp
sub esp, 268 ; 0000010cH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-268]
mov ecx, 67 ; 00000043H
mov eax, -858993460 ; ccccccccH
rep stosd
mov eax, DWORD PTR ___security_cookie
xor eax, ebp
mov DWORD PTR __$ArrayPad$[ebp], eax
; 152 : stdext::threads::xtime _Tgt = _To_xtime(_Rel_time);
mov eax, DWORD PTR __Rel_time$[ebp]
push eax
lea ecx, DWORD PTR $T1[ebp]
push ecx
call to_xtime
add esp, 8
mov edx, DWORD PTR [eax]
mov DWORD PTR $T2[ebp], edx
mov ecx, DWORD PTR [eax+4]
mov DWORD PTR $T2[ebp+4], ecx
mov edx, DWORD PTR [eax+8]
mov DWORD PTR $T2[ebp+8], edx
mov eax, DWORD PTR [eax+12]
mov DWORD PTR $T2[ebp+12], eax
mov ecx, DWORD PTR $T2[ebp]
mov DWORD PTR __Tgt$[ebp], ecx
mov edx, DWORD PTR $T2[ebp+4]
mov DWORD PTR __Tgt$[ebp+4], edx
mov eax, DWORD PTR $T2[ebp+8]
mov DWORD PTR __Tgt$[ebp+8], eax
mov ecx, DWORD PTR $T2[ebp+12]
mov DWORD PTR __Tgt$[ebp+12], ecx
; 153 : sleep_until(&_Tgt);
lea eax, DWORD PTR __Tgt$[ebp]
push eax
call sleep_until
add esp, 4
; 154 : }
push edx
mov ecx, ebp
push eax
lea edx, DWORD PTR $LN5@sleep_for
call @_RTC_CheckStackVars@8
pop eax
pop edx
pop edi
pop esi
pop ebx
mov ecx, DWORD PTR __$ArrayPad$[ebp]
xor ecx, ebp
call @__security_check_cookie@4
add esp, 268 ; 0000010cH
cmp ebp, esp
call __RTC_CheckEsp
mov esp, ebp
pop ebp
ret 0
npad 3
$LN5@sleep_for:
DD 1
DD $LN4@sleep_for
$LN4@sleep_for:
DD -24 ; ffffffe8H
DD 16 ; 00000010H
DD $LN3@sleep_for
$LN3@sleep_for:
DB 95 ; 0000005fH
DB 84 ; 00000054H
DB 103 ; 00000067H
DB 116 ; 00000074H
DB 0
sleep_for ENDP
Some conversion happens:
to_xtime PROC ; std::_To_xtime<__int64,std::ratio<1,1000> >, COMDAT
; 758 : { // convert duration to xtime
push ebp
mov ebp, esp
sub esp, 348 ; 0000015cH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-348]
mov ecx, 87 ; 00000057H
mov eax, -858993460 ; ccccccccH
rep stosd
mov eax, DWORD PTR ___security_cookie
xor eax, ebp
mov DWORD PTR __$ArrayPad$[ebp], eax
; 759 : xtime _Xt;
; 760 : if (_Rel_time <= chrono::duration<_Rep, _Period>::zero())
lea eax, DWORD PTR $T7[ebp]
push eax
call duration_zero ; std::chrono::duration<__int64,std::ratio<1,1000> >::zero
add esp, 4
push eax
mov ecx, DWORD PTR __Rel_time$[ebp]
push ecx
call chronos_operator ; std::chrono::operator<=<__int64,std::ratio<1,1000>,__int64,std::ratio<1,1000> >
add esp, 8
movzx edx, al
test edx, edx
je SHORT $LN2@To_xtime
; 761 : { // negative or zero relative time, return zero
; 762 : _Xt.sec = 0;
xorps xmm0, xmm0
movlpd QWORD PTR __Xt$[ebp], xmm0
; 763 : _Xt.nsec = 0;
mov DWORD PTR __Xt$[ebp+8], 0
; 764 : }
; 765 : else
jmp $LN3@To_xtime
$LN2@To_xtime:
; 766 : { // positive relative time, convert
; 767 : chrono::nanoseconds _T0 =
; 768 : chrono::system_clock::now().time_since_epoch();
lea eax, DWORD PTR $T5[ebp]
push eax
lea ecx, DWORD PTR $T6[ebp]
push ecx
call system_clock_now ; std::chrono::system_clock::now
add esp, 4
mov ecx, eax
call time_since_ephoch ; std::chrono::time_point<std::chrono::system_clock,std::chrono::duration<__int64,std::ratio<1,10000000> > >::time_since_epoch
push eax
lea ecx, DWORD PTR __T0$8[ebp]
call duration ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,10000000>,void>
; 769 : _T0 += _Rel_time;
mov eax, DWORD PTR __Rel_time$[ebp]
push eax
lea ecx, DWORD PTR $T4[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1000>,void>
lea ecx, DWORD PTR $T4[ebp]
push ecx
lea ecx, DWORD PTR __T0$8[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator+=
; 770 : _Xt.sec = chrono::duration_cast<chrono::seconds>(_T0).count();
lea eax, DWORD PTR __T0$8[ebp]
push eax
lea ecx, DWORD PTR $T3[ebp]
push ecx
call duration_cast ; std::chrono::duration_cast<std::chrono::duration<__int64,std::ratio<1,1> >,__int64,std::ratio<1,1000000000> >
add esp, 8
mov ecx, eax
call duration_count ; std::chrono::duration<__int64,std::ratio<1,1> >::count
mov DWORD PTR __Xt$[ebp], eax
mov DWORD PTR __Xt$[ebp+4], edx
; 771 : _T0 -= chrono::seconds(_Xt.sec);
lea eax, DWORD PTR __Xt$[ebp]
push eax
lea ecx, DWORD PTR $T1[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1> >::duration<__int64,std::ratio<1,1> ><__int64,void>
push eax
lea ecx, DWORD PTR $T2[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1>,void>
lea ecx, DWORD PTR $T2[ebp]
push ecx
lea ecx, DWORD PTR __T0$8[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator-=
; 772 : _Xt.nsec = (long)_T0.count();
lea ecx, DWORD PTR __T0$8[ebp]
call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::count
mov DWORD PTR __Xt$[ebp+8], eax
$LN3@To_xtime:
; 773 : }
; 774 : return (_Xt);
mov eax, DWORD PTR $T9[ebp]
mov ecx, DWORD PTR __Xt$[ebp]
mov DWORD PTR [eax], ecx
mov edx, DWORD PTR __Xt$[ebp+4]
mov DWORD PTR [eax+4], edx
mov ecx, DWORD PTR __Xt$[ebp+8]
mov DWORD PTR [eax+8], ecx
mov edx, DWORD PTR __Xt$[ebp+12]
mov DWORD PTR [eax+12], edx
mov eax, DWORD PTR $T9[ebp]
; 775 : }
push edx
mov ecx, ebp
push eax
lea edx, DWORD PTR $LN8@To_xtime
call @_RTC_CheckStackVars@8
pop eax
pop edx
pop edi
pop esi
pop ebx
mov ecx, DWORD PTR __$ArrayPad$[ebp]
xor ecx, ebp
call @__security_check_cookie@4
add esp, 348 ; 0000015cH
cmp ebp, esp
call __RTC_CheckEsp
mov esp, ebp
pop ebp
ret 0
$LN8@To_xtime:
DD 2
DD $LN7@To_xtime
$LN7@To_xtime:
DD -24 ; ffffffe8H
DD 16 ; 00000010H
DD $LN5@To_xtime
DD -40 ; ffffffd8H
DD 8
DD $LN6@To_xtime
$LN6@To_xtime:
DB 95 ; 0000005fH
DB 84 ; 00000054H
DB 48 ; 00000030H
DB 0
$LN5@To_xtime:
DB 95 ; 0000005fH
DB 88 ; 00000058H
DB 116 ; 00000074H
DB 0
to_xtime ENDP
Eventually the imported function gets called, the same one SleepEx has used.
sleep_until PROC ; std::this_thread::sleep_until, COMDAT
; 131 : { // sleep until _Abs_time
push ebp
mov ebp, esp
sub esp, 192 ; 000000c0H
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-192]
mov ecx, 48 ; 00000030H
mov eax, -858993460 ; ccccccccH
rep stosd
; 132 : _Thrd_sleep(_Abs_time);
mov esi, esp
mov eax, DWORD PTR __Abs_time$[ebp]
push eax
call DWORD PTR __imp___Thrd_sleep
add esp, 4
cmp esi, esp
call __RTC_CheckEsp
; 133 : }
pop edi
pop esi
pop ebx
add esp, 192 ; 000000c0H
cmp ebp, esp
call __RTC_CheckEsp
mov esp, ebp
pop ebp
ret 0
sleep_until ENDP
You should also be aware even SleepEx may not give 100% exact results as per the MSDN documentation https://msdn.microsoft.com/en-us/library/windows/desktop/ms686307(v=vs.85).aspx
This function causes a thread to relinquish the remainder of its time slice and become unrunnable for an interval based on the value of dwMilliseconds. The system clock "ticks" at a constant rate. If dwMilliseconds is less than the resolution of the system clock, the thread may sleep for less than the specified length of time. If dwMilliseconds is greater than one tick but less than two, the wait can be anywhere between one and two ticks, and so on. To increase the accuracy of the sleep interval, call the timeGetDevCaps function to determine the supported minimum timer resolution and the timeBeginPeriod function to set the timer resolution to its minimum. Use caution when calling timeBeginPeriod, as frequent calls can significantly affect the system clock, system power usage, and the scheduler. If you call timeBeginPeriod, call it one time early in the application and be sure to call the timeEndPeriod function at the very end of the application.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With