Modulus of short integers not correct. This is really weird, and already cost me two days. I've narrowed the problematic code as follows (simplified as far as possible):
#include <stdio.h>
#include <stdlib.h>
int foo(short Width, short Height, short MSize)
{
short i = 0, k = 0, pos = 0;
short j = 0;
for(j = 1; j < Width - 1; j = j + 1)
{/* a blank loop */}
for(i = 1; i < Height - 1; i = i + 1) {
for(j = 1; j < Width - 1; j = j + 1) {
if((j % MSize) == 0) {
k = k + 1;
}
printf("i=%d, k=%d, j=%d, MSize=%d, j mod MSize=%d\n", (int)i, (int)k, (int)j, (int)MSize, (int)(j % MSize));
if (pos >= 1024) {
fprintf(stderr, "pos = %d, over 1024\n", (int)pos);
}
pos = pos + 1;
}
}
return 0;
}
int main(int argc, char* argv[])
{
foo(32, 32, 8);
return 0;
}
When compiled in Debug mode, the above codes work fine, the result of j%MSize is correct, however, when compiled in Release mode, the result of j%MSize will always be 7, which is nonsense (tested under Visual Studio 2005/2012/2013). There is no memory operation, so it should not be caused by stack corruption. Anybody has a clue?
The output I'm seeing is (slightly edited):
j=10, MSize=8, j mod MSize=7
j=11, MSize=8, j mod MSize=7
j=12, MSize=8, j mod MSize=7
j=13, MSize=8, j mod MSize=7
j=14, MSize=8, j mod MSize=7
j=15, MSize=8, j mod MSize=7
j=16, MSize=8, j mod MSize=7
j=17, MSize=8, j mod MSize=7
j=18, MSize=8, j mod MSize=7
j=19, MSize=8, j mod MSize=7
j=20, MSize=8, j mod MSize=7
j=21, MSize=8, j mod MSize=7
j=22, MSize=8, j mod MSize=7
j=23, MSize=8, j mod MSize=7
j=24, MSize=8, j mod MSize=7
j=25, MSize=8, j mod MSize=7
j=26, MSize=8, j mod MSize=7
j=27, MSize=8, j mod MSize=7
The following is the build log:
1>Project "E:\Code\workspace\C\GeneralC\SNDFeatureExtract\SNDFeatureExtract.vcxproj" on node 2 (Build target(s)).
1>ClCompile:
D:\Program Files\Microsoft Visual Studio 11.0\VC\bin\CL.exe /c /Zi /nologo /W3 /WX- /sdl /O2 /Oi /Oy- /GL /D WIN32 /D NDEBUG /D _CONSOLE /D _MBCS /Gm- /EHsc /MT /GS /Gy /fp:precise /Zc:wchar_t /Zc:forScope /Fo"Release\\" /Fd"Release\vc110.pdb" /Gd /TP /analyze- /errorReport:prompt WeirdBug.cpp
WeirdBug.cpp
Link:
D:\Program Files\Microsoft Visual Studio 11.0\VC\bin\link.exe /ERRORREPORT:PROMPT /OUT:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.exe" /INCREMENTAL:NO /NOLOGO kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /MANIFEST /MANIFESTUAC:"level='asInvoker' uiAccess='false'" /manifest:embed /DEBUG /PDB:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.pdb" /SUBSYSTEM:CONSOLE /OPT:REF /OPT:ICF /LTCG /TLBID:1 /DYNAMICBASE /NXCOMPAT /IMPLIB:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.lib" /MACHINE:X86 /SAFESEH Release\WeirdBug.obj
Generating code
Finished generating code
SNDFeatureExtract.vcxproj -> E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.exe
1>Done Building Project "E:\Code\workspace\C\GeneralC\SNDFeatureExtract\SNDFeatureExtract.vcxproj" (Build target(s)).
The following is the Disassembly result from VS:
short i = 0, k = 0, pos = 0;
short j = 0;
for(j = 1; j < Width - 1; j = j + 1)
00801014 mov edi,1FF983C8h
00801019 jl foo+12h (0801012h)
{/* a blank loop */}
for(i = 1; i < Height - 1; i = i + 1) {
0080101B mov edx,1
00801020 mov dword ptr [ebp-4],1
00801027 mov dword ptr [ebp-8],edx
0080102A and ecx,80000007h
00801030 jns foo+37h (0801037h)
00801032 dec ecx
00801033 or ecx,0FFFFFFF8h
00801036 inc ecx
00801037 mov dword ptr [ebp-0Ch],ecx
0080103A lea ebx,[ebx]
00801040 mov eax,1
for(j = 1; j < Width - 1; j = j + 1) {
00801045 mov ebx,eax
if((j % MSize) == 0) {
00801047 test ecx,ecx
00801049 jne foo+4Ch (080104Ch)
k = k + 1;
0080104B inc edi
}
printf_s("i=%d, k=%d, j=%d, MSize=%d, j mod MSize=%d\n", (int)i, (int)k, (int)j, (int)MSize, (int)(j % MSize));
0080104C push ecx
0080104D push 8
0080104F push eax
00801050 movsx eax,di
00801053 push eax
00801054 push edx
00801055 push 80CD30h
0080105A call printf_s (0801266h)
if (pos >= 1024) {
0080105F mov eax,400h
00801064 add esp,18h
00801067 cmp si,ax
0080106A jl foo+86h (0801086h)
fprintf_s(stderr, "pos = %d, over 1024\n", (int)pos);
0080106C movsx eax,si
fprintf_s(stderr, "pos = %d, over 1024\n", (int)pos);
0080106F push eax
00801070 push 80CD5Ch
00801075 call __iob_func (0801175h)
0080107A add eax,40h
0080107D push eax
0080107E call fprintf_s (080127Ch)
00801083 add esp,0Ch
for(j = 1; j < Width - 1; j = j + 1) {
00801086 mov ecx,dword ptr [ebp-0Ch]
00801089 mov edx,dword ptr [ebp-8]
}
pos = pos + 1;
0080108C inc ebx
0080108D movsx eax,bx
00801090 inc esi
00801091 cmp eax,1Fh
00801094 jl foo+47h (0801047h)
{/* a blank loop */}
for(i = 1; i < Height - 1; i = i + 1) {
00801096 mov eax,dword ptr [ebp-4]
00801099 inc eax
0080109A movsx edx,ax
0080109D mov dword ptr [ebp-4],eax
008010A0 mov dword ptr [ebp-8],edx
008010A3 cmp edx,1Fh
008010A6 jl foo+40h (0801040h)
}
}
return 0;
008010A8 pop edi
008010A9 pop esi
008010AA xor eax,eax
008010AC pop ebx
}
008010AD mov esp,ebp
008010AF pop ebp
008010B0 ret
it is because the compiler's optimization, and this is something todo with your blank loop. But I'm not quite sure where the problem is.
To simply solve the question, declare j as:
volatile short j;
and it will works fine. Cause program will fetch j from memory instead of registers every time.
I debugged the assembly code, and find out the program calculate the j % MSize and stored it into memory just after the blank loop, and every time before doing printf, it just fetch the value from memory instead of re-calculating it.
mov ecx,dword ptr [ebp-10h] // j % MSize @ memory
push ecx // j % MSize
mov ecx,dword ptr [ebp-0Ch]
push 8 // MSize
push eax // j
movsx eax,word ptr [IdxY]
movsx esi,di
push esi // k
push eax // IdxY
push ecx // i
// push static string and calling printf
But adding a volatile, it will be act like:
mov dx,word ptr [j]
movsx eax,dx // j
and eax,80000007h // j % 8
push eax
// push other vars and calling printf
That's re-calculating the MOD, and push it into stack for printf. So it's likely a compiler's bug, because it should fetch j from memory even if there is no volatile add.
Since I can't add comments again now :(.. I found out it's the /Oxxx and /GL flag's fault. It will choose one from below:
/O1 /O2 /Ox
It has to choose one of the above options along with /GL to see the issue.
My IDE is Visual Studio 2010 10.0.40219.1 SP1Rel
I don't see any issue
$ gcc modulus.c
$ ./a.out
Width = 32, Height = 32, MSize = 8, Dim =16, sizeof(short)=2
i=1, IdxY=0, k=0, j=1, MSize=8, j mod MSize=1
i=1, IdxY=0, k=0, j=2, MSize=8, j mod MSize=2
i=1, IdxY=0, k=0, j=3, MSize=8, j mod MSize=3
i=1, IdxY=0, k=0, j=4, MSize=8, j mod MSize=4
i=1, IdxY=0, k=0, j=5, MSize=8, j mod MSize=5
i=1, IdxY=0, k=0, j=6, MSize=8, j mod MSize=6
i=1, IdxY=0, k=0, j=7, MSize=8, j mod MSize=7
i=1, IdxY=0, k=1, j=8, MSize=8, j mod MSize=0
Am i missing something?
In addition to the other answers already provided I want to point out that you can prevent such erros with better scoping (although it's not your fault in this case, it's probably a compiler bug).
Prefer to declare the iteration variables inside the loop-scope. Or even more general: Declare variables only in the scope where they are used.
If you change the second for
-loop to this:
for(short j = 1; j < Width - 1; j = j + 1) {
so that j
is declared in the scope of the for
-loop, the compiler must treat j
as a new variable which has nothing to do with the previous empty loop. Therefore it is less likely to over-optimize by reusing previous memory locations. This small change fixes the bug in VS2013 and I consider it much cleaner than using volatile
.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With