If I don't want the overhead needed for a call to strcmp, I compare strings with short string literals the way described in the following code example:
#ifdef LITTLE_ENDIAN //little-endian-addressing
#define BytesAsDWord_M(a, b, c, d)\
((ulong) ((a) | ((b) << 8) | ((ulong) (c) << 16) | ((ulong) (d) << 24)))
#define BytesAsWord_M(a, b)((ushort) ((a) | ((b) << 8)))
#else //LITTLE_ENDIAN //little-endian-addressing
#define BytesAsDWord_M(a, b, c, d)\
((ulong) ((d) | ((c) << 8) | ((b) << 16) | ((a) << 24)))
#define BytesAsWord_M(a, b) ((ushort) ((b) | ((a) << 8)))
#endif //LITTLE_ENDIAN //little-endian-addressing
bool AbsCompare(char* chr_p)
//compare string with "abs"
{
if (*((ulong*) &chr_p[1]) ==
BytesAsDWord_M('a', 'b', 's', '\0'))
return true;
return false;
}
gcc compiles this example as long as I compile without optimization options enabled. With optimization enabled I get the warning:
"dereferencing type-punned pointer will break strict-aliasing rules"
Even optimizing with -O3 doesn't result in effective code as the example illustrates:
//abstest.c
#include <string.h>
typedef unsigned long ulong;
typedef unsigned short ushort;
#if BYTE_ORDER == LITTLE_ENDIAN //little-endian-addressing
#define BytesAsDWord_M(a, b, c, d)\
((ulong) ((a) | ((b) << 8) | ((ulong) (c) << 16) | ((ulong) (d) << 24)))
#define BytesAsWord_M(a, b)((ushort) ((a) | ((b) << 8)))
#else //BYTE_ORDER == LITTLE_ENDIAN //little-endian-addressing
#define BytesAsDWord_M(a, b, c, d)\
((ulong) ((d) | ((c) << 8) | ((b) << 16) | ((a) << 24)))
#define BytesAsWord_M(a, b) ((ushort) ((b) | ((a) << 8)))
#endif //BYTE_ORDER == LITTLE_ENDIAN //little-endian-addressing
int AbsCompare1(char* chr_p)
{
return *(ulong*) chr_p == BytesAsDWord_M('a', 'b', 's', '\0');
}
int AbsCompare2(char* chr_p)
{
return strcmp(chr_p, "abs");
}
int main(int argc __attribute__((unused)), char ** argv)
{
int i;
int j;
i = AbsCompare1(argv[0]);
j = AbsCompare2(argv[0]);
return i + j;
}
objdump -d -Mintel abstest:
080483d0 <AbsCompare1>:
80483d0: 55 push ebp
80483d1: 89 e5 mov ebp,esp
80483d3: 8b 45 08 mov eax,DWORD PTR [ebp+0x8]
80483d6: 5d pop ebp
80483d7: 81 38 61 62 73 00 cmp DWORD PTR [eax],0x736261
80483dd: 0f 94 c0 sete al
80483e0: 0f b6 c0 movzx eax,al
80483e3: c3 ret
080483f0 <AbsCompare2>:
80483f0: 55 push ebp
80483f1: 0f b6 0d 5c 85 04 08 movzx ecx,BYTE PTR ds:0x804855c
80483f8: 89 e5 mov ebp,esp
80483fa: 8b 55 08 mov edx,DWORD PTR [ebp+0x8]
80483fd: 0f b6 02 movzx eax,BYTE PTR [edx]
8048400: 29 c8 sub eax,ecx
8048402: 75 2b jne 804842f <AbsCompare2+0x3f>
8048404: 0f b6 42 01 movzx eax,BYTE PTR [edx+0x1]
8048408: 0f b6 0d 5d 85 04 08 movzx ecx,BYTE PTR ds:0x804855d
804840f: 29 c8 sub eax,ecx
8048411: 75 1c jne 804842f <AbsCompare2+0x3f>
8048413: 0f b6 42 02 movzx eax,BYTE PTR [edx+0x2]
8048417: 0f b6 0d 5e 85 04 08 movzx ecx,BYTE PTR ds:0x804855e
804841e: 29 c8 sub eax,ecx
8048420: 75 0d jne 804842f <AbsCompare2+0x3f>
8048422: 0f b6 42 03 movzx eax,BYTE PTR [edx+0x3]
8048426: 0f b6 15 5f 85 04 08 movzx edx,BYTE PTR ds:0x804855f
804842d: 29 d0 sub eax,edx
804842f: 5d pop ebp
8048430: c3 ret
Is there a possibility to compare this short literal directly without the detour of embedding chr_p into a union, especially because I want to compare chr_p at arbitrary indices like "&chr_p[1]"?
No there is not. Are you aware that a compiler will use its knowledge about strcmp
? It will do exactly what you like to achieve (removing the call overhead) without having to resort to type-punning in the source. These kind of code transformations are most often done in a code generator after the compiler has taken all the benefit from alias analysis.
If I use gcc -O3
to compile the following program, there is no call to strcmp
to be found.
#include <string.h>
int main(int argc, char ** argv)
{
return strcmp(argv[0], "abs");
}
My x86 assembly for example looked like this (gcc version 4.3.2 (Debian 4.3.2-1.1)) (I know it's old):
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
movl %esp, %ebp
pushl %ecx
movl 4(%ecx), %eax
movl (%eax), %edx
movzbl (%edx), %eax
subl $97, %eax
jne .L2
movzbl 1(%edx), %eax
subl $98, %eax
jne .L2
movzbl 2(%edx), %eax
subl $115, %eax
jne .L2
movzbl 3(%edx), %eax
.L2:
popl %ecx
popl %ebp
leal -4(%ecx), %esp
ret
Basically strcmp
has been inlined and unrolled. Of course it highly depends on the code generator for your target. So if it is not evolved enough yet, then it may still generate a strcmp
. Still makes you wonder if you should burden yourself with ugly code, if maybe the code generator will support this later .. when you are still stuck with your code.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With