I'm trying to do some Code Optimization to Eliminate Branches, the original c code is
if( a < b )
k = (k<<1) + 1;
else
k = (k<<1)
I intend to replace it with assembly code like below
mov a, %rax
mov b, %rbx
mov k, %rcx
xor %rdx %rdx
shl 1, %rcx
cmp %rax, %rax
setb %rdx
add %rdx,%rcx
mov %rcx, k
so I write c inline assembly code like blow,
#define next(a, b, k)\
__asm__("shl $0x1, %0; \
xor %%rbx, %%rbx; \
cmp %1, %2; \
setb %%rbx; \
addl %%rbx,%0;":"+c"(k) :"g"(a),"g"(b))
when I compile the code below i got error:
operand type mismatch for `add'
operand type mismatch for `setb'
How can I fix it?
Here are the mistakes in your code:
setb %bl
works while setb %rbx
doesn't.T = (A < B)
should translate to cmp B,A; setb T
in AT&T x86 assembler syntax. You had the two operands to CMP in the wrong order. Remember that CMP works like SUB.Once you realize the first two error messages are produced by the assembler, it follows that the trick to debugging them is to look at the assembler code generated by gcc. Try gcc $CFLAGS -S t.c
and compare the problematic lines in t.s
with an x86 opcode reference. Focus on the allowed operand codes for each instruction and you'll quickly see the problems.
In the fixed source code posted below, I assume your operands are unsigned since you're using SETB instead of SETL. I switched from using RBX to RCX to hold the temporary value because RCX is a call clobbered register in the ABI and used the "=&c"
constraint to mark it as an earlyclobber operand since RCX is cleared before the inputs a
and b
are read:
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
static uint64_t next(uint64_t a, uint64_t b, uint64_t k)
{
uint64_t tmp;
__asm__("shl $0x1, %[k];"
"xor %%rcx, %%rcx;"
"cmp %[b], %[a];"
"setb %%cl;"
"addq %%rcx, %[k];"
: /* outputs */ [k] "+g" (k), [tmp] "=&c" (tmp)
: /* inputs */ [a] "r" (a), [b] "g" (b)
: /* clobbers */ "cc");
return k;
}
int main()
{
uint64_t t, t0, k;
k = next(1, 2, 0);
printf("%" PRId64 "\n", k);
scanf("%" SCNd64 "%" SCNd64, &t, &t0);
k = next(t, t0, k);
printf("%" PRId64 "\n", k);
return 0;
}
main() translates to:
<+0>: push %rbx
<+1>: xor %ebx,%ebx
<+3>: mov $0x4006c0,%edi
<+8>: mov $0x1,%bl
<+10>: xor %eax,%eax
<+12>: sub $0x10,%rsp
<+16>: shl %rax
<+19>: xor %rcx,%rcx
<+22>: cmp $0x2,%rbx
<+26>: setb %cl
<+29>: add %rcx,%rax
<+32>: mov %rax,%rbx
<+35>: mov %rax,%rsi
<+38>: xor %eax,%eax
<+40>: callq 0x400470 <printf@plt>
<+45>: lea 0x8(%rsp),%rdx
<+50>: mov %rsp,%rsi
<+53>: mov $0x4006c5,%edi
<+58>: xor %eax,%eax
<+60>: callq 0x4004a0 <__isoc99_scanf@plt>
<+65>: mov (%rsp),%rax
<+69>: mov %rbx,%rsi
<+72>: mov $0x4006c0,%edi
<+77>: shl %rsi
<+80>: xor %rcx,%rcx
<+83>: cmp 0x8(%rsp),%rax
<+88>: setb %cl
<+91>: add %rcx,%rsi
<+94>: xor %eax,%eax
<+96>: callq 0x400470 <printf@plt>
<+101>: add $0x10,%rsp
<+105>: xor %eax,%eax
<+107>: pop %rbx
<+108>: retq
You can see the result of next()
being moved into RSI before each printf()
call.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With