I wonder if, when it is possible, Is it more efficient a switch or a const table?
For example, what would perform better:
switch(input) {
case 0: value = VALUE_0;
break;
case 1: value = VALUE_1;
break;
case 2: value = VALUE_2;
break;
case 3: value = VALUE_3;
break;
case 4: value = VALUE_4;
break;
case 5: value = VALUE_5;
break;
case 6: value = VALUE_6;
break;
default:
break;
}
Or something like this:
const uint8_t INPUT_TO_VALUE_TABLE[N_VALUE] = {
VALUE_0,
VALUE_1,
VALUE_2,
VALUE_3,
VALUE_4,
VALUE_5,
VALUE_6,
}
...
...
value = INPUT_TO_VALUE_TABLE[input];
I have shown a dummy example, but I have also the code for using a switch calling different functions or a function pointer table.
The code is for is for a 8bits micro (I don'w know if it makes any difference for this topic).
Well, you should consider disassembling the compiled code to see what's actually getting generated, but I'd expect that you end up with less code in the second case, and there's less branching.
In the first case, there are seven assignment statements, and a bunch of jumps (out of the switch statement). In the second, there's one array reference, and one assignment. Since your cases are all contiguous, it's easy to handle the default case:
value = ( input < 6 ) ? INPUT_TO_VALUE_TABLE[input] : default_value;
Let's look at some assembly. This is compiled with gcc -S
, version 4.6.3, so it's not the same as the assembly you'd get, but we should getting the same general kind of result. This answer won't absolutely answer the question of which will be better in your case; you'll have to do some tests on your own, but it looks pretty sure that the table is going to be preferable.
switch
option:We'll start with the switch
:
void switch_input( int input ) {
switch(input) {
case 0: value = VALUE_0;
break;
case 1: value = VALUE_1;
break;
case 2: value = VALUE_2;
break;
case 3: value = VALUE_3;
break;
case 4: value = VALUE_4;
break;
case 5: value = VALUE_5;
break;
case 6: value = VALUE_6;
break;
default: value = VALUE_DEFAULT;
break;
}
}
This has lots of jumps in it, as there are seven different assignments, and based on the value of input
, we have to be able to jump to each one, and then jump to the end of the switch
.
switch_input:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
cmpl $6, -4(%rbp)
ja .L2
movl -4(%rbp), %eax
movq .L10(,%rax,8), %rax
jmp *%rax
.section .rodata
.align 8
.align 4
.L10:
.quad .L3
.quad .L4
.quad .L5
.quad .L6
.quad .L7
.quad .L8
.quad .L9
.text
.L3:
movl $0, value(%rip)
jmp .L1
.L4:
movl $1, value(%rip)
jmp .L1
.L5:
movl $2, value(%rip)
jmp .L1
.L6:
movl $3, value(%rip)
jmp .L1
.L7:
movl $4, value(%rip)
jmp .L1
.L8:
movl $5, value(%rip)
jmp .L1
.L9:
movl $6, value(%rip)
jmp .L1
.L2:
movl $-1, value(%rip)
nop
.L1:
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
The table option can use just a single assignment, and instead having lots of code that we can jump to, we just need a table of values. We don't need to jump into that table, either; we just need to compute an index, and then unconditionally load a value from it.
void index_input( int input ) {
value = ( input < N_VALUE ) ? INPUT_TO_VALUE_TABLE[input] : VALUE_DEFAULT;
}
(Yes, we really should be using an unsigned integer there so that we know it won't be less than zero.)
index_input:
.LFB1:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
cmpl $5, -4(%rbp)
jg .L13
movl -4(%rbp), %eax
cltq
movl INPUT_TO_VALUE_TABLE(,%rax,4), %eax
jmp .L14
.L13:
movl $-1, %eax
.L14:
movl %eax, value(%rip)
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
int value;
#define N_VALUE 7
#define VALUE_0 0
#define VALUE_1 1
#define VALUE_2 2
#define VALUE_3 3
#define VALUE_4 4
#define VALUE_5 5
#define VALUE_6 6
#define VALUE_DEFAULT -1
void switch_input( int input ) {
switch(input) {
case 0: value = VALUE_0;
break;
case 1: value = VALUE_1;
break;
case 2: value = VALUE_2;
break;
case 3: value = VALUE_3;
break;
case 4: value = VALUE_4;
break;
case 5: value = VALUE_5;
break;
case 6: value = VALUE_6;
break;
default: value = VALUE_DEFAULT;
break;
}
}
const int INPUT_TO_VALUE_TABLE[N_VALUE] = {
VALUE_0,
VALUE_1,
VALUE_2,
VALUE_3,
VALUE_4,
VALUE_5,
VALUE_6
};
void index_input( int input ) {
value = ( input < 6 ) ? INPUT_TO_VALUE_TABLE[input] : VALUE_DEFAULT;
}
Generated by gcc -S
.
.file "example.c"
.comm value,4,4
.text
.globl switch_input
.type switch_input, @function
switch_input:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
cmpl $6, -4(%rbp)
ja .L2
movl -4(%rbp), %eax
movq .L10(,%rax,8), %rax
jmp *%rax
.section .rodata
.align 8
.align 4
.L10:
.quad .L3
.quad .L4
.quad .L5
.quad .L6
.quad .L7
.quad .L8
.quad .L9
.text
.L3:
movl $0, value(%rip)
jmp .L1
.L4:
movl $1, value(%rip)
jmp .L1
.L5:
movl $2, value(%rip)
jmp .L1
.L6:
movl $3, value(%rip)
jmp .L1
.L7:
movl $4, value(%rip)
jmp .L1
.L8:
movl $5, value(%rip)
jmp .L1
.L9:
movl $6, value(%rip)
jmp .L1
.L2:
movl $-1, value(%rip)
nop
.L1:
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size switch_input, .-switch_input
.globl INPUT_TO_VALUE_TABLE
.section .rodata
.align 16
.type INPUT_TO_VALUE_TABLE, @object
.size INPUT_TO_VALUE_TABLE, 28
INPUT_TO_VALUE_TABLE:
.long 0
.long 1
.long 2
.long 3
.long 4
.long 5
.long 6
.text
.globl index_input
.type index_input, @function
index_input:
.LFB1:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
cmpl $5, -4(%rbp)
jg .L13
movl -4(%rbp), %eax
cltq
movl INPUT_TO_VALUE_TABLE(,%rax,4), %eax
jmp .L14
.L13:
movl $-1, %eax
.L14:
movl %eax, value(%rip)
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size index_input, .-index_input
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",@progbits
It is probably quite unlikely that the switch will get optimized into something as efficient as the table lookup. It is probably safe to assume that the table is faster, but you have to check on your specific system.
Had it been an array of function pointers, where each function pointer represents a case
, it would have been another story however. Because it is very likely that a decent compiler optimizes a switch with adjacent number inputs into such a function-pointer table.
Since you mention 8-bit MCUs, it might be quite relevant to make manual code optimizations. Sadly, most mainstream 8-bit MCU compilers are poor at code optimization (and often rather poor at standard compliance as well).
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With