I have some code in an ISR. The code is given for completeness, the question is only about the commented-out __asm_ block.
Without the __asm_ block, this is compiled into 82 instructions. With the __asm_ block, the result is 107 instructions long. Why the big difference?
Here's the C code:
if (PIR1bits.SSPIF)
{
spi_rec_buffer.read_cursor = 0;
spi_rec_buffer.write_cursor = 0;
LATAbits.LATA4 ^= 1;
// _asm nop nop _endasm
LATAbits.LATA4 ^= 1;
while (!PORTAbits.NOT_SS && spi_rec_buffer.write_cursor < spi_rec_buffer.size)
{
spi_rec_buffer.data[spi_rec_buffer.write_cursor] = SSPBUF;
SSPBUF = spi_out_msg_buffer.data[spi_out_msg_buffer.read_cursor];
PIR1bits.SSPIF = 0;
spi_rec_buffer.write_cursor++;
spi_out_msg_buffer.read_cursor++;
if (spi_out_msg_buffer.read_cursor == spi_out_msg_buffer.write_cursor)
LATAbits.LATA4 = 0;
LATBbits.LATB1 = 1;
while (!PORTAbits.NOT_SS && !PIR1bits.SSPIF);
LATBbits.LATB1 = 0;
}
spi_message_locked = true;
spi_message_received = true;
}
Without NOPs:
BTFSS 0x9e,0x3,0x0 if (PIR1bits.SSPIF)
BRA 0x2ba
{
MOVLB 0xf spi_rec_buffer.read_cursor = 0;
CLRF 0x4,0x1
CLRF 0x5,0x1
CLRF 0x6,0x1 spi_rec_buffer.write_cursor = 0;
CLRF 0x7,0x1
BTG 0x89,0x4,0x0 LATAbits.LATA4 ^= 1;
BTG 0x89,0x4,0x0 LATAbits.LATA4 ^= 1;
MOVF 0x80,0x0,0x0 while (!PORTAbits.NOT_SS && spi_rec_buffer.write_cursor < spi_rec_buffer.size)
ANDLW 0x20
BNZ 0x2b0
MOVLB 0xf
MOVF 0x7,0x0,0x1
XORWF 0x3,0x0,0x1
BTFSS 0xe8,0x7,0x0
BRA 0x254
RLCF 0x3,0x0,0x1
BRA 0x25c
MOVF 0x2,0x0,0x1
SUBWF 0x6,0x0,0x1
MOVF 0x3,0x0,0x1
SUBWFB 0x7,0x0,0x1
BC 0x2b0
BRA 0x240
{
MOVF 0x0,0x0,0x1 spi_rec_buffer.data[spi_rec_buffer.write_cursor] = SSPBUF;
ADDWF 0x6,0x0,0x1
MOVWF 0xe9,0x0
MOVF 0x1,0x0,0x1
ADDWFC 0x7,0x0,0x1
MOVWF 0xea,0x0
MOVFF 0xfc9,0xfef
MOVLB 0xf SSPBUF = spi_out_msg_buffer.data[spi_out_msg_buffer.read_cursor];
MOVF 0x10,0x0,0x1
ADDWF 0x14,0x0,0x1
MOVWF 0xe9,0x0
MOVF 0x11,0x0,0x1
ADDWFC 0x15,0x0,0x1
MOVWF 0xea,0x0
MOVF 0xef,0x0,0x0
MOVWF 0xc9,0x0
BCF 0x9e,0x3,0x0 PIR1bits.SSPIF = 0;
MOVLB 0xf spi_rec_buffer.write_cursor++;
INCF 0x6,0x1,0x1
MOVLW 0x0
ADDWFC 0x7,0x1,0x1
MOVLB 0xf spi_out_msg_buffer.read_cursor++;
INCF 0x14,0x1,0x1
ADDWFC 0x15,0x1,0x1
MOVF 0x16,0x0,0x1 if (spi_out_msg_buffer.read_cursor == spi_out_msg_buffer.write_cursor)
XORWF 0x14,0x0,0x1
BNZ 0x29e
MOVF 0x17,0x0,0x1
XORWF 0x15,0x0,0x1
BNZ 0x29e
BCF 0x89,0x4,0x0 LATAbits.LATA4 = 0;
BSF 0x8a,0x1,0x0 LATBbits.LATB1 = 1;
MOVF 0x80,0x0,0x0 while (!PORTAbits.NOT_SS && !PIR1bits.SSPIF);
ANDLW 0x20
BNZ 0x2ac
MOVF 0x9e,0x0,0x0
ANDLW 0x8
BZ 0x2a0
BCF 0x8a,0x1,0x0 LATBbits.LATB1 = 0;
}
MOVLB 0xf spi_message_locked = true;
MOVLW 0x1
MOVWF 0x18,0x1
MOVLB 0xf spi_message_received = true;
MOVWF 0x19,0x1
}
MOVLW 0x4 }
SUBWF 0xe1,0x0,0x0
BC 0x2c4
CLRF 0xe1,0x0
MOVF 0xe5,0x1,0x0
MOVWF 0xe1,0x0
MOVF 0xe5,0x1,0x0
MOVFF 0xfe7,0xfd9
MOVF 0xe5,0x1,0x0
MOVFF 0xfe5,0xfea
MOVFF 0xfe5,0xfe9
MOVFF 0xfe5,0xfda
RETFIE 0x1
With NOPs:
BTFSS 0x9e,0x3,0x0 if (PIR1bits.SSPIF)
BRA 0x30e
{
MOVLB 0xf spi_rec_buffer.read_cursor = 0;
CLRF 0x4,0x1
CLRF 0x5,0x1
MOVLB 0xf spi_rec_buffer.write_cursor = 0;
CLRF 0x6,0x1
CLRF 0x7,0x1
BTG 0x89,0x4,0x0 LATAbits.LATA4 ^= 1;
NOP _asm nop nop _endasm
NOP
BTG 0x89,0x4,0x0 LATAbits.LATA4 ^= 1;
MOVF 0x80,0x0,0x0 while (!PORTAbits.NOT_SS && spi_rec_buffer.write_cursor < spi_rec_buffer.size)
ANDLW 0x20
BNZ 0x302
MOVLB 0xf
MOVF 0x7,0x0,0x1
MOVLB 0xf
XORWF 0x3,0x0,0x1
BTFSS 0xe8,0x7,0x0
BRA 0x27e
RLCF 0x3,0x0,0x1
BRA 0x28c
MOVF 0x2,0x0,0x1
MOVLB 0xf
SUBWF 0x6,0x0,0x1
MOVLB 0xf
MOVF 0x3,0x0,0x1
MOVLB 0xf
SUBWFB 0x7,0x0,0x1
BC 0x302
BRA 0x268
{
MOVLB 0xf spi_rec_buffer.data[spi_rec_buffer.write_cursor] = SSPBUF;
MOVLB 0xf
MOVF 0x0,0x0,0x1
MOVLB 0xf
ADDWF 0x6,0x0,0x1
MOVWF 0xe9,0x0
MOVLB 0xf
MOVLB 0xf
MOVF 0x1,0x0,0x1
MOVLB 0xf
ADDWFC 0x7,0x0,0x1
MOVWF 0xea,0x0
MOVFF 0xfc9,0xfef
MOVLB 0xf SSPBUF = spi_out_msg_buffer.data[spi_out_msg_buffer.read_cursor];
MOVLB 0xf
MOVF 0x10,0x0,0x1
MOVLB 0xf
ADDWF 0x14,0x0,0x1
MOVWF 0xe9,0x0
MOVLB 0xf
MOVLB 0xf
MOVF 0x11,0x0,0x1
MOVLB 0xf
ADDWFC 0x15,0x0,0x1
MOVWF 0xea,0x0
MOVF 0xef,0x0,0x0
MOVWF 0xc9,0x0
BCF 0x9e,0x3,0x0 PIR1bits.SSPIF = 0; // Interruptflag löschen...
MOVLB 0xf spi_rec_buffer.write_cursor++;
INCF 0x6,0x1,0x1
MOVLW 0x0
ADDWFC 0x7,0x1,0x1
MOVLB 0xf spi_out_msg_buffer.read_cursor++;
INCF 0x14,0x1,0x1
MOVLW 0x0
ADDWFC 0x15,0x1,0x1
MOVLB 0xf if (spi_out_msg_buffer.read_cursor == spi_out_msg_buffer.write_cursor)
MOVF 0x16,0x0,0x1
MOVLB 0xf
XORWF 0x14,0x0,0x1
BNZ 0x2ea
MOVLB 0xf
MOVF 0x17,0x0,0x1
MOVLB 0xf
XORWF 0x15,0x0,0x1
BNZ 0x2ee
BCF 0x89,0x4,0x0 LATAbits.LATA4 = 0;
BSF 0x8a,0x1,0x0 LATBbits.LATB1 = 1;
MOVF 0x80,0x0,0x0 while (!PORTAbits.NOT_SS && !PIR1bits.SSPIF);
ANDLW 0x20
BNZ 0x2fe
MOVF 0x9e,0x0,0x0
ANDLW 0x8
BNZ 0x2fe
BRA 0x2f0
BCF 0x8a,0x1,0x0 LATBbits.LATB1 = 0;
}
MOVLB 0xf spi_message_locked = true;
MOVLW 0x1
MOVWF 0x18,0x1
MOVLB 0xf spi_message_received = true;
MOVLW 0x1
MOVWF 0x19,0x1
}
MOVLW 0x4 }
SUBWF 0xe1,0x0,0x0
BC 0x318
CLRF 0xe1,0x0
MOVF 0xe5,0x1,0x0
MOVWF 0xe1,0x0
MOVF 0xe5,0x1,0x0
MOVFF 0xfe7,0xfd9
MOVF 0xe5,0x1,0x0
MOVFF 0xfe5,0xfea
MOVFF 0xfe5,0xfe9
MOVFF 0xfe5,0xfda
RETFIE 0x1
Here's a screenshot of a partly diff (click to enlarge):
NOPs are often used for timing reasons. Normally it is nothing to worry about.
A NOP is most commonly used for timing purposes, to force memory alignment, to prevent hazards, to occupy a branch delay slot, to render void an existing instruction such as a jump, as a target of an execute instruction, or as a place-holder to be replaced by active instructions later on in program development (or to ...
What Does No Operation (NOP) Mean? A no operation or “no-op” instruction in an assembly language is an instruction that does not implement any operation. IT pros or others might refer to this as a blank instruction or placeholder.
NOP instruction is the short form for 'No Operation' and is often useful to fine tune delays or create a handle for breakpoints.
So that people don't have to guess, here's a statement from the Microchip C18 manual (emphasis added):
It is generally recommended to limit the use of inline assembly to a minimum. Any functions containing inline assembly will not be optimized by the compiler. To write large fragments of assembly code, use the MPASM assembler and link the modules to the C modules using the MPLINK linker.
I think that this is a common situation with inline asm. GCC is an exception - it will optimize the inline assembly along with the surrounding C code; in order to do this correctly, GCC's inline assembly is quite complex (you have to let it know which registers and memory are clobbered).
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With