I've got an address which points to a control port like so (for context I'm working on a Sega/Megadrive game):
volatile u32 * vdp_ctrl = (u32 *) 0x00C00004;
And an array of initial values I want to set:
const u8 initial_vdp_vals[24] = {
0x20,
0x74,
0x30,
..etc
};
With a loop:
typedef struct {
u16 upper;
u8 reg;
u8 val;
} bitset;
typedef union {
bitset b;
u32 as_u32;
} u_bitset;
static inline void init_vdp() {
u_bitset cmd = {{0x00008000}};
for(int i = 0; i < 24; i++) {
cmd.b.val = initial_vdp_vals[i];
*vdp_ctrl = cmd.as_u32;
cmd.b.reg += 1;
}
}
The problem is that gcc (with O2 at least) optimizes this away and only writes the last value to the *vdp_ctrl
pointer. I've managed to get around this by setting one of the properties in the bitset
struct to volatile
, but that doesn't seem like an intuitive solution, and the assembly it generates as a result is very lengthy.
So my question is two fold:
vdp_ctrl
pointer needs to accept multiple writes over time. I'm going to be using this pattern often (writing const/static data to control/data addresses in a loop), and marking random fields as volatile seem unintuitive. move.l #initial_vdp_vals, a0
move.l #24, d0
move.l #0x00008000, d1
.copy:
move.b (a0)+, d1
move.w d1, 0x00C00004
add.w #0x0100, d1
dbra d0, .copy
Which is very nice and succinct. So my other question might be (as a complete C novice): Is there a better approach to my C solution to get me closer to the assembly above? I'm not even sure if my code is correct to be honest as I'm just trying to get past this loop optimization issue first since I know it's going to be a constant issue moving forward.
Runnable example to produce my issue:
volatile unsigned long * vdp_ctrl = (unsigned long *) 0x00C00004;
const unsigned char initial_vdp_vals[24] = {
0x20,
0x74,
0x30,
0x40,
0x05,
0x70,
0x00,
0x00,
0x00,
0x00,
0x00,
0x08,
0x81,
0x34,
0x00,
0x00,
0x01,
0x00,
0x00,
0x00,
0x00,
0x00,
0x00,
0x00
};
typedef struct {
unsigned int upper;
unsigned char reg;
unsigned char val;
} bitset;
typedef union {
bitset b;
unsigned long as_u32;
} u_bitset;
static inline void init_vdp() {
u_bitset cmd = {{0x00008000}};
for(int i = 0; i < 24; i++) {
cmd.b.val = initial_vdp_vals[i];
*vdp_ctrl = cmd.as_u32;
}
}
void init() {
init_vdp();
for(;;) {
}
}
With m68k-linux-gnu-gcc -ffreestanding -O2 -S -c test.c -o test.s
. Generates the following:
#NO_APP
.file "test.c"
.text
.align 2
.globl init
.type init, @function
init:
link.w %fp,#0
move.l vdp_ctrl,%a0
moveq #24,%d0
move.l #32768,%d1
.L2:
move.l %d1,(%a0)
subq.l #1,%d0
jne .L2
.L3:
jra .L3
.size init, .-init
.globl initial_vdp_vals
.section .rodata
.type initial_vdp_vals, @object
.size initial_vdp_vals, 24
initial_vdp_vals:
.byte 32
.byte 116
.byte 48
.byte 64
.byte 5
.byte 112
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 8
.byte -127
.byte 52
.byte 0
.byte 0
.byte 1
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.globl vdp_ctrl
.data
.align 2
.type vdp_ctrl, @object
.size vdp_ctrl, 4
vdp_ctrl:
.long 12582916
.ident "GCC: (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0"
.section .note.GNU-stack,"",@progbits
gcc version 7.4.0 (Ubuntu 7.4.0-1ubuntu1~18.04.1)
Note: It does appear the size of my array determines if it gets optimized out or not. When I made it only two elements it did not optimize out.
In this kind of code you shuold use exact size integers. I strongly advice to pack the structures and unions as well.
#include <stdint.h>
#define vdp_ctrl ((volatile uint32_t *) 0x00C00004)
const unsigned char initial_vdp_vals[24] = {
0x20,
0x74,
0x30,
0x40,
0x05,
0x70,
0x00,
0x00,
0x00,
0x00,
0x00,
0x08,
0x81,
0x34,
0x00,
0x00,
0x01,
0x00,
0x00,
0x00,
0x00,
0x00,
0x00,
0x00
};
typedef struct {
uint16_t upper;
uint8_t reg;
uint8_t val;
} __attribute__((packed)) bitset;
typedef union {
bitset b;
uint32_t as_u32;
} __attribute__((packed)) u_bitset ;
static inline void init_vdp() {
u_bitset cmd = {.b.upper = 0x00008000};
for(int i = 0; i < 24; i++)
{
cmd.b.val = initial_vdp_vals[i];
*vdp_ctrl = cmd.as_u32;
}
}
void init() {
init_vdp();
for(;;) {
}
}
and it generates the code you need.
IMO it better yo have macro instead of the real object. It may not make any difference in this trivial code, but it will if the code becomes more complicated.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With