Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to make bare metal ARM programs and run them on QEMU?

I am trying to get this tutorial to work as intended, without success (something fails after the bl main instruction). According to the tutorial, the command

(qemu) xp /1dw 0xa0000018  

should print 33 (but I get 0x00 instead)

a0000018:         33

This is the content of the registers after the main call (see startup.s)

(qemu) info registers 
R00=a000001c R01=a000001c R02=00000006 R03=00000000
R04=00000000 R05=00000005 R06=00000006 R07=00000007
R08=00000008 R09=00000009 R10=00000000 R11=a3fffffc
R12=00000000 R13=00000000 R14=0000003c R15=00000004
PSR=800001db N--- A und32
FPSCR: 00000000

I have the following files

  main.c
  startup.s
  lscript.ld
  Makefile        

And I am using the following toolchain

arm-2013.11-24-arm-none-eabi-i686-pc-linux-gnu

Makefile:

# Build a bare-metal ARM image, place it at the start of a 16MB flash image
# and boot that image in QEMU.

# Sources handed to the cross compiler in a single gcc invocation (see `all`).
SRCS := main.c startup.s

# Linker script and the names of the generated artefacts.
LINKER_NAME := lscript.ld
ELF_NAME := program.elf
BIN_NAME := program.bin
FLASH_NAME := flash.bin

# CC holds only the toolchain prefix; the recipes append -gcc, -objcopy,
# -objdump and -nm to it.
CC := arm-none-eabi
# NOTE(review): no -march/-mcpu is passed, so the compiler's default target
# architecture is used -- confirm it matches the CPU of the emulated machine.
CFLAGS := -nostdlib
# Options for the `objdump` target; ?= lets the caller override them.
OBJFLAGS ?= -DS
# Emulate the "connex" machine with $(FLASH_NAME) attached as parallel flash.
QEMUFLAGS := -M connex -pflash $(FLASH_NAME) -nographic -serial /dev/null

# Allocate 16MB to use as a virtual flash for QEMU
# bs = block size -> 4KB
# count = number of blocks -> 4096
# total size = 4KB * 4096 = 16MB
setup:
    dd if=/dev/zero of=$(FLASH_NAME) bs=4096 count=4096

# Compile and link the sources, convert the ELF to a raw binary and write it
# to the start of the virtual flash. conv=notrunc keeps the flash image at
# its full 16MB size instead of truncating it to the size of the binary.
all: clean setup
    $(CC)-gcc $(CFLAGS) -o $(ELF_NAME) -T $(LINKER_NAME) $(SRCS)
    $(CC)-objcopy -O binary $(ELF_NAME) $(BIN_NAME)
    dd if=$(BIN_NAME) of=$(FLASH_NAME) bs=4096 conv=notrunc

# Disassemble the linked ELF.
objdump:
    $(CC)-objdump $(OBJFLAGS) $(ELF_NAME)

# List the symbols of the linked ELF sorted by address.
mem-placement:
    $(CC)-nm -n $(ELF_NAME)

# Start the emulator on the current flash image.
qemu:
    qemu-system-arm $(QEMUFLAGS) 

# Remove every generated .bin and .elf file (including the flash image).
clean:
    rm -rf *.bin
    rm -rf *.elf

main.c:

/* Values to add up. Initialised data: the startup code has to copy it from
 * flash to RAM before main() runs. */
static int arr[] = { 1, 10, 4, 5, 6, 7 };
/* Running total. Zero-initialised, so it relies on the startup code
 * clearing .bss. */
static int sum;
/* Number of elements in arr (6). */
static const int n = sizeof(arr) / sizeof(arr[0]);

/*
 * Adds every element of arr to sum.
 *
 * The total (1 + 10 + 4 + 5 + 6 + 7 = 33) is left in the global `sum`;
 * it is NOT the return value. Always returns 0.
 */
int main()
{
    int i;

    for (i = 0; i < n; i++){
        sum += arr[i];
    }
    return 0;
}

startup.s:

@@ Exception vector table: eight 4-byte entries. The linker script places
@@ the "vectors" section first, so `reset` ends up at address 0.
@@ Every entry except `reset` branches to itself, i.e. the CPU spins in
@@ place if that exception is ever taken.
.section "vectors"
reset:  b     _start
undef:  b     undef
swi:    b     swi
pabt:   b     pabt
dabt:   b     dabt
    @@ Filler for the unused slot between dabt and irq.
    nop
irq:    b     irq
fiq:    b     fiq

    .text
@@ Reset entry point: copy .data from flash to RAM, zero .bss, set up the
@@ stack and call main. flash_sdata, ram_sdata, data_size, sbss, ebss and
@@ bss_size are symbols defined by the linker script.
_start:
init:
    @@ Copy data to RAM.
    @@ r0 = source (load address in flash), r1 = destination (RAM),
    @@ r2 = number of bytes still to copy.
    ldr   r0, =flash_sdata
    ldr   r1, =ram_sdata
    ldr   r2, =data_size

    @@ Handle data_size == 0
    cmp   r2, #0
    beq   init_bss

    @@ Byte-wise copy; both pointers are post-incremented by 1.
copy:
    ldrb   r4, [r0], #1
    strb   r4, [r1], #1
    subs   r2, r2, #1
    bne    copy

init_bss:
    @@ Initialize .bss
    @@ r0 = write pointer, r2 = number of bytes still to clear.
    @@ r1 = ebss is loaded but not used by the loop below.
    ldr   r0, =sbss
    ldr   r1, =ebss
    ldr   r2, =bss_size

    @@ Handle bss_size == 0
    cmp   r2, #0
    beq   init_stack
    mov   r4, #0

    @@ Store one zero byte per iteration.
zero:
    strb  r4, [r0], #1
    subs  r2, r2, #1
    bne   zero

init_stack:
    @@ Initialize the stack pointer
    @@ 0xA4000000 = ORIGIN(ram) + LENGTH(ram) from the linker script,
    @@ i.e. one past the last RAM byte.
    ldr   sp, =0xA4000000

    @@ **this call doesn't work as expected.. (r13/sp contains 0xA4000000)**
    bl    main

    @@ Observed: execution does not come back here from main.
    @@ If main returns, r0 holds its return value (0); the computed total
    @@ (33) is stored in the variable `sum`, not in r0.
    @@ Park the CPU in an endless loop.
stop:
    b     stop

lscript.ld:

/*
 * Linker script for testing purposes
 * (using 16 MB virtual flash = 0x0100_0000)
 *
 * Code and constants are placed in rom; .data is stored in rom but linked
 * to run from ram; .bss lives in ram only. The symbols defined here
 * (flash_sdata, ram_sdata, data_size, sbss, ebss, bss_size) are consumed
 * by startup.s.
 */

MEMORY {
    /* 16 MB of flash starting at address 0. */
    rom (rx)  : ORIGIN = 0x00000000, LENGTH = 0x01000000
    /* 64 MB of RAM starting at 0xA0000000. */
    ram (rwx) : ORIGIN = 0xA0000000, LENGTH = 0x04000000
}


SECTIONS {
    /* The vector table goes first so that it starts at the beginning of rom. */
    .text : {
          * (vectors);
          * (.text);
    } > rom

    .rodata : {
          * (.rodata);
    } > rom

    /* Load address of .data: the first rom byte after .rodata. */
    flash_sdata = .;
    /* Run-time address of .data: the start of ram. */
    ram_sdata = ORIGIN(ram);

    /* Addresses are assigned in ram, contents are stored at flash_sdata. */
    .data : AT (flash_sdata) {
          * (.data);
    } > ram

    /* Number of bytes the startup code has to copy from rom to ram. */
    ram_edata = .;
    data_size = ram_edata - ram_sdata;

    /* .bss follows .data in ram; bss_size is the number of bytes to clear. */
    sbss = .;
    .bss : {
         * (.bss);
    } > ram
    ebss = .;
    bss_size = ebss - sbss;

    /* Input sections matching these patterns are left out of the output.
       The .debug pattern is commented out, so debug sections are kept. */
    /DISCARD/ : {
      *(.note*)
      *(.comment)
      *(.ARM*)
      /*
      *(.debug*)
      */
    }
} 

Disassembly of the executable (objdump):

program.elf:     file format elf32-littlearm

Disassembly of section .text:
00000000 <reset>:
0:  ea000023    b   94 <_start>

00000004 <undef>:
4:  eafffffe    b   4 <undef>

00000008 <swi>:
8:  eafffffe    b   8 <swi>

0000000c <pabt>:
c:  eafffffe    b   c <pabt>

00000010 <dabt>:
10: eafffffe    b   10 <dabt>
14: e320f000    nop {0} 

00000018 <irq>:
18: eafffffe    b   18 <irq>

0000001c <fiq>:
1c: eafffffe    b   1c <fiq>

00000020 <main>:
20: e52db004    push    {fp}        ; (str fp, [sp, #-4]!)
24: e28db000    add fp, sp, #0
28: e24dd00c    sub sp, sp, #12
2c: e3a03000    mov r3, #0
30: e50b3008    str r3, [fp, #-8]
34: ea00000d    b   70 <main+0x50>
38: e3003000    movw    r3, #0
3c: e34a3000    movt    r3, #40960  ; 0xa000
40: e51b2008    ldr r2, [fp, #-8]
44: e7932102    ldr r2, [r3, r2, lsl #2]
48: e3003018    movw    r3, #24
4c: e34a3000    movt    r3, #40960  ; 0xa000
50: e5933000    ldr r3, [r3]
54: e0822003    add r2, r2, r3
58: e3003018    movw    r3, #24
5c: e34a3000    movt    r3, #40960  ; 0xa000
60: e5832000    str r2, [r3]
64: e51b3008    ldr r3, [fp, #-8]
68: e2833001    add r3, r3, #1
6c: e50b3008    str r3, [fp, #-8]
70: e3a02006    mov r2, #6
74: e51b3008    ldr r3, [fp, #-8]
78: e1530002    cmp r3, r2
7c: baffffed    blt 38 <main+0x18>
80: e3a03000    mov r3, #0
84: e1a00003    mov r0, r3
88: e24bd000    sub sp, fp, #0
8c: e49db004    pop {fp}        ; (ldr fp, [sp], #4)
90: e12fff1e    bx  lr 

00000094 <_start>:
94: e59f004c    ldr r0, [pc, #76]   ; e8 <stop+0x4>
98: e59f104c    ldr r1, [pc, #76]   ; ec <stop+0x8>
9c: e59f204c    ldr r2, [pc, #76]   ; f0 <stop+0xc>
a0: e3520000    cmp r2, #0
a4: 0a000003    beq b8 <init_bss> 

000000a8 <copy>:
a8: e4d04001    ldrb    r4, [r0], #1
ac: e4c14001    strb    r4, [r1], #1
b0: e2522001    subs    r2, r2, #1
b4: 1afffffb    bne a8 <copy> 

000000b8 <init_bss>:
b8: e59f0034    ldr r0, [pc, #52]   ; f4 <stop+0x10>
bc: e59f1034    ldr r1, [pc, #52]   ; f8 <stop+0x14>
c0: e59f2034    ldr r2, [pc, #52]   ; fc <stop+0x18>
c4: e3520000    cmp r2, #0
c8: 0a000003    beq dc <init_stack>
cc: e3a04000    mov r4, #0

000000d0 <zero>:
d0: e4c04001    strb    r4, [r0], #1
d4: e2522001    subs    r2, r2, #1
d8: 1afffffc    bne d0 <zero>  

000000dc <init_stack>:
dc: e3a0d329    mov sp, #-1543503872    ; 0xa4000000
e0: ebffffce    bl  20 <main> 

000000e4 <stop>:
e4: eafffffe    b   e4 <stop>
e8: 00000104    andeq   r0, r0, r4, lsl #2
ec: a0000000    andge   r0, r0, r0
f0: 00000018    andeq   r0, r0, r8, lsl r0
f4: a0000018    andge   r0, r0, r8, lsl r0
f8: a000001c    andge   r0, r0, ip, lsl r0
fc: 00000004    andeq   r0, r0, r4

Disassembly of section .rodata:
00000100 <n>:
100:    00000006    andeq   r0, r0, r6

Disassembly of section .data:
a0000000 <arr>:
a0000000:   00000001    andeq   r0, r0, r1
a0000004:   0000000a    andeq   r0, r0, sl
a0000008:   00000004    andeq   r0, r0, r4
a000000c:   00000005    andeq   r0, r0, r5
a0000010:   00000006    andeq   r0, r0, r6
a0000014:   00000007    andeq   r0, r0, r7

Disassembly of section .bss: 
a0000018 <sum>:
a0000018:   00000000    andeq   r0, r0, r0

Can someone point me in the right direction to why this isn't working according to my expectations?

Thanks Henrik

like image 571
Henrik Avatar asked Dec 15 '22 04:12

Henrik


2 Answers

Minimal examples that just work

  • https://github.com/cirosantilli/linux-kernel-module-cheat/tree/54e15e04338c0fecc0be139a0da2d0d972c21419#baremetal-setup-getting-started

    The prompt.c example takes input from your host terminal and gives back output all through the simulated UART:

    enter a character
    got: a
    new alloc of 1 bytes at address 0x0x4000a1c0
    enter a character
    got: b
    new alloc of 2 bytes at address 0x0x4000a1c0
    enter a character
    

    It uses Newlib to expose a subset of the C standard library. This allows you to run existing programs written in C if they only use that restricted subset of the C standard library.

    More details about Newlib at: https://electronics.stackexchange.com/questions/223929/c-standard-libraries-on-bare-metal/400077#400077

  • https://github.com/freedomtan/aarch64-bare-metal-qemu/tree/2ae937a2b106b43bfca49eec49359b3e30eac1b1 for -M virt, just the hello world on the repo. Compile with:

    sudo apt-get install gcc-aarch64-linux-gnu
    make CROSS_PREFIX=aarch64-linux-gnu-
    

    Here is the example minimized to printing a single character from assembly: How to run a bare metal ELF file on QEMU?

  • https://github.com/bztsrc/raspi3-tutorial for -M raspi3. Quick getting started at: https://raspberrypi.stackexchange.com/questions/34733/how-to-do-qemu-emulation-for-bare-metal-raspberry-pi-images/85135#85135 Several other examples on the repo going to more advanced subjects.

    Also does display output on 09_framebuffer.

Both write a hello world to the UART.

Tested in Ubuntu 18.04, gcc-aarch64-linux-gnu version 4:7.3.0-3ubuntu2.


Debugging!

First, look at the PC and PSR: You're in Undef mode, in the undefined instruction handler.

OK, in an exception mode, the LR tells you where you took the exception. There are some slightly complicated rules between the PC offset and the preferred return address determining exactly what it points at, but just eyeballing it, it's clearly in the vicinity of the movw/movt pair.

The movw instruction effectively only exists in the ARMv7 ISA onwards. A brief investigation tells me the machine you're emulating is some old PXA255 thing, whose CPU only implements the ARMv5 ISA. Thus it's not surprising it faults on an instruction that it predates by many years.

Your compiler is apparently configured to target ARMv7 by default (which is not uncommon), so you need to add at least -march=armv5te to your CFLAGS to target the appropriate architecture version. The 'advanced' challenge would be to switch to a different, newer, machine, but that's going to involve adapting the linker script to a new memory map and rewriting any hardware-touching code for new peripherals, so I'd save that idea for the longer term, once you're comfortable with the basics of bare-metal code and slogging through hardware reference manuals.

like image 41
Notlikethat Avatar answered Jan 16 '23 01:01

Notlikethat