I'm using DOS to boot up and start my application test.exe. This program starts the BSP (Bootstrap Processor) in real mode and accesses the APIC table at FEE0:0000
to enable the SVI (Spurious vector interrupt) at offset 0x0F0
and send an INIT-SIPI-SIPI
sequence using both ICR_low
(offset 0x300) and ICR_high
(offset 0x310). The BSP enters inside a loop jmp $ to stop executing and lets the APs (Application Processor) execute code at address 0000:8000
and print a character.
It seems the messages aren't being sent to the APs because I don't see any of them print anything to the display..
I'm using FreeDos in real mode. To compile I'm using FASM (flat assembler)
I used OsDev manual that includes the code I'm using to test (with some modifications) as simple as possible, to see if I could get it working. I also referred to the Intel programmers manual and other specs as well as tutorial at Code Project.
I'm only trying to wake the APs up and execute some simple code. All examples that I found enter into unreal mode, protected mode, long mode or are focused in multicore processing. I'm only writing this code to understand how it works.
My code is:
format MZ
USE16
start:
mov ax, cs
mov ds, ax
mov es, ax
mov ss, ax
xor sp, sp
cld
;Clear screen
mov ax, 03h
int 10h
;Move payload to the desired address
mov si, payload
mov cx, payload_end-payload + 1
mov bx,es
mov ax,7c0h
mov es,ax
mov di,400h ;07c0:400 = 8000h
rep movsb
mov es,bx
;Enable APIC table
call enable_lapic
; Wakeup the other APs
;INIT
call lapic_send_init
mov cx, WAIT_10_ms
call us_wait
;SIPI
call lapic_send_sipi
mov cx, WAIT_200_us
call us_wait
;SIPI
call lapic_send_sipi
;Jump to the payload
;Para teste de acordar nucleos
jmp 0000h:8000h ;voltar esse depois
;Payload é o código que será movido para o endereço físico 0x08000
payload:
mov ax, cs
mov ds, ax
xor sp, sp
cld
;Only print letter 'A' directly to video memory
mov cx,0b800h
mov es,cx
mov di,00h
mov al,41h
stosb
cli
hlt
payload_end:
enable_lapic:
mov ecx, IA32_APIC_BASE_MSR
rdmsr
or ah, 08h ;Enable global APIC flag
wrmsr
and ah, 0f0h ; Mask to obtain APIC_Base address
mov DWORD [APIC_BASE], eax ;Save it
shr eax,16
mov bx,fs
mov fs,ax
mov ecx, DWORD [fs:APIC_REG_SIV] ;Load value from SIV (FEE0:00F0) to ecx
or ch, 01h ;bit8: APIC SOFTWARE enable/disable
mov DWORD [fs:APIC_REG_SIV], ecx ;Save it
mov fs,bx
ret
IA32_APIC_BASE_MSR = 1bh
APIC_REG_SIV = 0f0h
APIC_REG_ICR_LOW = 300h
APIC_REG_ICR_HIGH = 310h
APIC_REG_ID = 20h
APIC_BASE dd 00h
;CX = Wait (in ms) Max 65536 us (=0 on input)
us_wait:
mov dx, 80h ;POST Diagnose port, 1us per IO
xor si, si
rep outsb
ret
WAIT_10_ms = 10000
WAIT_200_us = 200
lapic_send_init:
mov eax, DWORD [APIC_BASE]
xor ebx, ebx
shr eax,16
mov cx,fs
mov fs,ax
mov DWORD [fs:APIC_REG_ICR_HIGH], ebx
mov ebx, 0c4500h
mov DWORD [fs:APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
mov fs,cx
ret
lapic_send_sipi:
mov eax, DWORD [APIC_BASE]
xor ebx, ebx
shr eax,16
mov cx,fs
mov fs,ax
mov DWORD [fs:APIC_REG_ICR_HIGH], ebx
mov ebx, 0c4608h
mov DWORD [fs:APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
mov fs,cx
ret
I expect the BSP enters into an infinite loop and the APs execute code at 0000:8000 and print 'A' at video memory.
11/06/2019 Hello everybody!
Now I have a code that can access protected mode. Because I'm with difficult to move to unreal mode I decided to stay in protected mode and enable all cores by this way.
It is a simple code but how Michael Petch said, I tried to do it in a bootloader situation.
Here is the code:
"format binary as 'bin'
use16
org 0x7C00
boot:
mov ax, cs
mov ds, ax
mov es, ax
mov ss, ax
xor sp, sp
;Clear screen
; mov ax, 03h
; int 10h
;Set VGA text mode 3
mov ax,0x3
int 0x10
;Move payload to the desired address
mov si, payload
mov cx, payload_end-payload + 1
;mov si,boot2
;mov cx,boot2_end-boot2+1
mov bx,es
mov ax,7c0h
mov es,ax
mov di,400h ;07c0:400 = 8000h
rep movsb
mov es,bx
;jmp 0000h:8000h
call enableA20Line
call enterProtectedMode
use32
;Enable the APIC
call enable_lapic
;INIT
call lapic_send_init
;mov cx, WAIT_10_ms
;call us_wait
.Verify1:
PAUSE
MOV EBX,[APIC_BASE]
MOV EAX,[EBX+0x300];
SHR EAX,12
TEST EAX,1
JNZ .Verify1
MOV EDI,[APIC_BASE]
ADD EDI,0xB0
MOV dword [EDI],0
;SIPI
call lapic_send_sipi
;mov cx, WAIT_200_us
;call us_wait
.Verify2:
PAUSE
MOV EBX,[APIC_BASE]
MOV EAX,[EBX+0x300];
SHR EAX,12
TEST EAX,1
JNZ .Verify2
MOV EDI,[APIC_BASE]
ADD EDI,0xB0
MOV dword [EDI],0
;SIPI
call lapic_send_sipi
;mov cx, WAIT_200_us
;call us_wait
.Verify3:
PAUSE
MOV EBX,[APIC_BASE]
MOV EAX,[EBX+0x300];
SHR EAX,12
TEST EAX,1
JNZ .Verify3
MOV EDI,[APIC_BASE]
ADD EDI,0xB0
MOV dword [EDI],0
;mov eax,0x8000
;jmp DWORD[eax]
;jmp boot2
;jmp 0x8000
;jmp $
;cli
;hlt
mov eax,0x000b8010
mov dword[eax],0e41h
cli
hlt
use16
enableA20Line:
mov ax,0x2401
int 0x15 ;enable A20 bit
ret
enterProtectedMode:
lgdt[gdt_pointer]
mov eax,cr0
or eax,0x1 ;set the protected mode bit on special cpu reg CR0
mov cr0,eax
jmp CODE_SEG:exit ;long jump to the code segment
exit:
ret
gdt_pointer:
dw gdt_end - gdt_start
dd gdt_start
CODE_SEG = gdt_code - gdt_start
DATA_SEG = gdt_data - gdt_start
gdt_start:
dq 0x0 ;NULL segment
gdt_code:
dw 0xFFFF
dw 0x0
db 0x0
db 10011010b
db 11001111b
db 0x0
gdt_data:
dw 0xFFFF
dw 0x0
db 0x0
db 10010010b
db 11001111b
db 0x0
gdt_end:
;CX = Wait (in ms) Max 65536 us (=0 on input)
us_wait:
mov dx, 80h ;POST Diagnose port, 1us per IO
xor si, si
rep outsb
ret
WAIT_10_ms = 10000
WAIT_200_us = 200
use32
enable_lapic:
mov ecx, IA32_APIC_BASE_MSR
rdmsr
or ah, 08h ;bit11: APIC GLOBAL Enable/Disable
wrmsr
and ah, 0f0h
mov DWORD [APIC_BASE], eax
mov ecx, DWORD [eax+APIC_REG_SIV]
;or ch, 01h ;bit8: APIC SOFTWARE enable/disable
or edx,01FFh
mov DWORD [eax+APIC_REG_SIV], ecx
mov DWORD[eax+0B0h],00h
ret
lapic_send_init:
mov eax, DWORD [APIC_BASE]
xor ebx, ebx
mov DWORD [eax+APIC_REG_ICR_HIGH], ebx
mov ebx, 0c4500h
mov DWORD [eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
lapic_send_sipi:
mov eax, DWORD [APIC_BASE]
xor ebx, ebx
mov DWORD [eax+APIC_REG_ICR_HIGH], ebx
mov ebx, 0c4608h
mov DWORD [eax+APIC_REG_ICR_LOW], ebx ;Writing the low DWORD sent the IPI
ret
IA32_APIC_BASE_MSR = 1bh
APIC_REG_SIV = 0f0h
APIC_REG_ICR_LOW = 300h
APIC_REG_ICR_HIGH = 310h
APIC_REG_ID = 20h
APIC_BASE dd 00h
boot2:
mov ax,DATA_SEG
mov ds,ax
mov es,ax
mov fs,ax
mov gs,ax
mov ss,ax
mov esi,hello2
mov ebx,0b8000h
.loop:
lodsb
or al,al
jz halt
or eax,0x0100
mov word[ebx],ax
add ebx,2
jmp .loop
halt:
cli
hlt
hello2: db "Hello world!",0
boot2_end:
use16
payload:
mov ax,cs
mov ds,ax
xor sp,sp
mov ax,0b800h
mov es,ax
mov di,20h
mov ax,0e45h
mov [es:di],al
cli
hlt
;jmp $
payload_end:
times 510 - ($-$$) db 0 ; pad remaining 510 bytes with zeroes
dw 0xaa55 ; magic bootloader magic - marks this 512 byte sector bootable!"
Now I'm searching for a delay routine to send init and sipi messages. I think this is the problem because this is not working yet.
The BSP prints letter "A" at position 10, and anyone should print another letter at position 20, but only "A" is printed.
Any ideas to help me while I'm searching how to put it to work?
Thanks in advance.
OBS: now I learned how to use the "qemu" emulator and I'm simulating all inside it.
SECOND EDIT: THE CODE WORKS. I'm using qemu emulator with only 1 core. When I use with 2 or more cores, code works!!
You need to use "qemu-system-x86_64 -cpu 486 -smp 2 'path'" without quotes.
12/06/2019 I've tried to run it in a real computer but it only do a reset loop. Have anyone a clue about it?
14/06/2019 Hello! I'm here again! I deal with this big problem about linear addressing inside DOS and I solve it with a previous .exe program that copies kernel.bin (program that send INIT-SIPI-SIPI) to a 0xXXXXXXXX address. Inside kernel.bin I put "org 0xXXXXXXXX", now I do not need to solve all pointers I use. Now INIT-SIPI-SIPI sequence is working.
Link: Switch from protected mode to real mode in a DOS EXE program
Another thing that I need to do is exit protected mode before exit program. If I do not do that, DOS crash. So I used the link above to solve linear addressing (by copying most part of the code to a known memory position) and return control to DOS.
It was funny because I put AP cores in a loop printing "Hello from another core" on the screen and the BSP exit program and goes back to dos. No matter what you do, the message can not be cleaned.
Know I will work on a simple trampoline code to put cores in different positions and executing 4 counter routines. It is the beginning of a function to wake up cores and give them some work. After I will implement MP and MDAT table detection to do by the right way.
Thanks!
Is it possible to wake up intel cores with INIT-SIPI-SIPI sequence with all cores in real mode?
Yes (maybe). There's 2 options:
a) If the CPU/s support x2APIC, then you can enable it and send the INIT-SIPI-SIPI sequence using MSRs (without needing to access memory mapped registers at an address that you can't access in real mode).
b) For xAPIC; it might be possible to change the address that the local APIC uses (by writing to the APIC_BASE MSR) so that it can be accessed in real mode. However, this requires extreme care because the local APIC shouldn't be placed anywhere that is already in use, and all of the space you can access in real mode is likely to already be in use. To get around that you'll probably need "chipset specific" code to modify where accesses are routed (to RAM, to PCI bus, ..) followed by code to reconfigure MTRRs to suit. The APIC_BASE MSR is also a little "CPU specific" (won't exist on 80486, might not exist on CPUs from other vendors). Note: I wouldn't consider this option sane or practical (especially for code that needs to work on more than one computer).
Note: You should only start CPUs that the firmware says exist (and should not broadcast the INIT-SIPI-SIPI sequence to faulty and disabled CPUs); and it's very likely that you won't be able to access ACPI tables (needed to find out which CPUs exist) in real mode. For this reason (because it doesn't make sense to start other CPUs without using protected mode for something) my answer should be considered "for academic purposes only".
When I first encountered this question I knew part of the problem is how DOS adds a level of complexity to code that requires work with linear addresses. I suggested doing it as a bootloader to test where the complexities of a DOS environment are removed. A legacy BIOS bootloader will always have the code placed at physical address 0x07c00. In real-mode physical and linear addresses are the same thing. As long as your bootloader sets the segments to 0x0000 when it starts and an org 0x7c00
directive is used - all memory references will be relative to the beginning of memory. A segment:offset pair of 0x0000:0x7c00 = physical address (0x0000<<4)+0x07c00.
Knowing exactly where your program is in physical memory is important because the LGDT
instruction is one of the few instructions that loads information that requires a linear address:
Loads the values in the source operand into the global descriptor table register (GDTR) or the interrupt descriptor table register (IDTR). The source operand specifies a 6-byte memory location that contains the base address (a linear address) and the limit (size of table in bytes) of the global descriptor table (GDT)
Your code defines the GDT record as:
gdt_pointer:
dw gdt_end - gdt_start
dd gdt_start
In a bootloader that uses org 0x7c00
dd gdt_start
will be filled in with the offset of gdt_start
. That will be an address of 0x7cxx where xx
is some distance from the beginning of the bootloader origin point where gdt_start
resides. It works out well that the value of gdt_start
is also the same as the linear address!
It is assumed in the information below that you have modified the program so it no longer has an org 0x7c00
, no longer has padded 512 bytes (and the boot signature) and that the top line of the file is now format MZ
for DOS executable.
The problem with DOS is that programs get generated with offsets relative to the beginning of the segment(s) that DOS loads the code and data to. These segments may differ each time a program is run depending what is in memory. At assembly time we don't know where in memory our code is loaded, so we can't possibly know the physical(linear) addresses until the program is loaded by DOS and running. This is unlike a bootloader that is always loaded at the same physical address.
Why does this all matter? When FASM generates code for your MZ
(DOS) EXE program the offsets generated will be relative to the beginning of the segment DOS will be loading us to. If the gdt_start
is at offset 0x60 (as an example) from the beginning of the segment, the GDT pointer dd gdt_start
will be filled in with the value 0x60
. Since it will be treated as a linear address by the LGDT
instruction that is telling LGDT that the GDT itself is at linear (physical address) 0x00000060. That is an address in the middle of the interrupt table, not in our program! The first time a segment is reloaded after going into protected mode the processor will look for the GDT at the wrong memory location, read bogus descriptor tables and most likely crash (triple fault/reboot). Effectively the moment you do jmp CODE_SEG:exit
which loads CS selector that is an index into a bogus GDT it will crash.
If DOS loaded your program starting at the beginning of segment 0x1230 (as an example) and the GDT was at offset 0x60 in the program then the linear address (physical) of the GDT in memory is actually (0x1234<<4)+0x60=0x123a0. What you need to do when your program starts running is determine what segment DOS loaded the program to and do this computation and update the GDT address in the gdt_pointer
structure. Using FASM to create DOS programs without segment
directives places all code and data in the same segment. You can get the segment by retrieving the value of CS and then you shift that value left by 4 bits and then add that to the offset stored in the gdt_pointer
by the assembler. You could do this at the beginning of the code when you load CS into the other registers. It has to be done after you setup DS:
mov eax, cs
mov ds, ax
mov es, ax
mov ebx, eax
shl ebx, 4
add [gdt_pointer+2], ebx
; mov ss, ax
; xor sp, sp
I have removed setting up SS:SP as DOS already set them up for us when our program was loaded by the DOS EXE loader. I move CS to EAX so that the upper 16-bits of EAX will be zero which simplifies the code for the calculation. We copy EAX to EBX, shift the value left by 4 bits (same as multiplying by 16 decimal) and adding that directly to the GDT offset portion of gdt_pointer
(gdt_pointer
+2 is the position where the GDT offset is stored). The assembler would have stored the offset of gdt_start
at gdt_pointer+2
and we are adjusting it to be a linear address.
If you were to assemble the code and run it - it will crash!
The GDT is not the only address in your code that needs to be fixed up like the GDT. Consider the jump into protected mode:
jmp CODE_SEG:exit ;long jump to the code segment
exit:
The label exit
is relative to the beginning of the segment we are loaded in. The CODE_SEG
selector points at a 4GiB flat code descriptor with base 0x00000000. Exit
will have a small offset, let us say for sake of argument it is 0xf5. The FAR JMP would go to CODE_SEG:0xf5
which would be memory address 0x000000f5 which is not where we are loaded. There are a number of ways to solve this but most involve FAR JMPing to a fixed up address that we have to compute at run-time. One mechanism is to use a non-zero base in the GDT code descriptor, but that option is outside the scope of this answer. The easiest to understand is creating a 6 byte pointer (32-bit offset and 16-bit segment) in memory and doing an indirect FAR JMP instead. We can fix up the offset of exit
the same way we did gdt_start
. At this point I'd rename exit
to pmode
or something that makes sense.
To do the fix up we can do it at the start like the gdt_pointer
fix up. The starting code would now look something like:
mov eax, cs
mov ds, ax
mov es, ax
mov ebx, eax
shl ebx, 4
add [gdt_pointer+2], ebx
add [pmode_farptr], ebx
; mov ss, ax
; xor sp, sp
In the same area of the bootloader you have the gdt_pointer
structure you'd add a new pmode_farptr
structure that looks like this:
gdt_pointer:
dw gdt_end - gdt_start
dd gdt_start
CODE_SEG = gdt_code - gdt_start
DATA_SEG = gdt_data - gdt_start
pmode_farptr:
dd pmode ; Offset of pmode label
dw CODE_SEG ; Segment to use
The indirect FAR JMP can now be done this way:
jmp fword [pmode_farptr];long jump to the code segment
;indirectly through 6 byte (fword)
;pointer at pmode_farptr
pmode:
ret
The problem is what happens after the FAR JMP which is now:
jmp fword [pmode_farptr];long jump to the code segment
;indirectly through 6 byte (fword)
;pointer at pmode_farptr
pmode:
ret
At the pmode
label you are now in 32-bit protected mode. There is ret
but you haven't set up SS to point to a valid data descriptor, you haven't set up ESP the stack pointer, and you haven't setup the other segment registers! Even if you set up the stack after pmode
to point to the same place as the real-mode stack was pointing, the return address on the stack will be a problem. A 2 byte NEAR return address was pushed on the stack when call enterProtectedMode
was done. We are now in 32-bit protected mode where NEAR addresses are 4 bytes. The easiest thing to do is ditch the ret
and move the pmode
label to the code you already have for 32-bit mode. This code:
call enterProtectedMode
use32
;Enable the APIC
call enable_lapic
Can now become:
call enterProtectedMode
use32
pmode:
movzx esp, sp ; Extend SP to ESP zero extending upper bits
mov eax, ss
shl eax, 4
add esp, eax ; ESP is now the linear address of original SS:SP pointer
mov ax, DATA_SEG ; Reload segment register with 32-bit flat
; flat data selector
mov ss, ax
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
...
Note: Remove the pmode
label and the ret
after the jmp fword [pmode_farptr]
as they are no longer needed.
After making the changes above you should be able to work on your APIC code. There are many shortcomings in this code. See Brendan's answer about APIC specific issues, but beyond that there are several issues that should be addressed:
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With