aarch64: move kernel to 63rd GB of virtual memory
This patch modifies the aarch64 port to move the kernel from
the 2nd to the 63rd GB of virtual memory. It also adjusts the early preboot and boot
assembly to dynamically fix up the early phys/virt mapping tables so that they
work regardless of where in physical memory the kernel and DTB are loaded.
This allows us to use the same kernel binary on QEMU and Firecracker without
having to adjust the relevant variables in the Makefile and rebuild it to accommodate
the fact that each hypervisor loads loader.img in a different area
of physical memory. Prior to this patch the kernel was mapped 1:1
in the first 4 GB of phys/virt memory.

In essence, this patch enhances preboot.S to dynamically identify the
location of start_elf to jump to. It then modifies boot.S to dynamically
calculate the offset between where the kernel is located in virtual memory and
where it is loaded in physical memory, and to adjust the early boot mapping
tables for the 63rd GB accordingly. Finally, it adjusts the virt/phys and phys/virt
translation functions in core/mmu.cc and related aspects of elf.cc and the Makefile.
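
The patch reduces kernel-address translation to adding or subtracting a single
constant computed at boot. Below is a minimal self-contained sketch of that
idea, not the actual core/mmu.cc code; the helper names and the hard-coded
example value are ours:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    // In the patch this is a .quad in boot.S, filled in by start_elf with the
    // difference between the kernel's link-time virtual base (63rd GB) and
    // the physical address the image was actually loaded at.
    uint64_t kernel_vm_shift = 0xf80000000; // value a QEMU boot would compute

    // Hypothetical helpers illustrating the kernel-image translation that
    // core/mmu.cc performs using the shift.
    static inline uint64_t kernel_virt_to_phys(uint64_t va) {
        return va - kernel_vm_shift;
    }
    static inline uint64_t kernel_phys_to_virt(uint64_t pa) {
        return pa + kernel_vm_shift;
    }

    int main() {
        // 0xfc0080000 is kernel_vm_base from the Makefile; on QEMU the image
        // is loaded at physical 0x40080000.
        printf("0x%" PRIx64 "\n", kernel_virt_to_phys(0xfc0080000)); // 0x40080000
        printf("0x%" PRIx64 "\n", kernel_phys_to_virt(0x40080000)); // 0xfc0080000
    }

Because the shift is computed by boot.S at runtime rather than baked in at
build time, the same loader.img works wherever QEMU or Firecracker places it.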

After the patch, the virtual memory layout looks like this in QEMU:

           vaddr            paddr     size perm memattr name
         8000000          8000000    10000 rwxp     dev gic_dist
         8010000          8010000    10000 rwxp     dev gic_cpu
         9000000          9000000     1000 rwxp     dev pl011
         9010000          9010000     1000 rwxp     dev pl031
        10000000         10000000 2eff0000 rwxp     dev pci_mem
        3eff0000         3eff0000    10000 rwxp     dev pci_io
       fc0000000         40000000   84e000 rwxp  normal kernel
      4010000000       4010000000 10000000 rwxp     dev pci_cfg
ffff80000a000000          a000000      200 rwxp  normal virtio_mmio_cfg
ffff80000a000200          a000200      200 rwxp  normal virtio_mmio_cfg
ffff80000a000400          a000400      200 rwxp  normal virtio_mmio_cfg
ffff80000a000600          a000600      200 rwxp  normal virtio_mmio_cfg
ffff80000a000800          a000800      200 rwxp  normal virtio_mmio_cfg
ffff80000a000a00          a000a00      200 rwxp  normal virtio_mmio_cfg
ffff80000a000c00          a000c00      200 rwxp  normal virtio_mmio_cfg
ffff80000a000e00          a000e00      200 rwxp  normal virtio_mmio_cfg
ffff80004084e000         4084e000 7f7b2000 rwxp  normal main
ffff90004084e000         4084e000 7f7b2000 rwxp  normal page
ffffa0004084e000         4084e000 7f7b2000 rwxp  normal mempool
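
Note that the kernel is the only region whose vaddr and paddr differ by the
boot-computed shift: 0xfc0000000 - 0x40000000 = 0xf80000000. The device
regions remain identity-mapped, and the main/page/mempool regions keep their
fixed ffff8000.../ffff9000.../ffffa000... linear-map bases.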

Fixes #1087

Changes since V1: removed some code from loader.cc that got
accidentally added as part of the initial version of the patch.

Signed-off-by: Waldemar Kozaczuk <[email protected]>
wkozaczuk committed May 4, 2022
1 parent 6e1527c commit 850ce0e
Showing 11 changed files with 213 additions and 33 deletions.
27 changes: 18 additions & 9 deletions Makefile
@@ -318,8 +318,12 @@ kernel-defines = -D_KERNEL $(source-dialects) $(cc-hide-flags) $(gc-flags)
# To add something that will *not* be part of the main kernel, you can do:
#
# mydir/*.o EXTRA_FLAGS = <MY_STUFF>
ifeq ($(arch),x64)
EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_BASE=$(kernel_base) -DOSV_KERNEL_VM_BASE=$(kernel_vm_base) \
-DOSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) -DOSV_LZKERNEL_BASE=$(lzkernel_base)
else
EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_VM_BASE=$(kernel_vm_base)
endif
EXTRA_LIBS =
COMMON = $(autodepend) -g -Wall -Wno-pointer-arith $(CFLAGS_WERROR) -Wformat=0 -Wno-format-security \
-D __BSD_VISIBLE=1 -U _FORTIFY_SOURCE -fno-stack-protector $(INCLUDES) \
@@ -497,12 +501,13 @@ acpi = $(patsubst %.c, %.o, $(acpi-source))

$(acpi:%=$(out)/%): CFLAGS += -fno-strict-aliasing -Wno-stringop-truncation

kernel_vm_shift := $(shell printf "0x%X" $(shell expr $$(( $(kernel_vm_base) - $(kernel_base) )) ))

endif # x64

ifeq ($(arch),aarch64)

kernel_base := 0x40080000
kernel_vm_base := $(kernel_base)
kernel_vm_base := 0xfc0080000 #63GB
app_local_exec_tls_size := 0x40

include $(libfdt_base)/Makefile.libfdt
@@ -516,7 +521,7 @@ $(out)/preboot.bin: $(out)/preboot.elf
$(call quiet, $(OBJCOPY) -O binary $^ $@, OBJCOPY $@)

edata = $(shell readelf --syms $(out)/loader.elf | grep "\.edata" | awk '{print "0x" $$2}')
image_size = $$(( $(edata) - $(kernel_base) ))
image_size = $$(( $(edata) - $(kernel_vm_base) ))

$(out)/loader.img: $(out)/preboot.bin $(out)/loader-stripped.elf
$(call quiet, dd if=$(out)/preboot.bin of=$@ > /dev/null 2>&1, DD $@ preboot.bin)
@@ -526,8 +531,6 @@ $(out)/loader.img: $(out)/preboot.bin $(out)/loader-stripped.elf

endif # aarch64

kernel_vm_shift := $(shell printf "0x%X" $(shell expr $$(( $(kernel_vm_base) - $(kernel_base) )) ))

$(out)/bsd/sys/crypto/rijndael/rijndael-api-fst.o: COMMON+=-fno-strict-aliasing
$(out)/bsd/sys/crypto/sha2/sha2.o: COMMON+=-fno-strict-aliasing
$(out)/bsd/sys/net/route.o: COMMON+=-fno-strict-aliasing
@@ -2071,9 +2074,16 @@ endif
$(out)/default_version_script: exported_symbols/*.symbols exported_symbols/$(arch)/*.symbols
$(call quiet, scripts/generate_version_script.sh $(out)/default_version_script, GEN default_version_script)

ifeq ($(arch),aarch64)
def_symbols = --defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base)
else
def_symbols = --defsym=OSV_KERNEL_BASE=$(kernel_base) \
--defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) \
--defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift)
endif

$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \
--defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \
$(call quiet, $(LD) -o $@ $(def_symbols) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \
$(patsubst %version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
$(linker_archives_options) $(conf_linker_extra_options), \
@@ -2085,8 +2095,7 @@ $(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(CC) $(out)/osv.o -nostdlib -shared -o $(out)/libosv.so -T $(out)/libosv.ld, LIBOSV.SO)

$(out)/kernel.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/empty_bootfs.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \
--defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \
$(call quiet, $(LD) -o $@ $(def_symbols) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \
$(patsubst %version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
$(linker_archives_options) $(conf_linker_extra_options), \
10 changes: 6 additions & 4 deletions arch/aarch64/arch-dtb.cc
@@ -778,7 +778,7 @@ void __attribute__((constructor(init_prio::dtb))) dtb_setup()
}

olddtb = dtb;
dtb = (void *)OSV_KERNEL_BASE;
dtb = (void *)OSV_KERNEL_VM_BASE;

if (fdt_open_into(olddtb, dtb, 0x10000) != 0) {
abort("dtb_setup: failed to move dtb (dtb too large?)\n");
@@ -808,15 +808,17 @@ void __attribute__((constructor(init_prio::dtb))) dtb_setup()
register u64 edata;
asm volatile ("adrp %0, .edata" : "=r"(edata));

/* import from loader.cc */
/* import from loader.cc and core/mmu.cc */
extern elf::Elf64_Ehdr *elf_header;
extern size_t elf_size;
extern void *elf_start;
extern u64 kernel_vm_shift;

elf_start = reinterpret_cast<void *>(elf_header);
mmu::elf_phys_start = reinterpret_cast<void *>(elf_header);
elf_start = mmu::elf_phys_start + kernel_vm_shift;
elf_size = (u64)edata - (u64)elf_start;

/* remove amount of memory used for ELF from avail memory */
mmu::phys addr = (mmu::phys)elf_start + elf_size;
mmu::phys addr = (mmu::phys)mmu::elf_phys_start + elf_size;
memory::phys_mem_size -= addr - mmu::mem_addr;
}
1 change: 1 addition & 0 deletions arch/aarch64/arch-mmu.hh
@@ -19,6 +19,7 @@
namespace mmu {
constexpr int max_phys_addr_size = 48;
extern u64 mem_addr; /* set by the dtb_setup constructor */
extern void *elf_phys_start;

enum class mattr {
normal,
10 changes: 7 additions & 3 deletions arch/aarch64/arch-setup.cc
@@ -86,9 +86,9 @@ void arch_setup_free_memory()

/* import from loader.cc */
extern size_t elf_size;
extern void *elf_start;
extern elf::Elf64_Ehdr* elf_header;

mmu::phys addr = (mmu::phys)elf_start + elf_size;
mmu::phys addr = (mmu::phys)elf_header + elf_size;
mmu::free_initial_memory_range(addr, memory::phys_mem_size);

/* linear_map [TTBR1] */
@@ -100,7 +100,11 @@ }
}

/* linear_map [TTBR0 - boot, DTB and ELF] */
mmu::linear_map((void *)mmu::mem_addr, (mmu::phys)mmu::mem_addr,
/* physical memory layout - relative to the 2MB-aligned address PA stored in mmu::mem_addr
PA + 0x0 - PA + 0x80000: boot
PA + 0x80000 - PA + 0x90000: DTB copy
PA + 0x90000 - [addr]: kernel ELF */
mmu::linear_map((void *)(OSV_KERNEL_VM_BASE - 0x80000), (mmu::phys)mmu::mem_addr,
addr - mmu::mem_addr, "kernel");
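/* worked example (ours, for illustration): in the QEMU layout above PA = 0x40000000,
   so the DTB copy occupies 0x40080000-0x40090000 and the kernel ELF starts at
   0x40090000, all mapped from virtual OSV_KERNEL_VM_BASE - 0x80000 = 0xfc0000000 */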

if (console::PL011_Console::active) {
2 changes: 1 addition & 1 deletion arch/aarch64/arch.hh
@@ -18,7 +18,7 @@ namespace arch {

#define CACHELINE_ALIGNED __attribute__((aligned(64)))
#define INSTR_SIZE_MIN 4
#define ELF_IMAGE_START (OSV_KERNEL_BASE + 0x10000)
#define ELF_IMAGE_START (OSV_KERNEL_VM_BASE + 0x10000)

inline void irq_disable()
{
138 changes: 129 additions & 9 deletions arch/aarch64/boot.S
@@ -14,17 +14,26 @@
start_elf:
/* elf program start address */
/* input: x3=elf header x4=cmdline x5=dtb */
bl calculate_virt_offset
/* x6 contains virt-phys offset */
adrp x0, exception_vectors
add x0, x0, x6 // apply virt-phys offset to make it virtual address
msr vbar_el1, x0
isb

bl validate_el // check that we are at EL1 or die
bl validate_el // check that we are at EL1 or die
bl init_stack
bl zero_bss // initialize bss contents to 0
bl zero_bss // initialize bss contents to 0
bl init_cpu
bl init_boot_pt
bl fixup_boot_pt // apply virt-phys offset to the boot mapping tables
bl init_boot_pt // set mapping table registers
/* at this point the virtual memory is not yet enabled but we are ready to enable it */
bl init_mmu

/* after we returned from init_mmu we are operating in the virtual memory space */
adrp x0, kernel_vm_shift
str x6, [x0] // save the kernel VM shift under kernel_vm_shift

adrp x1, elf_header // store elf header address
str x3, [x1, #:lo12:elf_header]
adrp x1, cmdline // store cmdline (arch-setup.cc)
@@ -36,7 +45,6 @@ start_elf:
mov x29, xzr
bl init_xen
#endif

mov x29, xzr
bl premain

@@ -48,12 +56,19 @@

bl halt

calculate_virt_offset:
adr x0, text_start // fetch physical address of text_start
ldr x6, =(OSV_KERNEL_VM_BASE + 0x20000) // calculate virtual address of text_start per loader.ld
sub x6, x6, x0
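// worked example (ours): on QEMU text_start is loaded at physical
// 0x400a0000 while loader.ld links it at virtual 0xfc00a0000,
// so x6 = 0xfc00a0000 - 0x400a0000 = 0xf80000000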
ret

init_stack:
mov x0, #1 // select SP_ELx
msr spsel, x0
isb

adrp x0, init_stack_top
add x0, x0, x6 // make it virtual address
mov sp, x0
ret

@@ -119,12 +134,80 @@ init_cpu:
isb
ret

fixup_boot_pt:
// x6 contains kernel phys/virt offset
mov x29, x30 // save link register so we can call fixup_pt_entry

/* we need to fix the addresses of the ident subtables to make them physical */
/* as the hardware table walk expects physical addresses in the entries */

// subtract p/v offset from the address of the subtable ident_pt_l3_ttbr0
adrp x0, ident_pt_l4_ttbr0
bl fixup_pt_entry

// subtract p/v offset from the address of the subtable ident_pt_l2_0_ttbr0
adrp x0, ident_pt_l3_ttbr0
bl fixup_pt_entry
// subtract p/v offset from the address of the subtable ident_pt_l2_1_ttbr0
add x0, x0, #8
bl fixup_pt_entry
// subtract p/v offset from the address of the subtable ident_pt_l2_2_ttbr0
add x0, x0, #8
bl fixup_pt_entry
// subtract p/v offset from the address of the subtable ident_pt_l2_3_ttbr0
add x0, x0, #8
bl fixup_pt_entry
// subtract p/v offset from the address of the subtable kernel_pt_l2_63_ttbr0
add x0, x0, #480 //=60*8
bl fixup_pt_entry

// fix 63-64 GB mapping which maps the kernel code

// From https://www.kernel.org/doc/Documentation/arm64/booting.txt:
//"The Image must be placed text_offset bytes from a 2MB aligned base
// address anywhere in usable system RAM and called there."

// Given that the kernel can be loaded anywhere in the allowed range of physical
// memory, we have to dynamically figure out the physical address so that we can
// correctly initialize the pt entries of the 63rd GB of virtual memory, which is
// where the kernel is expected to be per loader.ld.

// We use the physical address of the symbol text_start, located in the 1st 2MB
// of the kernel text, to identify into which 2MB of RAM the kernel ELF is loaded.
// Then we use this information to map the 63rd GB of virtual memory to
// whatever 2MB-aligned area of physical memory the kernel is loaded in.

// round down to the 2MB-aligned base and store in x2
adrp x0, text_start
lsr x2, x0, #21
lsl x2, x2, #21

// start with pt entry stored at kernel_pt_l2_63_ttbr0 (x0)
adrp x0, kernel_pt_l2_63_ttbr0
mov x7, #512
kernel_pt_loop:
ldr x1, [x0] // load initial value of pt entry at address stored in x0
add x1, x1, x2 // add the physical address offset
str x1, [x0] // store the fixed pt value

add x0, x0, #8 // fix another 2MB
sub x7, x7, #1
cbnz x7, kernel_pt_loop
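
// worked example (ours): with the kernel loaded at physical 0x40080000,
// x2 = 0x40000000 and entry n becomes 0x40000000 + (n << 21) + 0x411,
// so virtual 63GB + n*2MB now maps to physical 0x40000000 + n*2MB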

mov x30, x29 // restore link register (x30) so we can properly return
ret

fixup_pt_entry:
ldr x1, [x0] // x0 contains address of the entry, x6 contains offset
sub x1, x1, x6 // apply kernel offset
str x1, [x0] // store the fixed pt entry back into memory
ret

init_boot_pt:
adrp x0, ident_pt_l4_ttbr0
adrp x1, ident_pt_l4_ttbr1
ret

init_secondary_pt:
init_runtime_pt:
adrp x1, smpboot_ttbr0
ldr x0, [x1], #8
ldr x1, [x1]
@@ -150,22 +233,42 @@ init_mmu:
msr sctlr_el1, x0
isb

// Apply offset to switch to the kernel VM address so that we jump
// into the virtual memory space where the kernel is mapped (63rd GB)
add x30, x30, x6
ret

switch_to_runtime_pt:
msr ttbr0_el1, x0
msr ttbr1_el1, x1
isb

dsb sy //Flush TLB
tlbi vmalle1
dsb sy
isb

ret

.align 16
.globl start_secondary_cpu
.hidden start_secondary_cpu
.type start_secondary_cpu , "function"
start_secondary_cpu:
bl calculate_virt_offset
/* x6 contains virt-phys offset */
adrp x0, exception_vectors
add x0, x0, x6 // apply virt-phys offset to make it virtual address
msr vbar_el1, x0
isb

bl init_cpu
bl init_boot_pt
bl init_mmu
bl init_secondary_pt
bl init_boot_pt // use the boot mapping tables fixed by start_elf on primary CPU
bl init_mmu
/* after we returned from init_mmu we are operating in the virtual memory space */
/* but on secondary CPU we can subsequently switch to the runtime mapping tables */
bl init_runtime_pt
bl switch_to_runtime_pt

ldr x0, =smp_stack_free /* ptr */
ldr x1, [x0] /* old value */
@@ -199,7 +302,11 @@ ident_pt_l3_ttbr0:
.quad ident_pt_l2_1_ttbr0 + 0x3 // Map 1GB-2GB one-to-one
.quad ident_pt_l2_2_ttbr0 + 0x3 // Map 2GB-3GB one-to-one
.quad ident_pt_l2_3_ttbr0 + 0x3 // Map 3GB-4GB one-to-one
.rept 508
.rept 59
.quad 0
.endr
.quad kernel_pt_l2_63_ttbr0 + 0x3 // Map 63GB-64GB -> 1GB-2GB
.rept 448
.quad 0
.endr
ident_pt_l2_0_ttbr0:
@@ -230,6 +337,13 @@ ident_pt_l2_3_ttbr0:
.quad offset + (index << 21) + 0x411
index = index + 1
.endr
kernel_pt_l2_63_ttbr0:
index = 0
offset = 0
.rept 512
.quad offset + (index << 21) + 0x411
index = index + 1
.endr
.align 12
ident_pt_l4_ttbr1:
.rept 512
@@ -258,6 +372,12 @@ smp_stack_free: .quad 0
smpboot_ttbr0: .quad 0
smpboot_ttbr1: .quad 0

.hidden kernel_vm_shift
.globl kernel_vm_shift
.type kernel_vm_shift, "object"
.align 16
kernel_vm_shift: .quad 0

/* hmm should we provide an interrupt stack?
. = . + 4096*10
.global interrupt_stack_top
5 changes: 4 additions & 1 deletion arch/aarch64/loader.ld
@@ -18,8 +18,11 @@ SECTIONS
*
* We can't export the ELF header base as a symbol, because ld
* insists on moving stuff around if we do.
*
* We add 0x10000 (64 KB) below to create space for our copy of DTB tree
* (see dtb_setup() in arch-dtb.cc)
*/
. = OSV_KERNEL_BASE + 0x10000 + 0x1000;
. = OSV_KERNEL_VM_BASE + 0x10000 + 0x1000;
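/* illustrative arithmetic (ours): with the aarch64 kernel_vm_base of 0xfc0080000
   this sets the location counter to 0xfc0091000 */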

.dynamic : ALIGN(16) { *(.dynamic) } : dynamic : text

