diff --git a/core/arch/arm/arm.mk b/core/arch/arm/arm.mk index fc9a0e91ea9..0f36decbdea 100644 --- a/core/arch/arm/arm.mk +++ b/core/arch/arm/arm.mk @@ -49,6 +49,12 @@ ifeq ($(CFG_CORE_LARGE_PHYS_ADDR),y) $(call force,CFG_WITH_LPAE,y) endif +# Unmaps all kernel mode code except the code needed to take exceptions +# from user space and restore kernel mode mapping again. This gives more +# strict control over what is accessible while in user mode. +# Addresses CVE-2017-5754 (aka Meltdown) known to affect Arm Cortex-A75 +CFG_CORE_UNMAP_CORE_AT_EL0 ?= y + ifeq ($(CFG_ARM32_core),y) # Configration directive related to ARMv7 optee boot arguments. # CFG_PAGEABLE_ADDR: if defined, forces pageable data physical address. diff --git a/core/arch/arm/include/kernel/thread.h b/core/arch/arm/include/kernel/thread.h index ddeea7f5532..66095d29d21 100644 --- a/core/arch/arm/include/kernel/thread.h +++ b/core/arch/arm/include/kernel/thread.h @@ -517,6 +517,13 @@ void thread_unwind_user_mode(uint32_t ret, uint32_t exit_status0, vaddr_t thread_get_saved_thread_sp(void); #endif /*ARM64*/ +/* + * Provides addresses and size of kernel code that must be mapped while in + * user mode. + */ +void thread_get_user_kcode(struct mobj **mobj, size_t *offset, + vaddr_t *va, size_t *sz); + /* * Returns the start address (bottom) of the stack for the current thread, * zero if there is no current thread. diff --git a/core/arch/arm/include/kernel/user_ta.h b/core/arch/arm/include/kernel/user_ta.h index 2e7669e141c..c945fdad6f2 100644 --- a/core/arch/arm/include/kernel/user_ta.h +++ b/core/arch/arm/include/kernel/user_ta.h @@ -55,7 +55,6 @@ struct user_ta_ctx { struct mobj *mobj_code; /* secure world memory */ struct mobj *mobj_stack; /* stack */ uint32_t load_addr; /* elf load addr (from TAs address space) */ - uint32_t context; /* Context ID of the process */ struct tee_mmu_info *mmu; /* Saved MMU information (ddr only) */ void *ta_time_offs; /* Time reference used by the TA */ struct tee_pager_area_head *areas; diff --git a/core/arch/arm/include/mm/core_mmu.h b/core/arch/arm/include/mm/core_mmu.h index fe033508057..b8be19a51ac 100644 --- a/core/arch/arm/include/mm/core_mmu.h +++ b/core/arch/arm/include/mm/core_mmu.h @@ -28,13 +28,16 @@ #ifndef CORE_MMU_H #define CORE_MMU_H +#ifndef ASM #include #include #include #include -#include #include #include +#endif + +#include /* A small page is the smallest unit of memory that can be mapped */ #define SMALL_PAGE_SHIFT 12 @@ -74,6 +77,17 @@ #define CFG_TEE_RAM_VA_SIZE CORE_MMU_PGDIR_SIZE #endif +/* + * CORE_MMU_L1_TBL_OFFSET is used when switching to/from reduced kernel + * mapping. The actual value depends on internals in core_mmu_lpae.c and + * core_mmu_v7.c which we'd rather not expose here. There's a compile time + * assertion to check that these magic numbers are correct. + */ +#ifdef CFG_WITH_LPAE +#define CORE_MMU_L1_TBL_OFFSET (CFG_TEE_CORE_NB_CORE * 4 * 8) +#else +#define CORE_MMU_L1_TBL_OFFSET (4096 * 4) +#endif /* * TEE_RAM_VA_START: The start virtual address of the TEE RAM * TEE_TEXT_VA_START: The start virtual address of the OP-TEE text @@ -90,6 +104,7 @@ #define STACK_ALIGNMENT (sizeof(long) * 2) #endif +#ifndef ASM /* * Memory area type: * MEM_AREA_END: Reserved, marks the end of a table of mapping areas.
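The two CORE_MMU_L1_TBL_OFFSET values above are magic numbers that have to equal half the size of the doubled L1 table arrays introduced further down in core_mmu_lpae.c and core_mmu_v7.c, which is exactly what the added COMPILE_TIME_ASSERT()s check. The standalone sketch below only illustrates that arithmetic; the entry counts (4 LPAE L1 entries of 8 bytes each, 4096 short-descriptor entries of 4 bytes each) and the 4-core configuration are assumptions picked to match the magic numbers, not values taken from this patch.

```c
/*
 * Illustration only: why CORE_MMU_L1_TBL_OFFSET equals half the size of
 * the doubled L1 table arrays. Assumed values: 4 LPAE L1 entries of 8
 * bytes each, 4096 short-descriptor L1 entries of 4 bytes each, and a
 * hypothetical 4-core configuration.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CFG_TEE_CORE_NB_CORE	4	/* assumption for this sketch */

/* LPAE: uint64_t l1_xlation_table[2][CFG_TEE_CORE_NB_CORE][4] */
#define LPAE_NUM_L1_ENTRIES	4
#define LPAE_TBL_OFFSET		(CFG_TEE_CORE_NB_CORE * 4 * 8)

/* ARMv7 short descriptors: uint32_t main_mmu_l1_ttb[2][4096] */
#define V7_NUM_L1_ENTRIES	4096
#define V7_TBL_OFFSET		(4096 * 4)

int main(void)
{
	/* Sizes of the doubled table arrays added by the patch */
	size_t lpae_l1_size = sizeof(uint64_t[2][CFG_TEE_CORE_NB_CORE]
					       [LPAE_NUM_L1_ENTRIES]);
	size_t v7_l1_size = sizeof(uint32_t[2][V7_NUM_L1_ENTRIES]);

	/* Mirrors the COMPILE_TIME_ASSERT()s added in the .c files */
	assert(LPAE_TBL_OFFSET == lpae_l1_size / 2);
	assert(V7_TBL_OFFSET == v7_l1_size / 2);

	/*
	 * The offset is what the exception code adds to or subtracts
	 * from TTBR0 (LPAE/AArch64) or TTBR1 (ARMv7) to flip between
	 * the full kernel mapping and the reduced user-mode mapping.
	 */
	printf("LPAE offset: %zu bytes, v7 offset: %zu bytes\n",
	       lpae_l1_size / 2, v7_l1_size / 2);
	return 0;
}
```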
@@ -556,4 +571,6 @@ void core_mmu_set_discovered_nsec_ddr(struct core_mmu_phys_mem *start, struct mobj **core_sdp_mem_create_mobjs(void); #endif +#endif /*ASM*/ + #endif /* CORE_MMU_H */ diff --git a/core/arch/arm/include/mm/mobj.h b/core/arch/arm/include/mm/mobj.h index 8a411f00f26..8125910cb04 100644 --- a/core/arch/arm/include/mm/mobj.h +++ b/core/arch/arm/include/mm/mobj.h @@ -56,6 +56,7 @@ struct mobj_ops { extern struct mobj mobj_virt; extern struct mobj *mobj_sec_ddr; +extern struct mobj *mobj_tee_ram; static inline void *mobj_get_va(struct mobj *mobj, size_t offset) { diff --git a/core/arch/arm/kernel/asm-defines.c b/core/arch/arm/kernel/asm-defines.c index a08ced3d4ca..fd91703946d 100644 --- a/core/arch/arm/kernel/asm-defines.c +++ b/core/arch/arm/kernel/asm-defines.c @@ -68,6 +68,7 @@ DEFINES DEFINE(THREAD_SMC_ARGS_SIZE, sizeof(struct thread_smc_args)); DEFINE(THREAD_SVC_REG_X0, offsetof(struct thread_svc_regs, x0)); + DEFINE(THREAD_SVC_REG_X2, offsetof(struct thread_svc_regs, x2)); DEFINE(THREAD_SVC_REG_X5, offsetof(struct thread_svc_regs, x5)); DEFINE(THREAD_SVC_REG_X6, offsetof(struct thread_svc_regs, x6)); DEFINE(THREAD_SVC_REG_X30, offsetof(struct thread_svc_regs, x30)); diff --git a/core/arch/arm/kernel/thread.c b/core/arch/arm/kernel/thread.c index 99e64487604..26908a1b593 100644 --- a/core/arch/arm/kernel/thread.c +++ b/core/arch/arm/kernel/thread.c @@ -147,6 +147,11 @@ thread_pm_handler_t thread_cpu_resume_handler_ptr; thread_pm_handler_t thread_system_off_handler_ptr; thread_pm_handler_t thread_system_reset_handler_ptr; +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 +static vaddr_t thread_user_kcode_va; +long thread_user_kcode_offset; +static size_t thread_user_kcode_size; +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ static unsigned int thread_global_lock = SPINLOCK_UNLOCK; static bool thread_prealloc_rpc_cache; @@ -883,6 +888,25 @@ static void init_thread_stacks(void) } #endif /*CFG_WITH_PAGER*/ +static void init_user_kcode(void) +{ +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + vaddr_t v; + + v = (vaddr_t)thread_vect_table; + thread_user_kcode_va = ROUNDDOWN(v, CORE_MMU_USER_CODE_SIZE); + /* + * The maximum size of the exception vector and associated code is + * something slightly larger than 2 KiB. Worst case the exception + * vector can span two pages. 
+ */ + thread_user_kcode_size = CORE_MMU_USER_CODE_SIZE * 2; + + core_mmu_get_user_va_range(&v, NULL); + thread_user_kcode_offset = thread_user_kcode_va - v; +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ +} + void thread_init_primary(const struct thread_handlers *handlers) { init_handlers(handlers); @@ -892,6 +916,8 @@ void thread_init_primary(const struct thread_handlers *handlers) init_thread_stacks(); pgt_init(); + + init_user_kcode(); } static void init_sec_mon(size_t pos __maybe_unused) @@ -1144,6 +1170,17 @@ uint32_t thread_enter_user_mode(unsigned long a0, unsigned long a1, spsr, exit_status0, exit_status1); } +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 +void thread_get_user_kcode(struct mobj **mobj, size_t *offset, + vaddr_t *va, size_t *sz) +{ + core_mmu_get_user_va_range(va, NULL); + *mobj = mobj_tee_ram; + *offset = thread_user_kcode_va - CFG_TEE_RAM_START; + *sz = thread_user_kcode_size; +} +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ + void thread_add_mutex(struct mutex *m) { struct thread_core_local *l = thread_get_core_local(); diff --git a/core/arch/arm/kernel/thread_a32.S b/core/arch/arm/kernel/thread_a32.S index 59cc6c285fd..31dfba30dee 100644 --- a/core/arch/arm/kernel/thread_a32.S +++ b/core/arch/arm/kernel/thread_a32.S @@ -34,12 +34,21 @@ #include #include #include +#include #include #include #include #include "thread_private.h" + .macro cmp_spsr_user_mode reg:req + /* + * We're only testing the lower 4 bits as bit 5 (0x10) + * always is set. + */ + tst \reg, #0x0f + .endm + LOCAL_FUNC vector_std_smc_entry , : UNWIND( .fnstart) UNWIND( .cantunwind) @@ -246,14 +255,20 @@ UNWIND( .cantunwind) ldm r12!, {r1, sp, lr} msr spsr_fsxc, r1 - cps #CPSR_MODE_SVC ldm r12, {r1, r2} - push {r1, r2} + /* + * Switching to some other mode than SVC as we need to set spsr in + * order to return into the old state properly and it may be SVC + * mode we're returning to. + */ + cps #CPSR_MODE_ABT + cmp_spsr_user_mode r2 + mov lr, r1 + msr spsr_fsxc, r2 ldm r0, {r0-r12} - - /* Restore CPSR and jump to the instruction to resume at */ - rfefd sp! + movnes pc, lr + b eret_to_user_mode UNWIND( .fnend) END_FUNC thread_resume @@ -441,117 +456,6 @@ UNWIND( .fnend) END_FUNC thread_rpc KEEP_PAGER thread_rpc -/* The handler of native interrupt. */ -.macro native_intr_handler mode:req - .ifc \mode\(),irq - /* - * Foreign interrupts should be masked. - * For GICv2, IRQ is for foreign interrupt and already masked by - * hardware in FIQ mode which is used for native interrupt. - * For GICv3, FIQ is for foreign interrupt. It's not masked by hardware - * in IRQ mode which is used for natvie interrupt. - */ - cpsid f - .endif - /* - * FIQ and IRQ have a +4 offset for lr compared to preferred return - * address - */ - sub lr, lr, #4 - - /* - * We're always saving {r0-r3}. In IRQ mode we're saving r12 also. - * In FIQ mode we're saving the banked fiq registers {r8-r12} FIQ - * because the secure monitor doesn't save those. The treatment of - * the banked fiq registers is somewhat analogous to the lazy save - * of VFP registers. - */ - .ifc \mode\(),fiq - push {r0-r3, r8-r12, lr} - .else - push {r0-r3, r12, lr} - .endif - bl thread_check_canaries - ldr lr, =thread_nintr_handler_ptr - ldr lr, [lr] - blx lr - .ifc \mode\(),fiq - pop {r0-r3, r8-r12, lr} - .else - pop {r0-r3, r12, lr} - .endif - movs pc, lr -.endm - -/* The handler of foreign interrupt. */ -.macro foreign_intr_handler mode:req - .ifc \mode\(),irq - /* - * Disable FIQ if the foreign interrupt is sent as IRQ. 
- * IRQ mode is set up to use tmp stack so FIQ has to be - * disabled before touching the stack. We can also assign - * SVC sp from IRQ sp to get SVC mode into the state we - * need when doing the SMC below. - * If it is sent as FIQ, the IRQ has already been masked by hardware - */ - cpsid f - .endif - sub lr, lr, #4 - push {lr} - push {r12} - - .ifc \mode\(),fiq - bl thread_save_state_fiq - .else - bl thread_save_state - .endif - - mov r0, #THREAD_FLAGS_EXIT_ON_FOREIGN_INTR - mrs r1, spsr - pop {r12} - pop {r2} - blx thread_state_suspend - mov r4, r0 /* Supply thread index */ - - /* - * Switch to SVC mode and copy current stack pointer as it already - * is the tmp stack. - */ - mov r0, sp - cps #CPSR_MODE_SVC - mov sp, r0 - - ldr r0, =TEESMC_OPTEED_RETURN_CALL_DONE - ldr r1, =OPTEE_SMC_RETURN_RPC_FOREIGN_INTR - mov r2, #0 - mov r3, #0 - /* r4 is already filled in above */ - smc #0 - b . /* SMC should not return */ -.endm - -LOCAL_FUNC thread_fiq_handler , : -UNWIND( .fnstart) -UNWIND( .cantunwind) -#if defined(CFG_ARM_GICV3) - foreign_intr_handler fiq -#else - native_intr_handler fiq -#endif -UNWIND( .fnend) -END_FUNC thread_fiq_handler - -LOCAL_FUNC thread_irq_handler , : -UNWIND( .fnstart) -UNWIND( .cantunwind) -#if defined(CFG_ARM_GICV3) - native_intr_handler irq -#else - foreign_intr_handler irq -#endif -UNWIND( .fnend) -END_FUNC thread_irq_handler - FUNC thread_init_vbar , : UNWIND( .fnstart) /* Set vector (VBAR) */ @@ -620,28 +524,22 @@ UNWIND( .cantunwind) ldr r5, [sp, #(11 * 0x4)] /* user function */ ldr r6, [sp, #(12 * 0x4)] /* spsr */ - /* - * Set the saved Processors Status Register to user mode to allow - * entry of user mode through movs below. - */ - msr spsr_cxsf, r6 - /* * Save old user sp and set new user sp. */ cps #CPSR_MODE_SYS - mov r6, sp + mov r7, sp mov sp, r4 cps #CPSR_MODE_SVC - push {r6,r7} + push {r7,r8} - /* - * Don't allow return from this function, return is done through - * thread_unwind_user_mode() below. - */ - mov lr, #0 - /* Call the user function with its arguments */ - movs pc, r5 + /* Prepare user mode entry via eret_to_user_mode */ + cpsid aif + cps #CPSR_MODE_ABT + msr spsr_fsxc, r6 + mov lr, r5 + + b eret_to_user_mode UNWIND( .fnend) END_FUNC __thread_enter_user_mode @@ -668,48 +566,213 @@ UNWIND( .cantunwind) UNWIND( .fnend) END_FUNC thread_unwind_user_mode -LOCAL_FUNC thread_abort_handler , : -thread_und_handler: -UNWIND( .fnstart) -UNWIND( .cantunwind) + .macro maybe_restore_mapping + /* + * This macro is a bit hard to read due to all the ifdefs, + * we're testing for two different configs which makes four + * different combinations. + * + * - With LPAE, and then some extra code if with + * CFG_CORE_UNMAP_CORE_AT_EL0 + * - Without LPAE, and then some extra code if with + * CFG_CORE_UNMAP_CORE_AT_EL0 + */ + + /* + * At this point we can't rely on any memory being writable + * yet, so we're using TPIDRPRW to store r0, and if with + * LPAE TPIDRURO to store r1 too. + */ + write_tpidrprw r0 + +#ifdef CFG_WITH_LPAE + write_tpidruro r1 + + read_ttbr0_64bit r0, r1 + tst r1, #BIT(TTBR_ASID_SHIFT - 32) + beq 11f + +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + /* + * Update the mapping to use the full kernel mode mapping. + * Since the translation table could reside above 4GB we'll + * have to use 64-bit arithmetics. 
+ */ + subs r0, r0, #CORE_MMU_L1_TBL_OFFSET + sbc r1, r1, #0 +#endif + bic r1, r1, #BIT(TTBR_ASID_SHIFT - 32) + write_ttbr0_64bit r0, r1 + isb + +#else /*!CFG_WITH_LPAE*/ + read_contextidr r0 + tst r0, #1 + beq 11f + + /* Update the mapping to use the full kernel mode mapping. */ + bic r0, r0, #1 + write_contextidr r0 + isb +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + read_ttbr1 r0 + sub r0, r0, #CORE_MMU_L1_TBL_OFFSET + write_ttbr1 r0 + isb +#endif + +#endif /*!CFG_WITH_LPAE*/ + +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + ldr r0, =thread_vect_table + write_vbar r0 + isb + + 11: /* + * The PC is adjusted unconditionally to guard against the + * case there was an FIQ just before we did the "cpsid aif". + */ + ldr r0, =22f + bx r0 + 22: +#else + 11: +#endif + read_tpidrprw r0 +#ifdef CFG_WITH_LPAE + read_tpidruro r1 +#endif + .endm + +/* The handler of native interrupt. */ +.macro native_intr_handler mode:req + cpsid aif + maybe_restore_mapping + /* - * Disable both foreign and native interrupts in the thread handlers. - * The tee handlers can decide when the native interrupts should - * be enabled. + * FIQ and IRQ have a +4 offset for lr compared to preferred return + * address + */ + sub lr, lr, #4 + + /* + * We're always saving {r0-r3}. In IRQ mode we're saving r12 also. + * In FIQ mode we're saving the banked fiq registers {r8-r12} FIQ + * because the secure monitor doesn't save those. The treatment of + * the banked fiq registers is somewhat analogous to the lazy save + * of VFP registers. */ - cpsid f /* IRQ is already masked by the hardware */ + push {r0-r3} + .ifc \mode\(),fiq + push {r8-r12, lr} + .else + push {r12, lr} + .endif + + bl thread_check_canaries + ldr lr, =thread_nintr_handler_ptr + ldr lr, [lr] + blx lr + + .ifc \mode\(),fiq + pop {r8-r12, lr} + .else + pop {r12, lr} + .endif + + mov r0, sp + mrs r1, spsr + mov r2, lr + add sp, sp, #(4 * 4) + cps #CPSR_MODE_ABT + cmp_spsr_user_mode r1 + msr spsr_fsxc, r1 + mov lr, r2 + ldm r0, {r0-r3} + movnes pc, lr + b eret_to_user_mode +.endm + +/* The handler of foreign interrupt. */ +.macro foreign_intr_handler mode:req + cpsid aif + maybe_restore_mapping + + sub lr, lr, #4 + push {lr} + push {r12} + + .ifc \mode\(),fiq + bl thread_save_state_fiq + .else + bl thread_save_state + .endif + + mov r0, #THREAD_FLAGS_EXIT_ON_FOREIGN_INTR + mrs r1, spsr + pop {r12} + pop {r2} + blx thread_state_suspend + mov r4, r0 /* Supply thread index */ + + /* + * Switch to SVC mode and copy current stack pointer as it already + * is the tmp stack. + */ + mov r0, sp + cps #CPSR_MODE_SVC + mov sp, r0 + + ldr r0, =TEESMC_OPTEED_RETURN_CALL_DONE + ldr r1, =OPTEE_SMC_RETURN_RPC_FOREIGN_INTR + mov r2, #0 + mov r3, #0 + /* r4 is already filled in above */ + smc #0 + b . /* SMC should not return */ +.endm + + .section .text.thread_vect_table + .align 5 +FUNC thread_vect_table , : +UNWIND( .fnstart) +UNWIND( .cantunwind) + b . /* Reset */ + b thread_und_handler /* Undefined instruction */ + b thread_svc_handler /* System call */ + b thread_pabort_handler /* Prefetch abort */ + b thread_dabort_handler /* Data abort */ + b . /* Reserved */ + b thread_irq_handler /* IRQ */ + b thread_fiq_handler /* FIQ */ + +thread_und_handler: + cpsid aif + maybe_restore_mapping strd r0, r1, [sp, #THREAD_CORE_LOCAL_R0] mrs r1, spsr tst r1, #CPSR_T subne lr, lr, #2 subeq lr, lr, #4 mov r0, #ABORT_TYPE_UNDEF - b .thread_abort_generic + b thread_abort_common thread_dabort_handler: - /* - * Disable both foreign and native interrupts in the thread handlers. 
- * The tee handlers can decide when the native interrupts should - * be enabled. - */ - cpsid f /* IRQ is already masked by the hardware */ + cpsid aif + maybe_restore_mapping strd r0, r1, [sp, #THREAD_CORE_LOCAL_R0] sub lr, lr, #8 mov r0, #ABORT_TYPE_DATA - b .thread_abort_generic + b thread_abort_common thread_pabort_handler: - /* - * Disable both foreign and native interrupts in the thread handlers. - * The tee handlers can decide when the native interrupts should - * be enabled. - */ - cpsid f /* IRQ is already masked by the hardware */ + cpsid aif + maybe_restore_mapping strd r0, r1, [sp, #THREAD_CORE_LOCAL_R0] sub lr, lr, #4 mov r0, #ABORT_TYPE_PREFETCH -.thread_abort_generic: +thread_abort_common: /* * At this label: * cpsr is in mode undef or abort @@ -790,45 +853,105 @@ thread_pabort_handler: lsr r0, r0, #THREAD_CLF_SAVED_SHIFT str r0, [sp, #THREAD_CORE_LOCAL_FLAGS] + cmp_spsr_user_mode r1 ldm ip, {r0-r11, ip} + movnes pc, lr + b eret_to_user_mode + /* end thread_abort_common */ - movs pc, lr -UNWIND( .fnend) -END_FUNC thread_abort_handler +thread_svc_handler: + cpsid aif + + maybe_restore_mapping -LOCAL_FUNC thread_svc_handler , : -UNWIND( .fnstart) -UNWIND( .cantunwind) - /* - * Disable both foreign and native interrupts in the thread handlers. - * The tee handlers can decide when the native interrupts should - * be enabled. - */ - cpsid f /* IRQ is already masked by the hardware */ push {r0-r7, lr} mrs r0, spsr push {r0} mov r0, sp bl tee_svc_handler - pop {r0} + cpsid aif /* In case something was unmasked */ + /* Use ip instead of stack pointer as we need to switch mode. */ + mov ip, sp + add sp, #(4 * 10) + cps #CPSR_MODE_ABT + ldr r0, [ip], #4 msr spsr_fsxc, r0 - pop {r0-r7, lr} - movs pc, lr -UNWIND( .fnend) -END_FUNC thread_svc_handler + cmp_spsr_user_mode r0 + ldm ip, {r0-r7, lr} + movnes pc, lr + b eret_to_user_mode + /* end thread_svc_handler */ - .section .text.thread_vect_table - .align 5 -LOCAL_FUNC thread_vect_table , : -UNWIND( .fnstart) -UNWIND( .cantunwind) - b . /* Reset */ - b thread_und_handler /* Undefined instruction */ - b thread_svc_handler /* System call */ - b thread_pabort_handler /* Prefetch abort */ - b thread_dabort_handler /* Data abort */ - b . /* Reserved */ - b thread_irq_handler /* IRQ */ - b thread_fiq_handler /* FIQ */ +thread_fiq_handler: +#if defined(CFG_ARM_GICV3) + foreign_intr_handler fiq +#else + native_intr_handler fiq +#endif + /* end thread_fiq_handler */ + +thread_irq_handler: +#if defined(CFG_ARM_GICV3) + native_intr_handler irq +#else + foreign_intr_handler irq +#endif + /* end thread_irq_handler */ + + /* + * Returns to user mode. + * Expects to be jumped to with lr pointing to the user space + * address to jump to and spsr holding the desired cpsr. Async + * abort, irq and fiq should be masked. 
+ */ +eret_to_user_mode: + write_tpidrprw r0 +#if defined(CFG_CORE_UNMAP_CORE_AT_EL0) || defined(CFG_WITH_LPAE) + write_tpidruro r1 +#endif + +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + ldr r0, =thread_user_kcode_offset + ldr r0, [r0] + adr r1, thread_vect_table + sub r1, r1, r0 + write_vbar r1 + isb + + /* Jump into the reduced mapping before the full mapping is removed */ + ldr r1, =1f + sub r1, r1, r0 + bx r1 +1: +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ + +#ifdef CFG_WITH_LPAE + read_ttbr0_64bit r0, r1 +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + add r0, r0, #CORE_MMU_L1_TBL_OFFSET +#endif + /* switch to user ASID */ + orr r1, r1, #BIT(TTBR_ASID_SHIFT - 32) + write_ttbr0_64bit r0, r1 + isb +#else /*!CFG_WITH_LPAE*/ +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + read_ttbr1 r0 + add r0, r0, #CORE_MMU_L1_TBL_OFFSET + write_ttbr1 r0 + isb +#endif + read_contextidr r0 + orr r0, r0, #BIT(0) + write_contextidr r0 + isb +#endif /*!CFG_WITH_LPAE*/ + + read_tpidrprw r0 +#if defined(CFG_CORE_UNMAP_CORE_AT_EL0) || defined(CFG_WITH_LPAE) + read_tpidruro r1 +#endif + + movs pc, lr UNWIND( .fnend) END_FUNC thread_vect_table diff --git a/core/arch/arm/kernel/thread_a64.S b/core/arch/arm/kernel/thread_a64.S index 9cf9203e68e..cdc5c90dfc2 100644 --- a/core/arch/arm/kernel/thread_a64.S +++ b/core/arch/arm/kernel/thread_a64.S @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,12 @@ madd x\res, x\tmp0, x\tmp1, x\res .endm + .macro b_if_spsr_is_el0 reg, label + tbnz \reg, #(SPSR_MODE_RW_32 << SPSR_MODE_RW_SHIFT), \label + tst \reg, #(SPSR_64_MODE_EL_MASK << SPSR_64_MODE_EL_SHIFT) + b.eq \label + .endm + LOCAL_FUNC vector_std_smc_entry , : sub sp, sp, #THREAD_SMC_ARGS_SIZE store_xregs sp, THREAD_SMC_ARGS_X0, 0, 7 @@ -170,12 +177,23 @@ KEEP_PAGER thread_vector_table /* void thread_resume(struct thread_ctx_regs *regs) */ FUNC thread_resume , : load_xregs x0, THREAD_CTX_REGS_SP, 1, 3 + load_xregs x0, THREAD_CTX_REGS_X4, 4, 30 mov sp, x1 msr elr_el1, x2 msr spsr_el1, x3 - load_xregs x0, THREAD_CTX_REGS_X1, 1, 30 + + b_if_spsr_is_el0 w3, 1f + + load_xregs x0, THREAD_CTX_REGS_X1, 1, 3 ldr x0, [x0, THREAD_CTX_REGS_X0] eret + +1: load_xregs x0, THREAD_CTX_REGS_X1, 1, 3 + ldr x0, [x0, THREAD_CTX_REGS_X0] + + msr spsel, #1 + store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1 + b eret_to_el0 END_FUNC thread_resume FUNC thread_std_smc_entry , : @@ -313,7 +331,8 @@ FUNC __thread_enter_user_mode , : mov x29, #0 /* Jump into user mode */ - eret + store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1 + b eret_to_el0 END_FUNC __thread_enter_user_mode /* @@ -345,10 +364,45 @@ END_FUNC thread_unwind_user_mode .endif .endm + .macro restore_mapping +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + /* Temporarily save x0 */ + msr tpidr_el1, x0 + + /* Update the mapping to use the full kernel mapping */ + mrs x0, ttbr0_el1 + sub x0, x0, #CORE_MMU_L1_TBL_OFFSET + /* switch to kernel mode ASID */ + bic x0, x0, #BIT(TTBR_ASID_SHIFT) + msr ttbr0_el1, x0 + isb + + /* Jump into the full mapping and continue execution */ + ldr x0, =1f + br x0 + 1: + + /* Point to the vector into the full mapping */ + adr x0, thread_vect_table + msr vbar_el1, x0 + isb + + /* Restore x1 */ + mrs x0, tpidr_el1 + store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 +#else + store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + mrs x0, ttbr0_el1 + /* switch to kernel mode ASID */ + bic x0, x0, #BIT(TTBR_ASID_SHIFT) + msr ttbr0_el1, x0 + isb +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ + .endm .section .text.thread_vect_table .align 11 -LOCAL_FUNC thread_vect_table , : +FUNC thread_vect_table , : /* 
----------------------------------------------------- * EL1 with SP0 : 0x0 - 0x180 * ----------------------------------------------------- @@ -406,7 +460,8 @@ SErrorSPx: */ .align 7 el0_sync_a64: - store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + restore_mapping + mrs x2, esr_el1 mrs x3, sp_el0 lsr x2, x2, #ESR_EC_SHIFT @@ -417,13 +472,15 @@ el0_sync_a64: .align 7 el0_irq_a64: - store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + restore_mapping + b elx_irq check_vector_size el0_irq_a64 .align 7 el0_fiq_a64: - store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + restore_mapping + b elx_fiq check_vector_size el0_fiq_a64 @@ -438,7 +495,8 @@ SErrorA64: */ .align 7 el0_sync_a32: - store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + restore_mapping + mrs x2, esr_el1 mrs x3, sp_el0 lsr x2, x2, #ESR_EC_SHIFT @@ -449,13 +507,15 @@ el0_sync_a32: .align 7 el0_irq_a32: - store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + restore_mapping + b elx_irq check_vector_size el0_irq_a32 .align 7 el0_fiq_a32: - store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + restore_mapping + b elx_fiq check_vector_size el0_fiq_a32 @@ -464,6 +524,48 @@ SErrorA32: b SErrorA32 check_vector_size SErrorA32 +/* + * We're keeping this code in the same section as the vector to make sure + * that it's always available. + */ +eret_to_el0: + +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + /* Point to the vector into the reduced mapping */ + adr x0, thread_user_kcode_offset + ldr x0, [x0] + adr x1, thread_vect_table + sub x1, x1, x0 + msr vbar_el1, x1 + isb + + /* Jump into the reduced mapping and continue execution */ + ldr x1, =1f + sub x1, x1, x0 + br x1 +1: + + load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1 + msr tpidr_el1, x0 + + /* Update the mapping to exclude the full kernel mapping */ + mrs x0, ttbr0_el1 + add x0, x0, #CORE_MMU_L1_TBL_OFFSET + orr x0, x0, #BIT(TTBR_ASID_SHIFT) /* switch to user mode ASID */ + msr ttbr0_el1, x0 + isb + + mrs x0, tpidr_el1 +#else + mrs x0, ttbr0_el1 + orr x0, x0, #BIT(TTBR_ASID_SHIFT) /* switch to user mode ASID */ + msr ttbr0_el1, x0 + isb + load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1 +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ + + eret + END_FUNC thread_vect_table LOCAL_FUNC el0_svc , : @@ -526,14 +628,22 @@ LOCAL_FUNC el0_svc , : load_xregs sp, THREAD_SVC_REG_ELR, 0, 1 msr elr_el1, x0 msr spsr_el1, x1 - load_xregs sp, THREAD_SVC_REG_X0, 0, 14 + load_xregs sp, THREAD_SVC_REG_X2, 2, 14 mov x30, sp ldr x0, [x30, #THREAD_SVC_REG_SP_EL0] mov sp, x0 - ldr x0, [x30, THREAD_SVC_REG_X0] + b_if_spsr_is_el0 w1, 1f + ldp x0, x1, [x30, THREAD_SVC_REG_X0] ldr x30, [x30, #THREAD_SVC_REG_X30] eret + +1: ldp x0, x1, [x30, THREAD_SVC_REG_X0] + ldr x30, [x30, #THREAD_SVC_REG_X30] + + msr spsel, #1 + store_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1 + b eret_to_el0 END_FUNC el0_svc LOCAL_FUNC el1_sync_abort , : @@ -680,12 +790,17 @@ LOCAL_FUNC el0_sync_abort , : msr sp_el0, x1 /* Update core local flags */ - ldr w0, [sp, #THREAD_CORE_LOCAL_FLAGS] - lsr w0, w0, #THREAD_CLF_SAVED_SHIFT - str w0, [sp, #THREAD_CORE_LOCAL_FLAGS] + ldr w1, [sp, #THREAD_CORE_LOCAL_FLAGS] + lsr w1, w1, #THREAD_CLF_SAVED_SHIFT + str w1, [sp, #THREAD_CORE_LOCAL_FLAGS] - /* Restore x0 to x3 */ - load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3 + /* Restore x2 to x3 */ + load_xregs sp, THREAD_CORE_LOCAL_X2, 2, 3 + + b_if_spsr_is_el0 w0, eret_to_el0 + + /* Restore x0 to x1 */ + load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1 /* Return from exception */ eret @@ -829,8 +944,13 @@ END_FUNC el0_sync_abort lsr w0, w0, #THREAD_CLF_SAVED_SHIFT str w0, [sp, #THREAD_CORE_LOCAL_FLAGS] - /* Restore x0..x3 */ - load_xregs sp, 
THREAD_CORE_LOCAL_X0, 0, 3 + mrs x0, spsr_el1 + /* Restore x2..x3 */ + load_xregs sp, THREAD_CORE_LOCAL_X2, 2, 3 + b_if_spsr_is_el0 w0, eret_to_el0 + + /* Restore x0..x1 */ + load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1 /* Return from exception */ eret diff --git a/core/arch/arm/kernel/thread_private.h b/core/arch/arm/kernel/thread_private.h index f7614d13906..c66b6fbde85 100644 --- a/core/arch/arm/kernel/thread_private.h +++ b/core/arch/arm/kernel/thread_private.h @@ -155,6 +155,17 @@ extern thread_pm_handler_t thread_cpu_resume_handler_ptr; extern thread_pm_handler_t thread_system_off_handler_ptr; extern thread_pm_handler_t thread_system_reset_handler_ptr; + +/* + * During boot note the part of code and data that needs to be mapped while + * in user mode. The provided address and size have to be page aligned. + * Note that the code and data will be mapped at the lowest possible + * addresses available for user space (see core_mmu_get_user_va_range()). + */ +extern long thread_user_kcode_offset; + +void thread_vect_table(void); + /* * Initializes VBAR for current CPU (called by thread_init_per_cpu() */ diff --git a/core/arch/arm/kernel/tlb_helpers_a32.S b/core/arch/arm/kernel/tlb_helpers_a32.S index 2f753906450..bff1ece06a4 100644 --- a/core/arch/arm/kernel/tlb_helpers_a32.S +++ b/core/arch/arm/kernel/tlb_helpers_a32.S @@ -57,9 +57,10 @@ END_FUNC tlbi_mva_allasid /* void tlbi_asid(unsigned long asid); */ FUNC tlbi_asid , : UNWIND( .fnstart) - and r0, r0, #0xff /* Get ASID */ dsb ishst /* Sync with table update */ write_tlbiasidis r0 /* Inval unified TLB by ASID Inner Sharable */ + orr r0, r0, #1 /* Select the kernel ASID */ + write_tlbiasidis r0 /* Inval unified TLB by ASID Inner Sharable */ dsb ish /* Sync with tlb invalidation completion */ isb /* Sync execution on tlb update */ bx lr diff --git a/core/arch/arm/kernel/tlb_helpers_a64.S b/core/arch/arm/kernel/tlb_helpers_a64.S index 2e28d19c0c5..a1a5816ad32 100644 --- a/core/arch/arm/kernel/tlb_helpers_a64.S +++ b/core/arch/arm/kernel/tlb_helpers_a64.S @@ -27,6 +27,7 @@ #include #include +#include /* void tlbi_all(void); */ FUNC tlbi_all , : @@ -49,10 +50,11 @@ END_FUNC tlbi_mva_allasid /* void tlbi_asid(unsigned int asid); */ FUNC tlbi_asid , : - and x0, x0, #TLBI_ASID_MASK lsl x0, x0, #TLBI_ASID_SHIFT dsb ishst /* Sync with table update */ tlbi aside1is, x0 /* Invalidate tlb by asid in inner shareable */ + orr x0, x0, #BIT(TLBI_ASID_SHIFT) /* Select the kernel ASID */ + tlbi aside1is, x0 /* Invalidate tlb by asid in inner shareable */ dsb ish /* Sync with tlb invalidation completion */ isb /* Sync execution on tlb update */ ret diff --git a/core/arch/arm/kernel/user_ta.c b/core/arch/arm/kernel/user_ta.c index efbfc9e8fc2..a96488b26cd 100644 --- a/core/arch/arm/kernel/user_ta.c +++ b/core/arch/arm/kernel/user_ta.c @@ -136,7 +136,7 @@ static TEE_Result load_elf_segments(struct user_ta_ctx *utc, uint32_t mattr; size_t idx = 0; - tee_mmu_map_clear(utc); + tee_mmu_map_init(utc); /* * Add stack segment @@ -501,12 +501,12 @@ static void user_ta_enter_close_session(struct tee_ta_session *s) static void user_ta_dump_state(struct tee_ta_ctx *ctx) { struct user_ta_ctx *utc __maybe_unused = to_user_ta_ctx(ctx); - char flags[4] = { '\0', }; + char flags[7] = { '\0', }; size_t n; EMSG_RAW(" arch: %s load address: 0x%x ctx-idr: %d", utc->is_32bit ? 
"arm" : "aarch64", utc->load_addr, - utc->context); + utc->mmu->asid); EMSG_RAW(" stack: 0x%" PRIxVA " %zu", utc->mmu->regions[TEE_MMU_UMAP_STACK_IDX].va, utc->mobj_stack->size); @@ -517,8 +517,8 @@ static void user_ta_dump_state(struct tee_ta_ctx *ctx) mobj_get_pa(utc->mmu->regions[n].mobj, utc->mmu->regions[n].offset, 0, &pa); - mattr_uflags_to_str(flags, sizeof(flags), - utc->mmu->regions[n].attr); + mattr_perm_to_str(flags, sizeof(flags), + utc->mmu->regions[n].attr); EMSG_RAW(" region %zu: va %#" PRIxVA " pa %#" PRIxPA " size %#zx flags %s", n, utc->mmu->regions[n].va, pa, @@ -584,7 +584,7 @@ static void user_ta_ctx_destroy(struct tee_ta_ctx *ctx) static uint32_t user_ta_get_instance_id(struct tee_ta_ctx *ctx) { - return to_user_ta_ctx(ctx)->context; + return to_user_ta_ctx(ctx)->mmu->asid; } static const struct tee_ta_ops user_ta_ops __rodata_unpaged = { diff --git a/core/arch/arm/mm/core_mmu.c b/core/arch/arm/mm/core_mmu.c index 75b6e99da3c..87735cf133d 100644 --- a/core/arch/arm/mm/core_mmu.c +++ b/core/arch/arm/mm/core_mmu.c @@ -590,7 +590,7 @@ static void add_va_space(struct tee_mmap_region *memory_map, size_t num_elems, uint32_t core_mmu_type_to_attr(enum teecore_memtypes t) { - const uint32_t attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_GLOBAL; + const uint32_t attr = TEE_MATTR_VALID_BLOCK; const uint32_t cached = TEE_MATTR_CACHE_CACHED << TEE_MATTR_CACHE_SHIFT; const uint32_t noncache = TEE_MATTR_CACHE_NONCACHE << TEE_MATTR_CACHE_SHIFT; diff --git a/core/arch/arm/mm/core_mmu_lpae.c b/core/arch/arm/mm/core_mmu_lpae.c index 89ea9b5f38b..0f94188ac0c 100644 --- a/core/arch/arm/mm/core_mmu_lpae.c +++ b/core/arch/arm/mm/core_mmu_lpae.c @@ -178,8 +178,19 @@ #define MAX_XLAT_TABLES 5 #endif -/* MMU L1 table, one for each core */ -static uint64_t l1_xlation_table[CFG_TEE_CORE_NB_CORE][NUM_L1_ENTRIES] +/* + * MMU L1 table, one for each core + * + * With CFG_CORE_UNMAP_CORE_AT_EL0, each core has one table to be used + * while in kernel mode and one to be used while in user mode. These are + * not static as the symbols are accessed directly from assembly. 
+ */ +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 +#define NUM_L1_TABLES 2 +#else +#define NUM_L1_TABLES 1 +#endif +uint64_t l1_xlation_table[NUM_L1_TABLES][CFG_TEE_CORE_NB_CORE][NUM_L1_ENTRIES] __aligned(NUM_L1_ENTRIES * XLAT_ENTRY_SIZE) __section(".nozi.mmu.l1"); static uint64_t xlat_tables[MAX_XLAT_TABLES][XLAT_TABLE_ENTRIES] @@ -473,6 +484,12 @@ void core_init_mmu_tables(struct tee_mmap_region *mm) uint64_t max_va = 0; size_t n; +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + COMPILE_TIME_ASSERT(CORE_MMU_L1_TBL_OFFSET == + sizeof(l1_xlation_table) / 2); +#endif + + for (n = 0; !core_mmap_is_end_of_table(mm + n); n++) { paddr_t pa_end; vaddr_t va_end; @@ -493,13 +510,13 @@ void core_init_mmu_tables(struct tee_mmap_region *mm) /* Clear table before use */ memset(l1_xlation_table[0], 0, NUM_L1_ENTRIES * XLAT_ENTRY_SIZE); - init_xlation_table(mm, 0, l1_xlation_table[0], 1); + init_xlation_table(mm, 0, l1_xlation_table[0][0], 1); for (n = 1; n < CFG_TEE_CORE_NB_CORE; n++) - memcpy(l1_xlation_table[n], l1_xlation_table[0], + memcpy(l1_xlation_table[0][n], l1_xlation_table[0][0], XLAT_ENTRY_SIZE * NUM_L1_ENTRIES); for (n = 1; n < NUM_L1_ENTRIES; n++) { - if (!l1_xlation_table[0][n]) { + if (!l1_xlation_table[0][0][n]) { user_va_idx = n; break; } @@ -526,7 +543,7 @@ void core_init_mmu_regs(void) uint32_t mair; paddr_t ttbr0; - ttbr0 = virt_to_phys(l1_xlation_table[get_core_pos()]); + ttbr0 = virt_to_phys(l1_xlation_table[0][get_core_pos()]); mair = MAIR_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX); mair |= MAIR_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, ATTR_IWBWA_OWBWA_NTR_INDEX); @@ -554,7 +571,7 @@ void core_init_mmu_regs(void) uint64_t tcr; paddr_t ttbr0; - ttbr0 = virt_to_phys(l1_xlation_table[get_core_pos()]); + ttbr0 = virt_to_phys(l1_xlation_table[0][get_core_pos()]); mair = MAIR_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX); mair |= MAIR_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, ATTR_IWBWA_OWBWA_NTR_INDEX); @@ -618,13 +635,13 @@ void core_mmu_create_user_map(struct user_ta_ctx *utc, memset(dir_info.table, 0, PGT_SIZE); core_mmu_populate_user_map(&dir_info, utc); map->user_map = virt_to_phys(dir_info.table) | TABLE_DESC; - map->asid = utc->context & TTBR_ASID_MASK; + map->asid = utc->mmu->asid; } bool core_mmu_find_table(vaddr_t va, unsigned max_level, struct core_mmu_table_info *tbl_info) { - uint64_t *tbl = l1_xlation_table[get_core_pos()]; + uint64_t *tbl = l1_xlation_table[0][get_core_pos()]; uintptr_t ntbl; unsigned level = 1; vaddr_t va_base = 0; @@ -743,7 +760,7 @@ bool core_mmu_user_mapping_is_active(void) uint32_t exceptions = thread_mask_exceptions(THREAD_EXCP_ALL); assert(user_va_idx != -1); - ret = l1_xlation_table[get_core_pos()][user_va_idx]; + ret = l1_xlation_table[0][get_core_pos()][user_va_idx]; thread_unmask_exceptions(exceptions); return ret; @@ -754,7 +771,7 @@ void core_mmu_get_user_map(struct core_mmu_user_map *map) { assert(user_va_idx != -1); - map->user_map = l1_xlation_table[get_core_pos()][user_va_idx]; + map->user_map = l1_xlation_table[0][get_core_pos()][user_va_idx]; if (map->user_map) { map->asid = (read_ttbr0_64bit() >> TTBR_ASID_SHIFT) & TTBR_ASID_MASK; @@ -778,13 +795,21 @@ void core_mmu_set_user_map(struct core_mmu_user_map *map) /* Set the new map */ if (map && map->user_map) { - l1_xlation_table[get_core_pos()][user_va_idx] = map->user_map; + l1_xlation_table[0][get_core_pos()][user_va_idx] = + map->user_map; +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + l1_xlation_table[1][get_core_pos()][user_va_idx] = + map->user_map; +#endif dsb(); /* Make sure the write above is visible */ ttbr |= ((uint64_t)map->asid << 
TTBR_ASID_SHIFT); write_ttbr0_64bit(ttbr); isb(); } else { - l1_xlation_table[get_core_pos()][user_va_idx] = 0; + l1_xlation_table[0][get_core_pos()][user_va_idx] = 0; +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + l1_xlation_table[1][get_core_pos()][user_va_idx] = 0; +#endif dsb(); /* Make sure the write above is visible */ } @@ -828,7 +853,7 @@ void core_mmu_get_user_map(struct core_mmu_user_map *map) { assert(user_va_idx != -1); - map->user_map = l1_xlation_table[get_core_pos()][user_va_idx]; + map->user_map = l1_xlation_table[0][get_core_pos()][user_va_idx]; if (map->user_map) { map->asid = (read_ttbr0_el1() >> TTBR_ASID_SHIFT) & TTBR_ASID_MASK; @@ -852,13 +877,21 @@ void core_mmu_set_user_map(struct core_mmu_user_map *map) /* Set the new map */ if (map && map->user_map) { - l1_xlation_table[get_core_pos()][user_va_idx] = map->user_map; + l1_xlation_table[0][get_core_pos()][user_va_idx] = + map->user_map; +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + l1_xlation_table[1][get_core_pos()][user_va_idx] = + map->user_map; +#endif dsb(); /* Make sure the write above is visible */ ttbr |= ((uint64_t)map->asid << TTBR_ASID_SHIFT); write_ttbr0_el1(ttbr); isb(); } else { - l1_xlation_table[get_core_pos()][user_va_idx] = 0; + l1_xlation_table[0][get_core_pos()][user_va_idx] = 0; +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + l1_xlation_table[1][get_core_pos()][user_va_idx] = 0; +#endif dsb(); /* Make sure the write above is visible */ } diff --git a/core/arch/arm/mm/core_mmu_v7.c b/core/arch/arm/mm/core_mmu_v7.c index 1d5dd887d17..5dad4491f62 100644 --- a/core/arch/arm/mm/core_mmu_v7.c +++ b/core/arch/arm/mm/core_mmu_v7.c @@ -199,8 +199,19 @@ enum desc_type { DESC_TYPE_INVALID, }; -/* Main MMU L1 table for teecore */ -static uint32_t main_mmu_l1_ttb[NUM_L1_ENTRIES] +/* + * Main MMU L1 table for teecore + * + * With CFG_CORE_UNMAP_CORE_AT_EL0, one table to be used while in kernel + * mode and one to be used while in user mode. These are not static as the + * symbols are accessed directly from assembly. 
+ */ +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 +#define NUM_L1_TABLES 2 +#else +#define NUM_L1_TABLES 1 +#endif +uint32_t main_mmu_l1_ttb[NUM_L1_TABLES][NUM_L1_ENTRIES] __aligned(L1_ALIGNMENT) __section(".nozi.mmu.l1"); /* L2 MMU tables */ @@ -213,7 +224,7 @@ static uint32_t main_mmu_ul1_ttb[CFG_NUM_THREADS][NUM_UL1_ENTRIES] static vaddr_t core_mmu_get_main_ttb_va(void) { - return (vaddr_t)main_mmu_l1_ttb; + return (vaddr_t)main_mmu_l1_ttb[0]; } static paddr_t core_mmu_get_main_ttb_pa(void) @@ -485,7 +496,7 @@ void core_mmu_create_user_map(struct user_ta_ctx *utc, memset(dir_info.table, 0, dir_info.num_entries * sizeof(uint32_t)); core_mmu_populate_user_map(&dir_info, utc); map->ttbr0 = core_mmu_get_ul1_ttb_pa() | TEE_MMU_DEFAULT_ATTRS; - map->ctxid = utc->context & 0xff; + map->ctxid = utc->mmu->asid; } bool core_mmu_find_table(vaddr_t va, unsigned max_level, @@ -803,8 +814,16 @@ void core_init_mmu_tables(struct tee_mmap_region *mm) void *ttb1 = (void *)core_mmu_get_main_ttb_va(); size_t n; +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + COMPILE_TIME_ASSERT(CORE_MMU_L1_TBL_OFFSET == + sizeof(main_mmu_l1_ttb) / 2); +#endif + /* reset L1 table */ memset(ttb1, 0, L1_TBL_SIZE); +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + memset(main_mmu_l1_ttb[1], 0, sizeof(main_mmu_l1_ttb[1])); +#endif for (n = 0; !core_mmap_is_end_of_table(mm + n); n++) if (!core_mmu_is_dynamic_vaspace(mm + n)) diff --git a/core/arch/arm/mm/mobj.c b/core/arch/arm/mm/mobj.c index 75bfb533d54..0b477c94ca5 100644 --- a/core/arch/arm/mm/mobj.c +++ b/core/arch/arm/mm/mobj.c @@ -44,6 +44,7 @@ #include struct mobj *mobj_sec_ddr; +struct mobj *mobj_tee_ram; /* * mobj_phys implementation diff --git a/core/arch/arm/mm/tee_mmu.c b/core/arch/arm/mm/tee_mmu.c index 0b11f10fa41..c12548a52aa 100644 --- a/core/arch/arm/mm/tee_mmu.c +++ b/core/arch/arm/mm/tee_mmu.c @@ -28,17 +28,18 @@ #include #include +#include #include -#include #include #include -#include -#include -#include -#include +#include #include #include #include +#include +#include +#include +#include #include #include #include @@ -63,8 +64,16 @@ #define TEE_MMU_UCACHE_DEFAULT_ATTR (TEE_MATTR_CACHE_CACHED << \ TEE_MATTR_CACHE_SHIFT) -/* Support for 31 concurrent sessions */ -static uint32_t g_asid = 0xffffffff; +/* + * Two ASIDs per context, one for kernel mode and one for user mode. ASID 0 + * and 1 are reserved and not used. This means a maximum of 31 loaded user + * mode contexts. This value can be increased but not beyond the maximum + * ASID, which is architecture dependent (max 255 for ARMv7-A and ARMv8-A + * Aarch32). 
+ */ +#define MMU_NUM_ASIDS 64 + +static bitstr_t bit_decl(g_asid, MMU_NUM_ASIDS); static TEE_Result tee_mmu_umap_add_param(struct tee_mmu_info *mmu, struct param_mem *mem) @@ -215,34 +224,46 @@ static TEE_Result tee_mmu_umap_set_vas(struct tee_mmu_info *mmu) return TEE_SUCCESS; } -TEE_Result tee_mmu_init(struct user_ta_ctx *utc) +static unsigned int asid_alloc(void) { - uint32_t asid = 1; - bool asid_allocated = false; + int i; - if (!utc->context) { - utc->context = 1; + bit_ffc(g_asid, MMU_NUM_ASIDS, &i); + if (i == -1) + return 0; + bit_set(g_asid, i); - /* Find available ASID */ - while (!(asid & g_asid) && (asid != 0)) { - utc->context++; - asid = asid << 1; - } + return (i + 1) * 2; +} - if (asid == 0) { - DMSG("Failed to allocate ASID"); - return TEE_ERROR_GENERIC; - } - g_asid &= ~asid; - asid_allocated = true; +static void asid_free(unsigned int asid) +{ + /* Only even ASIDs are supposed to be allocated */ + assert(!(asid & 1)); + + if (asid) { + int i = (asid - 1) / 2; + + assert(i < MMU_NUM_ASIDS && bit_test(g_asid, i)); + bit_clear(g_asid, i); + } +} + +TEE_Result tee_mmu_init(struct user_ta_ctx *utc) +{ + uint32_t asid = asid_alloc(); + + if (!asid) { + DMSG("Failed to allocate ASID"); + return TEE_ERROR_GENERIC; } utc->mmu = calloc(1, sizeof(struct tee_mmu_info)); if (!utc->mmu) { - if (asid_allocated) - g_asid |= asid; + asid_free(asid); return TEE_ERROR_OUT_OF_MEMORY; } + utc->mmu->asid = asid; core_mmu_get_user_va_range(&utc->mmu->ta_private_vmem_start, NULL); return TEE_SUCCESS; } @@ -285,6 +306,18 @@ static void free_pgt(struct user_ta_ctx *utc, vaddr_t base, size_t size) pgt_flush_ctx_range(pgt_cache, &utc->ctx, base, base + size); } +static vaddr_t get_stack_va(struct user_ta_ctx *utc) +{ +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + struct tee_ta_region *r = utc->mmu->regions + + TEE_MMU_UMAP_STACK_IDX - 1; + + return r->va + r->size; +#else + return utc->mmu->ta_private_vmem_start; +#endif +} + void tee_mmu_map_stack(struct user_ta_ctx *utc, struct mobj *mobj) { const size_t granule = CORE_MMU_USER_CODE_SIZE; @@ -293,7 +326,7 @@ void tee_mmu_map_stack(struct user_ta_ctx *utc, struct mobj *mobj) region->mobj = mobj; region->offset = 0; - region->va = utc->mmu->ta_private_vmem_start; + region->va = get_stack_va(utc); region->size = ROUNDUP(utc->mobj_stack->size, granule); region->attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_SECURE | TEE_MATTR_URW | TEE_MATTR_PRW | @@ -392,10 +425,28 @@ TEE_Result tee_mmu_map_add_segment(struct user_ta_ctx *utc, struct mobj *mobj, utc->mmu->ta_private_vmem_end); } -void tee_mmu_map_clear(struct user_ta_ctx *utc) +static void map_kinit(struct user_ta_ctx *utc __maybe_unused) +{ +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 + struct tee_ta_region *regions = utc->mmu->regions; + const uint32_t attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_SECURE | + (TEE_MATTR_CACHE_CACHED << TEE_MATTR_CACHE_SHIFT); + unsigned int idx; + + /* Add an entry for kernel code being mapped while in user mode */ + idx = TEE_MMU_UMAP_KCODE_IDX; + thread_get_user_kcode(®ions[idx].mobj, ®ions[idx].offset, + ®ions[idx].va, ®ions[idx].size); + regions[idx].attr = attr | TEE_MATTR_PRX; + assert(regions[idx].va == utc->mmu->ta_private_vmem_start); +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ +} + +void tee_mmu_map_init(struct user_ta_ctx *utc) { utc->mmu->ta_private_vmem_end = 0; memset(utc->mmu->regions, 0, sizeof(utc->mmu->regions)); + map_kinit(utc); } static void clear_param_map(struct user_ta_ctx *utc) @@ -624,15 +675,10 @@ void tee_mmu_rem_rwmem(struct user_ta_ctx *utc, struct mobj *mobj, vaddr_t 
va) */ void tee_mmu_final(struct user_ta_ctx *utc) { - uint32_t asid = 1 << ((utc->context - 1) & 0xff); - - /* return ASID */ - g_asid |= asid; - /* clear MMU entries to avoid clash when asid is reused */ - tlbi_asid(utc->context & 0xff); - utc->context = 0; + tlbi_asid(utc->mmu->asid); + asid_free(utc->mmu->asid); free(utc->mmu); utc->mmu = NULL; } diff --git a/core/arch/arm/mm/tee_pager.c b/core/arch/arm/mm/tee_pager.c index a06c8ecff78..099dc2ca8d3 100644 --- a/core/arch/arm/mm/tee_pager.c +++ b/core/arch/arm/mm/tee_pager.c @@ -476,7 +476,7 @@ static void *pager_add_alias_page(paddr_t pa) unsigned idx; struct core_mmu_table_info *ti; /* Alias pages mapped without write permission: runtime will care */ - uint32_t attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_GLOBAL | + uint32_t attr = TEE_MATTR_VALID_BLOCK | (TEE_MATTR_CACHE_CACHED << TEE_MATTR_CACHE_SHIFT) | TEE_MATTR_SECURE | TEE_MATTR_PR; @@ -627,9 +627,6 @@ static uint32_t get_area_mattr(uint32_t area_flags) TEE_MATTR_CACHE_CACHED << TEE_MATTR_CACHE_SHIFT | (area_flags & (TEE_MATTR_PRWX | TEE_MATTR_URWX)); - if (!(area_flags & (TEE_MATTR_UR | TEE_MATTR_UX | TEE_MATTR_UW))) - attr |= TEE_MATTR_GLOBAL; - return attr; } diff --git a/core/arch/arm/plat-imx/pm/pm-imx7.c b/core/arch/arm/plat-imx/pm/pm-imx7.c index 64cd54f84a2..5d1284f3530 100644 --- a/core/arch/arm/plat-imx/pm/pm-imx7.c +++ b/core/arch/arm/plat-imx/pm/pm-imx7.c @@ -134,7 +134,7 @@ int pm_imx7_iram_tbl_init(void) map.size = AIPS1_SIZE; /* 4M for AIPS1/2/3 */ map.type = MEM_AREA_IO_SEC; map.attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_PRW | - TEE_MATTR_GLOBAL | TEE_MATTR_SECURE | + TEE_MATTR_SECURE | (TEE_MATTR_CACHE_NONCACHE << TEE_MATTR_CACHE_SHIFT); map_memarea_sections(&map, (uint32_t *)iram_tbl_virt_addr); } @@ -145,8 +145,7 @@ int pm_imx7_iram_tbl_init(void) map.region_size = CORE_MMU_PGDIR_SIZE; map.size = CORE_MMU_DEVICE_SIZE; map.type = MEM_AREA_TEE_COHERENT; - map.attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_PRWX | TEE_MATTR_GLOBAL | - TEE_MATTR_SECURE; + map.attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_PRWX | TEE_MATTR_SECURE; map_memarea_sections(&map, (uint32_t *)iram_tbl_virt_addr); map.pa = GIC_BASE; @@ -154,8 +153,7 @@ int pm_imx7_iram_tbl_init(void) map.region_size = CORE_MMU_PGDIR_SIZE; map.size = CORE_MMU_DEVICE_SIZE; map.type = MEM_AREA_TEE_COHERENT; - map.attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_PRW | TEE_MATTR_GLOBAL | - TEE_MATTR_SECURE; + map.attr = TEE_MATTR_VALID_BLOCK | TEE_MATTR_PRW | TEE_MATTR_SECURE; map_memarea_sections(&map, (uint32_t *)iram_tbl_virt_addr); return 0; diff --git a/core/arch/arm/tee/entry_std.c b/core/arch/arm/tee/entry_std.c index d9cdc013d68..1a84fc69de3 100644 --- a/core/arch/arm/tee/entry_std.c +++ b/core/arch/arm/tee/entry_std.c @@ -31,9 +31,10 @@ #include #include #include +#include +#include #include #include -#include #include #include #include @@ -592,6 +593,14 @@ static TEE_Result default_mobj_init(void) if (!mobj_sec_ddr) panic("Failed to register secure ta ram"); + mobj_tee_ram = mobj_phys_alloc(CFG_TEE_RAM_START, + VCORE_UNPG_RW_PA + VCORE_UNPG_RW_SZ - + CFG_TEE_RAM_START, + TEE_MATTR_CACHE_CACHED, + CORE_MEM_TEE_RAM); + if (!mobj_tee_ram) + panic("Failed to register tee ram"); + #ifdef CFG_SECURE_DATA_PATH sdp_mem_mobjs = core_sdp_mem_create_mobjs(); if (!sdp_mem_mobjs) diff --git a/core/include/mm/tee_mmu.h b/core/include/mm/tee_mmu.h index 062196d9a6c..07902719d49 100644 --- a/core/include/mm/tee_mmu.h +++ b/core/include/mm/tee_mmu.h @@ -50,7 +50,7 @@ void tee_mmu_map_stack(struct user_ta_ctx *utc, struct mobj *mobj); TEE_Result 
tee_mmu_map_add_segment(struct user_ta_ctx *utc, struct mobj *mobj, size_t offs, size_t size, uint32_t prot); -void tee_mmu_map_clear(struct user_ta_ctx *utc); +void tee_mmu_map_init(struct user_ta_ctx *utc); /* Map parameters for a user TA */ TEE_Result tee_mmu_map_param(struct user_ta_ctx *utc, diff --git a/core/include/mm/tee_mmu_types.h b/core/include/mm/tee_mmu_types.h index 855136bfd28..dba07bee1e7 100644 --- a/core/include/mm/tee_mmu_types.h +++ b/core/include/mm/tee_mmu_types.h @@ -57,8 +57,13 @@ #define TEE_MATTR_LOCKED (1 << 15) +#ifdef CFG_CORE_UNMAP_CORE_AT_EL0 +#define TEE_MMU_UMAP_KCODE_IDX 0 +#define TEE_MMU_UMAP_STACK_IDX 1 +#else #define TEE_MMU_UMAP_STACK_IDX 0 -#define TEE_MMU_UMAP_CODE_IDX 1 +#endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/ +#define TEE_MMU_UMAP_CODE_IDX (TEE_MMU_UMAP_STACK_IDX + 1) #define TEE_MMU_UMAP_NUM_CODE_SEGMENTS 3 #define TEE_MMU_UMAP_PARAM_IDX (TEE_MMU_UMAP_CODE_IDX + \ @@ -87,17 +92,21 @@ struct tee_mmu_info { struct tee_ta_region regions[TEE_MMU_UMAP_MAX_ENTRIES]; vaddr_t ta_private_vmem_start; vaddr_t ta_private_vmem_end; + unsigned int asid; }; -static inline void mattr_uflags_to_str(char *str, size_t size, uint32_t attr) +static inline void mattr_perm_to_str(char *str, size_t size, uint32_t attr) { - if (size < 4) + if (size < 7) return; str[0] = (attr & TEE_MATTR_UR) ? 'r' : '-'; str[1] = (attr & TEE_MATTR_UW) ? 'w' : '-'; str[2] = (attr & TEE_MATTR_UX) ? 'x' : '-'; - str[3] = '\0'; + str[3] = (attr & TEE_MATTR_PR) ? 'R' : '-'; + str[4] = (attr & TEE_MATTR_PW) ? 'W' : '-'; + str[5] = (attr & TEE_MATTR_PX) ? 'X' : '-'; + str[6] = '\0'; } #endif diff --git a/documentation/optee_design.md b/documentation/optee_design.md index 2535eb87966..f93d468b727 100644 --- a/documentation/optee_design.md +++ b/documentation/optee_design.md @@ -150,6 +150,15 @@ table when the TA context is activated. ![Select xlation table](images/xlat_table.png "Select xlation table") +## Translation tables and switching to user mode +This section only applies with `CFG_WITH_LPAE=n` and +`CFG_CORE_UNMAP_CORE_AT_EL0=y`. + +When switching to user mode only a minimal kernel mode mapping is kept. +This is achieved by selecting a zeroed out big L1 translation table in TTBR1 +when transitioning to user mode. When returning back to kernel mode the +original L1 translation table is restored in TTBR1. + ## Translation tables and switching to normal world When switching to normal world either via a foreign interrupt or RPC there is a chance that secure world will resume execution on a different CPU.
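As a reading aid for thread_get_user_kcode(), init_user_kcode() and the eret_to_user_mode/eret_to_el0 paths above, the sketch below walks through the address arithmetic only: the vector page(s) are aliased at the bottom of the user VA range, and VBAR is pointed at the vector address minus thread_user_kcode_offset just before the full kernel mapping is dropped. The concrete addresses and the 4 KiB granule are made-up example values, not taken from an actual build.

```c
/*
 * Illustration of the arithmetic behind thread_user_kcode_offset: the
 * exception vector (and nearby code) is aliased at the bottom of the
 * user VA range, and eret_to_user_mode/eret_to_el0 point VBAR at
 * "vector - offset" before the full kernel mapping is dropped.
 * The addresses and the 4 KiB granule below are made-up example values.
 */
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CORE_MMU_USER_CODE_SIZE	0x1000	/* assumed small-page granule */
#define ROUNDDOWN(v, size)	((v) & ~((uintptr_t)(size) - 1))

int main(void)
{
	uintptr_t thread_vect_table = 0x0e10a840; /* example kernel VA */
	uintptr_t user_va_base = 0x00001000;	  /* example user VA base */

	uintptr_t kcode_va = ROUNDDOWN(thread_vect_table,
				       CORE_MMU_USER_CODE_SIZE);
	/* Two granules: the vector plus helpers may straddle a boundary */
	size_t kcode_size = 2 * CORE_MMU_USER_CODE_SIZE;
	uintptr_t offset = kcode_va - user_va_base;

	/* Where the same vector is visible in the reduced user-mode map */
	uintptr_t vbar_in_user_map = thread_vect_table - offset;

	printf("kcode VA 0x%" PRIxPTR " size 0x%zx, reduced-map VBAR 0x%"
	       PRIxPTR "\n", kcode_va, kcode_size, vbar_in_user_map);
	return 0;
}
```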
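The ASID handling is easiest to see end to end: asid_alloc() in tee_mmu.c hands out even ASIDs, the exception-return code flips the low ASID bit (in TTBR0 for LPAE/AArch64, in CONTEXTIDR for ARMv7) when crossing the user/kernel boundary, and tlbi_asid() therefore invalidates both the even value and its odd companion. The stand-in below models the (i + 1) * 2 encoding with a plain bool array instead of the bitstr_t used in the patch; it is an illustration, not the patch's code.

```c
/*
 * Model of the paired-ASID scheme in this patch: asid_alloc() hands out
 * even ASIDs (2, 4, 6, ...) and the exception-return code pairs each one
 * with its odd companion (asid | 1) by flipping the low ASID bit when
 * crossing the user/kernel boundary. ASIDs 0 and 1 stay reserved.
 * Plain-C stand-in for the bitstr_t allocator, for illustration only.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MMU_NUM_ASIDS	64

static bool asid_used[MMU_NUM_ASIDS];

static unsigned int asid_alloc(void)
{
	int i;

	for (i = 0; i < MMU_NUM_ASIDS; i++) {
		if (!asid_used[i]) {
			asid_used[i] = true;
			return (i + 1) * 2;	/* 2, 4, 6, ... */
		}
	}
	return 0;	/* out of ASIDs */
}

static void asid_free(unsigned int asid)
{
	assert(!(asid & 1));	/* only even ASIDs are handed out */
	if (asid)
		asid_used[asid / 2 - 1] = false;
}

int main(void)
{
	unsigned int asid = asid_alloc();

	assert(asid >= 2 && !(asid & 1));
	/* tlbi_asid() now invalidates both halves of the pair */
	printf("context ASID pair: %u and %u\n", asid, asid | 1);
	asid_free(asid);
	return 0;
}
```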
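Finally, the widened mattr_perm_to_str() dump format can be sanity-checked in isolation. In the snippet below the TEE_MATTR_* bit positions are invented for the sketch (the real definitions are not part of this patch's hunks); only the six-character "rwxRWX" layout follows the new helper.

```c
/*
 * Self-contained check of the widened permission string. The TEE_MATTR_*
 * bit positions are assumed for this sketch; only the lower-case user
 * and upper-case privileged r/w/x layout mirrors mattr_perm_to_str().
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define TEE_MATTR_UR	(1 << 4)	/* assumed bit positions */
#define TEE_MATTR_UW	(1 << 5)
#define TEE_MATTR_UX	(1 << 6)
#define TEE_MATTR_PR	(1 << 7)
#define TEE_MATTR_PW	(1 << 8)
#define TEE_MATTR_PX	(1 << 9)

static void mattr_perm_to_str(char *str, size_t size, uint32_t attr)
{
	if (size < 7)
		return;
	str[0] = (attr & TEE_MATTR_UR) ? 'r' : '-';
	str[1] = (attr & TEE_MATTR_UW) ? 'w' : '-';
	str[2] = (attr & TEE_MATTR_UX) ? 'x' : '-';
	str[3] = (attr & TEE_MATTR_PR) ? 'R' : '-';
	str[4] = (attr & TEE_MATTR_PW) ? 'W' : '-';
	str[5] = (attr & TEE_MATTR_PX) ? 'X' : '-';
	str[6] = '\0';
}

int main(void)
{
	char flags[7] = { '\0' };

	/* A kernel-only executable region, e.g. the mapped kernel code */
	mattr_perm_to_str(flags, sizeof(flags), TEE_MATTR_PR | TEE_MATTR_PX);
	printf("%s\n", flags);	/* prints "---R-X" */
	return 0;
}
```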