From 80883d09578cb9aa2ae7e843cd1a26da08907494 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre
Date: Tue, 9 Jul 2024 18:04:31 -0400
Subject: [PATCH] kernel: mmu: make demand paging work on SMP

This is the minimum for demand paging to work on SMP systems.

Signed-off-by: Nicolas Pitre
---
 kernel/mmu.c | 129 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 85 insertions(+), 44 deletions(-)

diff --git a/kernel/mmu.c b/kernel/mmu.c
index 401e824b8564455..7cafdded1fabf93 100644
--- a/kernel/mmu.c
+++ b/kernel/mmu.c
@@ -662,15 +662,16 @@ void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_
 		arch_mem_map(dst, phys, size, flags);
 	}
 
-	if (!uninit) {
+out:
+	k_spin_unlock(&z_mm_lock, key);
+
+	if (dst != NULL && !uninit) {
 		/* If we later implement mappings to a copy-on-write
 		 * zero page, won't need this step
 		 */
 		memset(dst, 0, size);
 	}
 
-out:
-	k_spin_unlock(&z_mm_lock, key);
 	return dst;
 }
 
@@ -1236,14 +1237,18 @@ static inline void do_backing_store_page_out(uintptr_t location)
 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
 }
 
-/* Current implementation relies on interrupt locking to any prevent page table
- * access, which falls over if other CPUs are active. Addressing this is not
- * as simple as using spinlocks as regular memory reads/writes constitute
- * "access" in this sense.
- *
- * Current needs for demand paging are on uniprocessor systems.
+#if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ)
+/*
+ * SMP support is very simple. Some resources such as the scratch page could
+ * be made per CPU, backing store driver execution be confined to the faulting
+ * CPU, statistics be made to cope with access concurrency, etc. But in the
+ * end we're dealing with memory transfer to/from some external storage which
+ * is inherently slow and whose access is most likely serialized anyway.
+ * So let's simply enforce global demand paging serialization across all CPUs
+ * with a mutex as there is no real gain from added parallelism here.
  */
-BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
+static K_MUTEX_DEFINE(z_mm_paging_lock);
+#endif
 
 static void virt_region_foreach(void *addr, size_t size,
 				void (*func)(void *))
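For context, the locking scheme introduced above is two-level: the new mutex serializes entire paging operations across CPUs (and may sleep), while the existing z_mm_lock spinlock keeps page table and page frame state consistent and is dropped around the slow backing store transfer. Below is a minimal sketch of that pattern, not code from this patch; it uses real Zephyr primitives, but paging_op_lock, mm_lock, and paging_operation are illustrative names.

#include <zephyr/kernel.h>

static K_MUTEX_DEFINE(paging_op_lock);	/* serializes paging ops across CPUs */
static struct k_spinlock mm_lock;	/* protects page table/frame state */

static void paging_operation(void)
{
	/* May sleep; other CPUs keep running until they page too. */
	k_mutex_lock(&paging_op_lock, K_FOREVER);

	k_spinlock_key_t key = k_spin_lock(&mm_lock);
	/* ... pick a page frame, update page tables ... */
	k_spin_unlock(&mm_lock, key);

	/* Slow backing store I/O happens with the spinlock released. */

	key = k_spin_lock(&mm_lock);
	/* ... finalize page frame state ... */
	k_spin_unlock(&mm_lock, key);

	k_mutex_unlock(&paging_op_lock);
}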
@@ -1333,16 +1338,21 @@ static int do_mem_evict(void *addr)
 	bool dirty;
 	struct k_mem_page_frame *pf;
 	uintptr_t location;
-	int key, ret;
+	k_spinlock_key_t key;
 	uintptr_t flags, phys;
+	int ret;
 
 #if CONFIG_DEMAND_PAGING_ALLOW_IRQ
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
 	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "address %p isn't mapped", addr);
@@ -1362,19 +1372,23 @@ static int do_mem_evict(void *addr)
 	__ASSERT(ret == 0, "failed to prepare page frame");
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	return ret;
 }
 
@@ -1400,11 +1414,12 @@ int k_mem_page_out(void *addr, size_t size)
 
 int k_mem_page_frame_evict(uintptr_t phys)
 {
-	int key, ret;
+	k_spinlock_key_t key;
 	struct k_mem_page_frame *pf;
 	bool dirty;
 	uintptr_t flags;
 	uintptr_t location;
+	int ret;
 
 	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
 		 __func__, phys);
@@ -1417,9 +1432,13 @@ int k_mem_page_frame_evict(uintptr_t phys)
 	__ASSERT(!k_is_in_isr(),
 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
 		 __func__);
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
 	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	pf = k_mem_phys_to_page_frame(phys);
 	if (!k_mem_page_frame_is_mapped(pf)) {
 		/* Nothing to do, free page */
@@ -1436,19 +1455,23 @@ int k_mem_page_frame_evict(uintptr_t phys)
 	}
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	if (dirty) {
 		do_backing_store_page_out(location);
 	}
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	page_frame_free_locked(pf);
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	return ret;
 }
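The irq_lock() to k_spin_lock() conversion running through these hunks is what actually makes the code SMP-correct: irq_lock() only masks interrupts on the calling CPU, so a thread on another CPU could still race the page table updates. A hedged sketch of the distinction follows; table_lock and shared_state are illustrative names, not code from this patch.

#include <zephyr/kernel.h>

static struct k_spinlock table_lock;
static int shared_state;

static void up_only(void)
{
	/* Masks interrupts on this CPU only: no exclusion vs. other CPUs. */
	unsigned int key = irq_lock();
	shared_state++;
	irq_unlock(key);
}

static void smp_safe(void)
{
	/* Masks local interrupts AND spins every other CPU out. */
	k_spinlock_key_t key = k_spin_lock(&table_lock);
	shared_state++;
	k_spin_unlock(&table_lock, key);
}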
@@ -1549,12 +1572,13 @@ static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
 static bool do_page_fault(void *addr, bool pin)
 {
 	struct k_mem_page_frame *pf;
-	int key, ret;
+	k_spinlock_key_t key;
 	uintptr_t page_in_location, page_out_location;
 	enum arch_page_location status;
 	bool result;
 	bool dirty = false;
-	struct k_thread *faulting_thread = _current_cpu->current;
+	struct k_thread *faulting_thread;
+	int ret;
 
 	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
 		 addr);
@@ -1568,13 +1592,11 @@ static bool do_page_fault(void *addr, bool pin)
 	 */
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	/* We lock the scheduler so that other threads are never scheduled
-	 * during the page-in/out operation.
-	 *
-	 * We do however re-enable interrupts during the page-in/page-out
-	 * operation if and only if interrupts were enabled when the exception
-	 * was taken; in this configuration page faults in an ISR are a bug;
-	 * all their code/data must be pinned.
+	/*
+	 * We do re-enable interrupts during the page-in/page-out operation
+	 * if and only if interrupts were enabled when the exception was
+	 * taken; in this configuration page faults in an ISR are a bug; all
+	 * their code/data must be pinned.
 	 *
 	 * If interrupts were disabled when the exception was taken, the
 	 * arch code is responsible for keeping them that way when entering
@@ -1584,20 +1606,35 @@ static bool do_page_fault(void *addr, bool pin)
 	 * entire operation. This is far worse for system interrupt latency
 	 * but requires less pinned pages and ISRs may also take page faults.
 	 *
-	 * Support for allowing k_mem_paging_backing_store_page_out() and
+	 * On UP we lock the scheduler so that other threads are never
+	 * scheduled during the page-in/out operation. Support for
+	 * allowing k_mem_paging_backing_store_page_out() and
 	 * k_mem_paging_backing_store_page_in() to also sleep and allow
 	 * other threads to run (such as in the case where the transfer is
-	 * async DMA) is not implemented. Even if limited to thread context,
-	 * arbitrary memory access triggering exceptions that put a thread to
-	 * sleep on a contended page fault operation will break scheduling
-	 * assumptions of cooperative threads or threads that implement
-	 * crticial sections with spinlocks or disabling IRQs.
+	 * async DMA) is not supported on UP. Even if limited to thread
+	 * context, arbitrary memory access triggering exceptions that put
+	 * a thread to sleep on a contended page fault operation will break
+	 * scheduling assumptions of cooperative threads or threads that
+	 * implement critical sections with spinlocks or disabling IRQs.
+	 *
+	 * On SMP, though, exclusivity cannot be assumed solely from being
+	 * a cooperative thread. Another thread with any prio may be running
+	 * on another CPU so exclusion must already be enforced by other
+	 * means. Therefore trying to prevent scheduling on SMP is pointless,
+	 * and k_sched_lock() is equivalent to a no-op on SMP anyway.
+	 * As a result, sleeping/rescheduling in the SMP case is fine.
 	 */
-	k_sched_lock();
 	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
+#ifdef CONFIG_SMP
+	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
+#else
+	k_sched_lock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
+
+	faulting_thread = _current_cpu->current;
 
 	status = arch_page_location_get(addr, &page_in_location);
 	if (status == ARCH_PAGE_LOCATION_BAD) {
 		/* Return false to treat as a fatal error */
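The comment above justifies picking the lock by configuration. One way to read the resulting pattern, factored into hypothetical paging_serialize()/paging_unserialize() helpers purely for illustration (the patch itself open-codes the #ifdefs at each call site, and this assumes the z_mm_paging_lock definition from the earlier hunk):

static inline void paging_serialize(void)
{
#ifdef CONFIG_SMP
	/* k_sched_lock() cannot stop threads on other CPUs, so take the
	 * global mutex instead; sleeping here is acceptable on SMP.
	 */
	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
#else
	/* UP: simply keep other threads from being scheduled. */
	k_sched_lock();
#endif
}

static inline void paging_unserialize(void)
{
#ifdef CONFIG_SMP
	k_mutex_unlock(&z_mm_paging_lock);
#else
	k_sched_unlock();
#endif
}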
@@ -1628,7 +1665,7 @@ static bool do_page_fault(void *addr, bool pin)
 	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
 		 "unexpected status value %d", status);
 
-	paging_stats_faults_inc(faulting_thread, key);
+	paging_stats_faults_inc(faulting_thread, key.key);
 
 	pf = free_page_frame_list_get();
 	if (pf == NULL) {
@@ -1645,7 +1682,7 @@ static bool do_page_fault(void *addr, bool pin)
 	__ASSERT(ret == 0, "failed to prepare page frame");
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 	/* Interrupts are now unlocked if they were not locked when we entered
 	 * this function, and we may service ISRs. The scheduler is still
 	 * locked.
@@ -1657,7 +1694,7 @@ static bool do_page_fault(void *addr, bool pin)
 	do_backing_store_page_in(page_in_location);
 
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
@@ -1672,9 +1709,13 @@ static bool do_page_fault(void *addr, bool pin)
 		k_mem_paging_eviction_add(pf);
 	}
 out:
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+#ifdef CONFIG_SMP
+	k_mutex_unlock(&z_mm_paging_lock);
+#else
 	k_sched_unlock();
+#endif
 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
 
 	return result;
@@ -1722,10 +1763,10 @@ bool k_mem_page_fault(void *addr)
 static void do_mem_unpin(void *addr)
 {
 	struct k_mem_page_frame *pf;
-	unsigned int key;
+	k_spinlock_key_t key;
 	uintptr_t flags, phys;
 
-	key = irq_lock();
+	key = k_spin_lock(&z_mm_lock);
 	flags = arch_page_info_get(addr, &phys, false);
 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
 		 "invalid data page at %p", addr);
@@ -1736,7 +1777,7 @@ static void do_mem_unpin(void *addr)
 			k_mem_paging_eviction_add(pf);
 		}
 	}
-	irq_unlock(key);
+	k_spin_unlock(&z_mm_lock, key);
 }
 
 void k_mem_unpin(void *addr, size_t size)
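One practical consequence worth noting: even with CONFIG_DEMAND_PAGING_ALLOW_IRQ, page faults in ISRs remain forbidden (see the __ASSERT above), so any memory an interrupt handler touches must be pinned. A usage sketch with an illustrative buffer and driver hooks; the header path is the usual location of the demand paging API, verify it against your tree.

#include <zephyr/kernel.h>
#include <zephyr/kernel/mm/demand_paging.h>

static uint8_t isr_buf[1024];	/* illustrative buffer used by an ISR */

void driver_init(void)
{
	/* Page the buffer in and keep it resident while interrupts may fire. */
	k_mem_pin(isr_buf, sizeof(isr_buf));
}

void driver_teardown(void)
{
	/* Allow the pages to be evicted again. */
	k_mem_unpin(isr_buf, sizeof(isr_buf));
}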