Skip to content

Commit

Permalink
arm64: support page mapping percpu first chunk allocator
Browse files Browse the repository at this point in the history
The percpu embedded first chunk allocator is the first option tried, but it
can fail on ARM64, e.g.,

  percpu: max_distance=0x5fcfdc640000 too large for vmalloc space 0x781fefff0000
  percpu: max_distance=0x600000540000 too large for vmalloc space 0x7dffb7ff0000
  percpu: max_distance=0x5fff9adb0000 too large for vmalloc space 0x5dffb7ff0000

then we could get

  WARNING: CPU: 15 PID: 461 at vmalloc.c:3087 pcpu_get_vm_areas+0x488/0x838

and the system could not boot successfully.

Let's implement the page-mapping percpu first chunk allocator as a fallback
to the embedding allocator to increase the robustness of the system.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Kefeng Wang <[email protected]>
Reviewed-by: Catalin Marinas <[email protected]>
Cc: Andrey Konovalov <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Marco Elver <[email protected]>
Cc: Will Deacon <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Kefeng Wang authored and torvalds committed Nov 6, 2021
1 parent 0eb6843 commit 09cea61
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 10 deletions.
4 changes: 4 additions & 0 deletions arch/arm64/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool y
depends on NUMA

config NEED_PER_CPU_PAGE_FIRST_CHUNK
def_bool y
depends on NUMA

source "kernel/Kconfig.hz"

config ARCH_SPARSEMEM_ENABLE
Expand Down
82 changes: 72 additions & 10 deletions drivers/base/arch_numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <linux/of.h>

#include <asm/sections.h>
#include <asm/pgalloc.h>

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
Expand Down Expand Up @@ -168,22 +169,83 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
memblock_free_early(__pa(ptr), size);
}

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
/*
 * pcpu_populate_pte - make sure the page-table levels above the PTE entry
 * for @addr exist.
 * @addr: virtual address whose intermediate page-table levels are checked.
 *
 * Starting from the pgd entry returned by pgd_offset_k(), walk
 * p4d -> pud -> pmd for @addr.  Whenever a level is found empty, one
 * PAGE_SIZE-sized, PAGE_SIZE-aligned block is taken from memblock and hooked
 * in against init_mm.  No PTE entry is written here; only the tables leading
 * to it are installed.
 *
 * Does not return on allocation failure: any failed memblock_alloc() ends in
 * panic().
 *
 * Handed to pcpu_page_first_chunk() as its populate callback by
 * setup_per_cpu_areas().
 */
static void __init pcpu_populate_pte(unsigned long addr)
{
pgd_t *pgd = pgd_offset_k(addr);
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;

/* Level 1: the p4d entry must point at a pud table. */
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
pud_t *new;

new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!new)
goto err_alloc;
p4d_populate(&init_mm, p4d, new);
}

/* Level 2: the pud entry must point at a pmd table. */
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
pmd_t *new;

new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!new)
goto err_alloc;
pud_populate(&init_mm, pud, new);
}

/*
 * Level 3: the pmd entry must point at a pte table.  Note that this level
 * tests pmd_present() where the two levels above test *_none().
 */
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd)) {
pte_t *new;

new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!new)
goto err_alloc;
pmd_populate_kernel(&init_mm, pmd, new);
}

return;

/*
 * Shared failure path for all three allocations above.
 * NOTE(review): the value printed after "from=" is PAGE_SIZE, yet the
 * memblock_alloc() calls above take only a size and an alignment -- confirm
 * whether that field is meaningful.
 */
err_alloc:
panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
}
#endif

void __init setup_per_cpu_areas(void)
{
unsigned long delta;
unsigned int cpu;
int rc;
int rc = -EINVAL;

if (pcpu_chosen_fc != PCPU_FC_PAGE) {
/*
* Always reserve area for module percpu variables. That's
* what the legacy allocator did.
*/
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
#endif
}

/*
* Always reserve area for module percpu variables. That's
* what the legacy allocator did.
*/
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
pcpu_fc_alloc,
pcpu_fc_free,
pcpu_populate_pte);
#endif
if (rc < 0)
panic("Failed to initialize percpu areas.");
panic("Failed to initialize percpu areas (err=%d).", rc);

delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu)
Expand Down

0 comments on commit 09cea61

Please sign in to comment.