Skip to content

Commit

Permalink
powerpc/pseries: Add support for hash table resizing
Browse files Browse the repository at this point in the history
This adds support for using two hypercalls to change the size of the
main hash page table while running as a PAPR guest. For now these
hypercalls are only in experimental qemu versions.

The interface is two part: first H_RESIZE_HPT_PREPARE is used to
allocate and prepare the new hash table. This may be slow, but can be
done asynchronously. Then, H_RESIZE_HPT_COMMIT is used to switch to the
new hash table. This requires that no CPUs be concurrently updating the
HPT, and so must be run under stop_machine().

This also adds a debugfs file which can be used to manually control
HPT resizing for testing purposes.

Signed-off-by: David Gibson <[email protected]>
Reviewed-by: Paul Mackerras <[email protected]>
[mpe: Rename the debugfs file to "hpt_order"]
Signed-off-by: Michael Ellerman <[email protected]>
  • Loading branch information
dgibson authored and mpe committed Feb 10, 2017
1 parent 64b40ff commit dbcf929
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/book3s/64/mmu-hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ struct mmu_hash_ops {
unsigned long addr,
unsigned char *hpte_slot_array,
int psize, int ssize, int local);
int (*resize_hpt)(unsigned long shift);
/*
* Special for kexec.
* To be called in real mode with interrupts disabled. No locks are
Expand Down
33 changes: 33 additions & 0 deletions arch/powerpc/mm/hash_utils_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
#include <linux/debugfs.h>

#include <asm/debug.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
Expand Down Expand Up @@ -1795,3 +1797,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
}

#ifdef CONFIG_DEBUG_FS

/*
 * debugfs read handler for "hpt_order": report the current hash page
 * table order (the value of ppc64_pft_size) through @val.
 *
 * @data is not used. Always returns 0.
 */
static int hpt_order_get(void *data, u64 *val)
{
	u64 order = ppc64_pft_size;

	*val = order;

	return 0;
}

/*
 * debugfs write handler for "hpt_order": ask the platform to resize the
 * hash page table to the order given in @val.
 *
 * @data is not used. Returns -ENODEV when the platform has not installed
 * a resize_hpt hook in mmu_hash_ops; otherwise returns whatever the hook
 * returns.
 */
static int hpt_order_set(void *data, u64 val)
{
	if (mmu_hash_ops.resize_hpt)
		return mmu_hash_ops.resize_hpt(val);

	return -ENODEV;
}

/*
 * Defines fops_hpt_order, pairing hpt_order_get() and hpt_order_set() as the
 * read and write handlers, with the value formatted as "%llu\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");

/*
 * Create the "hpt_order" debugfs file (mode 0600) under
 * powerpc_debugfs_root, backed by fops_hpt_order.
 *
 * A failure to create the file is only logged; it is not treated as an
 * initcall failure, so this always returns 0.
 */
static int __init hash64_debugfs(void)
{
	if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
				 NULL, &fops_hpt_order)) {
		pr_err("lpar: unable to create hpt_order debugfs file\n");
	}

	return 0;
}
/* Registered as a device initcall that is restricted to the pseries machine. */
machine_device_initcall(pseries, hash64_debugfs);

#endif /* CONFIG_DEBUG_FS */
109 changes: 109 additions & 0 deletions arch/powerpc/platforms/pseries/lpar.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include <linux/console.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
Expand Down Expand Up @@ -609,6 +611,112 @@ static int __init disable_bulk_remove(char *str)

__setup("bulk_remove=", disable_bulk_remove);

/*
 * Upper bound on the accumulated long-busy delay that
 * pseries_lpar_resize_hpt() tolerates while waiting for
 * H_RESIZE_HPT_PREPARE before it cancels the resize.
 */
#define HPT_RESIZE_TIMEOUT 10000 /* ms */

/*
 * Argument block handed to pseries_lpar_resize_hpt_commit() through
 * stop_machine(); also carries the hypercall result back to the caller.
 */
struct hpt_resize_state {
/* Requested HPT shift, passed to plpar_resize_hpt_commit() */
unsigned long shift;
/* Return value of plpar_resize_hpt_commit(), filled in by the commit callback */
int commit_rc;
};

/*
 * stop_machine() callback: issue the HPT resize commit hypercall and, on
 * success, bring the kernel's view of the hash table size in line with it.
 *
 * @data points to a struct hpt_resize_state. The raw hypercall status is
 * stored in ->commit_rc so the caller can tell failures apart.
 *
 * Returns 0 on success, -EIO if the hypercall did not return H_SUCCESS.
 */
static int pseries_lpar_resize_hpt_commit(void *data)
{
	struct hpt_resize_state *resize = data;

	resize->commit_rc = plpar_resize_hpt_commit(0, resize->shift);

	if (resize->commit_rc == H_SUCCESS) {
		/* The hypervisor switched to the new HTAB: refresh our globals */
		ppc64_pft_size = resize->shift;
		htab_size_bytes = 1UL << ppc64_pft_size;
		htab_hash_mask = (htab_size_bytes >> 7) - 1;

		return 0;
	}

	return -EIO;
}

/*
 * Resize the hash page table to 2^@shift bytes.
 *
 * Works in two phases: the prepare hypercall is retried (sleeping for the
 * delay the hypervisor asks for) while it reports long-busy, then the
 * commit hypercall is run under stop_machine().
 *
 * Must be called in user context, since it may sleep.
 *
 * Returns 0 on success, or:
 *   -ENODEV     firmware lacks FW_FEATURE_HPT_RESIZE
 *   -ETIMEDOUT  prepare stayed busy past HPT_RESIZE_TIMEOUT (the resize
 *               is cancelled first)
 *   -EINVAL     prepare returned H_PARAMETER
 *   -EPERM      prepare returned H_RESOURCE
 *   -ENOSPC     commit returned H_PTEG_FULL
 *   -EIO        any other prepare or commit failure
 */
static int pseries_lpar_resize_hpt(unsigned long shift)
{
	struct hpt_resize_state state = {
		.shift = shift,
		.commit_rc = H_FUNCTION,
	};
	unsigned int wait_ms, waited_ms = 0;
	ktime_t start, prepared, committed;
	int rc;

	might_sleep();

	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
		return -ENODEV;

	printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
	       shift);

	start = ktime_get();

	/* Phase 1: keep asking until the hypervisor stops reporting long-busy */
	for (rc = plpar_resize_hpt_prepare(0, shift);
	     H_IS_LONG_BUSY(rc);
	     rc = plpar_resize_hpt_prepare(0, shift)) {
		wait_ms = get_longbusy_msecs(rc);
		waited_ms += wait_ms;

		if (waited_ms > HPT_RESIZE_TIMEOUT) {
			/* A prepare with shift == 0 cancels an in-progress resize */
			rc = plpar_resize_hpt_prepare(0, 0);
			if (rc != H_SUCCESS)
				printk(KERN_WARNING
				       "lpar: Unexpected error %d cancelling timed out HPT resize\n",
				       rc);
			return -ETIMEDOUT;
		}

		msleep(wait_ms);
	}

	/* Anything other than H_SUCCESS ends the attempt here */
	if (rc == H_PARAMETER)
		return -EINVAL;
	if (rc == H_RESOURCE)
		return -EPERM;
	if (rc != H_SUCCESS) {
		printk(KERN_WARNING
		       "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
		       rc);
		return -EIO;
	}

	prepared = ktime_get();

	/* Phase 2: switch to the new table while all other CPUs are stopped */
	rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);

	committed = ktime_get();

	if (rc != 0) {
		if (state.commit_rc == H_PTEG_FULL) {
			printk(KERN_WARNING
			       "lpar: Hash collision while resizing HPT\n");
			return -ENOSPC;
		}

		printk(KERN_WARNING
		       "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
		       state.commit_rc);
		return -EIO;
	}

	/* Report the prepare time and the commit time separately */
	printk(KERN_INFO
	       "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
	       shift, (long long) ktime_ms_delta(prepared, start),
	       (long long) ktime_ms_delta(committed, prepared));

	return 0;
}

void __init hpte_init_pseries(void)
{
mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
Expand All @@ -620,6 +728,7 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
}

#ifdef CONFIG_PPC_SMLPAR
Expand Down

0 comments on commit dbcf929

Please sign in to comment.