diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 671cfeccf04e94..d291ff3065dc90 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -65,6 +65,27 @@ static __always_inline u64 rsvd_bits(int s, int e)
 	return ((2ULL << (e - s)) - 1) << s;
 }
 
+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */
+extern u8 __read_mostly shadow_phys_bits;
+
+static inline gfn_t kvm_mmu_max_gfn_host(void)
+{
+	/*
+	 * Disallow SPTEs (via memslots or cached MMIO) whose gfn would exceed
+	 * host.MAXPHYADDR.  Assuming KVM is running on bare metal, guest
+	 * accesses beyond host.MAXPHYADDR will hit a #PF(RSVD) and never hit
+	 * an EPT Violation/Misconfig / #NPF, and so KVM will never install a
+	 * SPTE for such addresses.  That doesn't hold true if KVM is running
+	 * as a VM itself, e.g. if the MAXPHYADDR KVM sees is less than
+	 * hardware's real MAXPHYADDR, but since KVM can't honor such behavior
+	 * on bare metal, disallow it entirely to simplify e.g. the TDP MMU.
+	 */
+	return (1ULL << (shadow_phys_bits - PAGE_SHIFT)) - 1;
+}
+
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
 void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 904f0faff2186f..c10ae25135e3f5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3010,9 +3010,15 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
 		/*
 		 * If MMIO caching is disabled, emulate immediately without
 		 * touching the shadow page tables as attempting to install an
-		 * MMIO SPTE will just be an expensive nop.
+		 * MMIO SPTE will just be an expensive nop.  Do not cache MMIO
+		 * whose gfn is greater than host.MAXPHYADDR, any guest that
+		 * generates such gfns is either malicious or in the weeds.
+		 * Note, it's possible to observe a gfn > host.MAXPHYADDR if
+		 * and only if host.MAXPHYADDR is inaccurate with respect to
+		 * hardware behavior, e.g. if KVM itself is running as a VM.
 		 */
-		if (unlikely(!enable_mmio_caching)) {
+		if (unlikely(!enable_mmio_caching) ||
+		    unlikely(fault->gfn > kvm_mmu_max_gfn_host())) {
 			*ret_val = RET_PF_EMULATE;
 			return true;
 		}
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index ad8ce3c5d08396..43eceb821b3157 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -203,12 +203,6 @@ static inline bool is_removed_spte(u64 spte)
  */
 extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 
-/*
- * The number of non-reserved physical address bits irrespective of features
- * that repurpose legal bits, e.g. MKTME.
- */
-extern u8 __read_mostly shadow_phys_bits;
-
 static inline bool is_mmio_spte(u64 spte)
 {
 	return (spte & shadow_mmio_mask) == shadow_mmio_value &&
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 566548a3efa731..a81994ad43de2e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -815,14 +815,15 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
 	return iter->yielded;
 }
 
-static inline gfn_t tdp_mmu_max_gfn_host(void)
+static inline gfn_t tdp_mmu_max_exclusive_gfn_host(void)
 {
 	/*
-	 * Bound TDP MMU walks at host.MAXPHYADDR, guest accesses beyond that
-	 * will hit a #PF(RSVD) and never hit an EPT Violation/Misconfig / #NPF,
-	 * and so KVM will never install a SPTE for such addresses.
+	 * Bound TDP MMU walks at host.MAXPHYADDR.  KVM disallows memslots with
+	 * a gpa range that would exceed the max gfn, and KVM does not create
+	 * MMIO SPTEs for "impossible" gfns, instead sending such accesses down
+	 * the slow emulation path every time.
 	 */
-	return 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+	return kvm_mmu_max_gfn_host() + 1;
 }
 
 static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
@@ -830,7 +831,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
 {
 	struct tdp_iter iter;
 
-	gfn_t end = tdp_mmu_max_gfn_host();
+	gfn_t end = tdp_mmu_max_exclusive_gfn_host();
 	gfn_t start = 0;
 
 	for_each_tdp_pte_min_level(iter, root, zap_level, start, end) {
@@ -923,7 +924,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
 {
 	struct tdp_iter iter;
 
-	end = min(end, tdp_mmu_max_gfn_host());
+	end = min(end, tdp_mmu_max_exclusive_gfn_host());
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 951d0a78ccdae1..26ea5999d72b08 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12078,8 +12078,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *new,
 				   enum kvm_mr_change change)
 {
-	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
+	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
+		if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn_host())
+			return -EINVAL;
+
 		return kvm_alloc_memslot_metadata(kvm, new);
+	}
 
 	if (change == KVM_MR_FLAGS_ONLY)
 		memcpy(&new->arch, &old->arch, sizeof(old->arch));
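
As a standalone illustration (not part of the patch), the user-space sketch below mirrors the arithmetic the new kvm_mmu_max_gfn_host() helper and the memslot check in kvm_arch_prepare_memory_region() rely on: with a host MAXPHYADDR of phys_bits, the last legal gfn is (1ULL << (phys_bits - PAGE_SHIFT)) - 1, and a memslot is accepted only if its final gfn stays at or below that bound.  All example_* names and the harness are invented for the illustration and do not exist in KVM.

/*
 * Standalone sketch (not kernel code) of the max-gfn arithmetic and the
 * memslot bound check added by this patch.  Names are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12

/* Inclusive bound, mirroring kvm_mmu_max_gfn_host(). */
static uint64_t example_max_gfn_host(unsigned int phys_bits)
{
	return (1ULL << (phys_bits - EXAMPLE_PAGE_SHIFT)) - 1;
}

/* Same check as the new code in kvm_arch_prepare_memory_region(). */
static int example_memslot_ok(uint64_t base_gfn, uint64_t npages,
			      unsigned int phys_bits)
{
	return (base_gfn + npages - 1) <= example_max_gfn_host(phys_bits);
}

int main(void)
{
	/* host MAXPHYADDR = 46 => max gfn = 0x3ffffffff. */
	printf("max gfn     = %#llx\n",
	       (unsigned long long)example_max_gfn_host(46));

	/* A one-page slot ending exactly at the bound is accepted... */
	printf("top slot ok = %d\n", example_memslot_ok(0x3ffffffffULL, 1, 46));

	/* ...while a slot whose first gfn is past host.MAXPHYADDR is rejected. */
	printf("oob slot ok = %d\n", example_memslot_ok(0x400000000ULL, 1, 46));

	return 0;
}

Note that tdp_mmu_max_exclusive_gfn_host() in the patch is simply this inclusive bound plus one, so the TDP MMU keeps using it as an exclusive end for its walks.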