diff options
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu.c | 136 |
1 files changed, 76 insertions, 60 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 69088a1ba509..e7c2c1428a69 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -818,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm->arch.indirect_shadow_pages--; } -static int has_wrprotected_page(struct kvm_vcpu *vcpu, - gfn_t gfn, - int level) +static int __has_wrprotected_page(gfn_t gfn, int level, + struct kvm_memory_slot *slot) { - struct kvm_memory_slot *slot; struct kvm_lpage_info *linfo; - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); if (slot) { linfo = lpage_info_slot(gfn, slot, level); return linfo->write_count; @@ -834,6 +831,14 @@ static int has_wrprotected_page(struct kvm_vcpu *vcpu, return 1; } +static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level) +{ + struct kvm_memory_slot *slot; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + return __has_wrprotected_page(gfn, level, slot); +} + static int host_mapping_level(struct kvm *kvm, gfn_t gfn) { unsigned long page_size; @@ -851,6 +856,17 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) return ret; } +static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot, + bool no_dirty_log) +{ + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return false; + if (no_dirty_log && slot->dirty_bitmap) + return false; + + return true; +} + static struct kvm_memory_slot * gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) @@ -858,21 +874,25 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if (!slot || slot->flags & KVM_MEMSLOT_INVALID || - (no_dirty_log && slot->dirty_bitmap)) + if (!memslot_valid_for_gpte(slot, no_dirty_log)) slot = NULL; return slot; } -static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) -{ - return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); -} - -static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) +static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, + bool *force_pt_level) { int host_level, level, max_level; + struct kvm_memory_slot *slot; + + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); + *force_pt_level = !memslot_valid_for_gpte(slot, true); + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; host_level = host_mapping_level(vcpu->kvm, large_gfn); @@ -882,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) max_level = min(kvm_x86_ops->get_lpage_level(), host_level); for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) - if (has_wrprotected_page(vcpu, large_gfn, level)) + if (__has_wrprotected_page(large_gfn, level, slot)) break; return level - 1; @@ -2962,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, { int r; int level; - int force_pt_level; + bool force_pt_level = false; pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; - force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); + level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { - level = mapping_level(vcpu, gfn); /* * This path builds a PAE pagetable - so we can map * 2mb pages at maximum. Therefore check if the level @@ -2979,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, level = PT_DIRECTORY_LEVEL; gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } else - level = PT_PAGE_TABLE_LEVEL; + } if (fast_page_fault(vcpu, v, level, error_code)) return 0; @@ -3322,7 +3340,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) break; reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, - leaf); + iterator.level); } walk_shadow_page_lockless_end(vcpu); @@ -3341,7 +3359,7 @@ exit: return reserved; } -int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) +int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; bool reserved; @@ -3350,7 +3368,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) return RET_MMIO_PF_EMULATE; reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); - if (unlikely(reserved)) + if (WARN_ON(reserved)) return RET_MMIO_PF_BUG; if (is_mmio_spte(spte)) { @@ -3374,17 +3392,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) */ return RET_MMIO_PF_RETRY; } -EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); - -static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, - u32 error_code, bool direct) -{ - int ret; - - ret = handle_mmio_page_fault_common(vcpu, addr, direct); - WARN_ON(ret == RET_MMIO_PF_BUG); - return ret; -} +EXPORT_SYMBOL_GPL(handle_mmio_page_fault); static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, bool prefault) @@ -3395,7 +3403,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gva, error_code, true); + r = handle_mmio_page_fault(vcpu, gva, true); if (likely(r != RET_MMIO_PF_INVALID)) return r; @@ -3427,7 +3435,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) static bool can_do_async_pf(struct kvm_vcpu *vcpu) { - if (unlikely(!irqchip_in_kernel(vcpu->kvm) || + if (unlikely(!lapic_in_kernel(vcpu) || kvm_event_needs_reinjection(vcpu))) return false; @@ -3476,7 +3484,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, pfn_t pfn; int r; int level; - int force_pt_level; + bool force_pt_level; gfn_t gfn = gpa >> PAGE_SHIFT; unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; @@ -3485,7 +3493,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gpa, error_code, true); + r = handle_mmio_page_fault(vcpu, gpa, true); if (likely(r != RET_MMIO_PF_INVALID)) return r; @@ -3495,20 +3503,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (r) return r; - if (mapping_level_dirty_bitmap(vcpu, gfn) || - !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL)) - force_pt_level = 1; - else - force_pt_level = 0; - + force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, + PT_DIRECTORY_LEVEL); + level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { - level = mapping_level(vcpu, gfn); if (level > PT_DIRECTORY_LEVEL && !check_hugepage_cache_consistency(vcpu, gfn, level)) level = PT_DIRECTORY_LEVEL; gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } else - level = PT_PAGE_TABLE_LEVEL; + } if (fast_page_fault(vcpu, gpa, level, error_code)) return 0; @@ -3614,7 +3617,7 @@ static void __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct rsvd_bits_validate *rsvd_check, int maxphyaddr, int level, bool nx, bool gbpages, - bool pse) + bool pse, bool amd) { u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; @@ -3631,7 +3634,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for * leaf entries) on AMD CPUs only. */ - if (guest_cpuid_is_amd(vcpu)) + if (amd) nonleaf_bit8_rsvd = rsvd_bits(8, 8); switch (level) { @@ -3699,14 +3702,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, cpuid_maxphyaddr(vcpu), context->root_level, context->nx, guest_cpuid_has_gbpages(vcpu), - is_pse(vcpu)); + is_pse(vcpu), guest_cpuid_is_amd(vcpu)); } static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, int maxphyaddr, bool execonly) { - int pte; + u64 bad_mt_xwr; rsvd_check->rsvd_bits_mask[0][3] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); @@ -3724,14 +3727,16 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; - for (pte = 0; pte < 64; pte++) { - int rwx_bits = pte & 7; - int mt = pte >> 3; - if (mt == 0x2 || mt == 0x3 || mt == 0x7 || - rwx_bits == 0x2 || rwx_bits == 0x6 || - (rwx_bits == 0x4 && !execonly)) - rsvd_check->bad_mt_xwr |= (1ull << pte); + bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ + bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */ + bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */ + if (!execonly) { + /* bits 0..2 must not be 100 unless VMX capabilities allow it */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 4); } + rsvd_check->bad_mt_xwr = bad_mt_xwr; } static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, @@ -3749,13 +3754,24 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + /* + * Passing "true" to the last argument is okay; it adds a check + * on bit 8 of the SPTEs which KVM doesn't use anyway. + */ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, context->nx, - guest_cpuid_has_gbpages(vcpu), is_pse(vcpu)); + guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), + true); } EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); +static inline bool boot_cpu_is_amd(void) +{ + WARN_ON_ONCE(!tdp_enabled); + return shadow_x_mask == 0; +} + /* * the direct page table on host, use as much mmu features as * possible, however, kvm currently does not do execution-protection. @@ -3764,11 +3780,11 @@ static void reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - if (guest_cpuid_is_amd(vcpu)) + if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, false, - cpu_has_gbpages, true); + cpu_has_gbpages, true, true); else __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, boot_cpu_data.x86_phys_bits, |