From facee1be7689f8cf573b9ffee6a5c28ee193615e Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 28 Jul 2021 15:32:31 +0000 Subject: KVM: arm64: Fix off-by-one in range_is_memory Hyp checks whether an address range only covers RAM by checking the start/endpoints against a list of memblock_region structs. However, the endpoint here is exclusive but internally is treated as inclusive. Fix the off-by-one error that caused valid address ranges to be rejected. Cc: Quentin Perret Fixes: 90134ac9cabb6 ("KVM: arm64: Protect the .hyp sections from the host") Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210728153232.1018911-2-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..a6ce991b1467 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; - if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2)) + if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) return false; if (r1.start != r2.start) return false; -- cgit v1.2.3-59-g8ed1b From c4d7c51845af9542d42cd18a25c570583abf2768 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 29 Jul 2021 17:00:36 +0100 Subject: KVM: arm64: Fix race when enabling KVM_ARM_CAP_MTE When enabling KVM_CAP_ARM_MTE the ioctl checks that there are no VCPUs created to ensure that the capability is enabled before the VM is running. However no locks are held at that point so it is (theoretically) possible for another thread in the VMM to create VCPUs between the check and actually setting mte_enabled. Close the race by taking kvm->lock. Reported-by: Alexandru Elisei Fixes: 673638f434ee ("KVM: arm64: Expose KVM_ARM_CAP_MTE") Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210729160036.20433-1-steven.price@arm.com --- arch/arm64/kvm/arm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..0ca72f5cda41 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.return_nisv_io_abort_to_user = true; break; case KVM_CAP_ARM_MTE: - if (!system_supports_mte() || kvm->created_vcpus) - return -EINVAL; - r = 0; - kvm->arch.mte_enabled = true; + mutex_lock(&kvm->lock); + if (!system_supports_mte() || kvm->created_vcpus) { + r = -EINVAL; + } else { + r = 0; + kvm->arch.mte_enabled = true; + } + mutex_unlock(&kvm->lock); break; default: r = -EINVAL; -- cgit v1.2.3-59-g8ed1b From d21292f13f1f0721d60e8122e2db46bea8cf6950 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 9 Aug 2021 16:24:28 +0100 Subject: KVM: arm64: Add hyp_spin_is_locked() for basic locking assertions at EL2 Introduce hyp_spin_is_locked() so that functions can easily assert that a given lock is held (albeit possibly by another CPU!) without having to drag full lockdep support up to EL2. 
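For context, the nVHE hyp spinlock is a ticket lock, so it is held exactly when its owner and next tickets differ, which is all the new helper needs to check. A minimal sketch of the assertion pattern this enables (the function below is purely illustrative, not part of the patch):

	/*
	 * Illustrative only: assert that a hyp lock is held (possibly by
	 * another CPU) before touching the data it protects.
	 */
	static void __example_touch_protected_data(hyp_spinlock_t *lock)
	{
		BUG_ON(!hyp_spin_is_locked(lock));
		/* ... update the state guarded by *lock ... */
	}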
Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-2-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h index 76b537f8d1c6..04f65b655fcf 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -15,6 +15,7 @@ #include #include +#include typedef union hyp_spinlock { u32 __val; @@ -89,4 +90,11 @@ static inline void hyp_spin_unlock(hyp_spinlock_t *lock) : "memory"); } +static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock) +{ + hyp_spinlock_t lockval = READ_ONCE(*lock); + + return lockval.owner != lockval.next; +} + #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */ -- cgit v1.2.3-59-g8ed1b From 8e049e0daf23aa380c264e5e15e4c64ea5497ed7 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:29 +0100 Subject: KVM: arm64: Introduce hyp_assert_lock_held() Introduce a poor man's lockdep implementation at EL2 which allows to BUG() whenever a hyp spinlock is not held when it should. Hide this feature behind a new Kconfig option that targets the EL2 object specifically, instead of piggy backing on the existing CONFIG_LOCKDEP. EL2 cannot WARN() cleanly to report locking issues, hence BUG() is the only option and it is not clear whether we want this widely enabled. This is most likely going to be useful for local testing until the EL2 WARN() situation has improved. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-3-qperret@google.com --- arch/arm64/kvm/Kconfig | 9 +++++++++ arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a4eba0908bfa..9b9721895e5c 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -46,6 +46,15 @@ if KVM source "virt/kvm/Kconfig" +config NVHE_EL2_DEBUG + bool "Debug mode for non-VHE EL2 object" + help + Say Y here to enable the debug mode for the non-VHE KVM EL2 object. + Failure reports will BUG() in the hypervisor. This is intended for + local EL2 hypervisor development. + + If unsure, say N. + endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h index 04f65b655fcf..4652fd04bdbe 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -97,4 +97,21 @@ static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock) return lockval.owner != lockval.next; } +#ifdef CONFIG_NVHE_EL2_DEBUG +static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) +{ + /* + * The __pkvm_init() path accesses protected data-structures without + * holding locks as the other CPUs are guaranteed to not enter EL2 + * concurrently at this point in time. The point by which EL2 is + * initialized on all CPUs is reflected in the pkvm static key, so + * wait until it is set before checking the lock state. 
+ */ + if (static_branch_likely(&kvm_protected_mode_initialized)) + BUG_ON(!hyp_spin_is_locked(lock)); +} +#else +static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { } +#endif + #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */ -- cgit v1.2.3-59-g8ed1b From 1bac49d490cbc813f407a5c9806e464bf4a300c9 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:30 +0100 Subject: KVM: arm64: Provide the host_stage2_try() helper macro We currently unmap all MMIO mappings from the host stage-2 to recycle the pages whenever we run out. In order to make this pattern easy to re-use from other places, factor the logic out into a dedicated macro. While at it, apply the macro for the kvm_pgtable_stage2_set_owner() calls. They're currently only called early on and are guaranteed to succeed, but making them robust to the -ENOMEM case doesn't hurt and will avoid painful debugging sessions later on. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-4-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 40 +++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..74280a753efb 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -208,6 +208,25 @@ static inline int __host_stage2_idmap(u64 start, u64 end, prot, &host_s2_pool); } +/* + * The pool has been provided with enough pages to cover all of memory with + * page granularity, but it is difficult to know how much of the MMIO range + * we will need to cover upfront, so we may need to 'recycle' the pages if we + * run out. + */ +#define host_stage2_try(fn, ...) \ + ({ \ + int __ret; \ + hyp_assert_lock_held(&host_kvm.lock); \ + __ret = fn(__VA_ARGS__); \ + if (__ret == -ENOMEM) { \ + __ret = host_stage2_unmap_dev_all(); \ + if (!__ret) \ + __ret = fn(__VA_ARGS__); \ + } \ + __ret; \ + }) + static int host_stage2_idmap(u64 addr) { enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; @@ -223,22 +242,7 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot); - if (ret != -ENOMEM) - goto unlock; - - /* - * The pool has been provided with enough pages to cover all of memory - * with page granularity, but it is difficult to know how much of the - * MMIO range we will need to cover upfront, so we may need to 'recycle' - * the pages if we run out. - */ - ret = host_stage2_unmap_dev_all(); - if (ret) - goto unlock; - - ret = __host_stage2_idmap(range.start, range.end, prot); - + ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -257,8 +261,8 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) return -EINVAL; hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_pool, pkvm_hyp_id); + ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + start, end - start, &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? 
ret : 0; -- cgit v1.2.3-59-g8ed1b From 51add457733bbc4a442fc280d73d14bfe262e4a0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:32 +0100 Subject: KVM: arm64: Expose page-table helpers The KVM pgtable API exposes the kvm_pgtable_walk() function to allow the definition of walkers outside of pgtable.c. However, it is not easy to implement any of those walkers without some of the low-level helpers. Move some of them to the header file to allow re-use from other places. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-6-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 40 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 39 ----------------------------------- 2 files changed, 40 insertions(+), 39 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index e42b55bd50a2..83c21d35be10 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +#define KVM_PTE_VALID BIT(0) + +#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) +#define KVM_PTE_ADDR_51_48 GENMASK(15, 12) + +static inline bool kvm_pte_valid(kvm_pte_t pte) +{ + return pte & KVM_PTE_VALID; +} + +static inline u64 kvm_pte_to_phys(kvm_pte_t pte) +{ + u64 pa = pte & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + + return pa; +} + +static inline u64 kvm_granule_shift(u32 level) +{ + /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); +} + +static inline u64 kvm_granule_size(u32 level) +{ + return BIT(kvm_granule_shift(level)); +} + +static inline bool kvm_level_supports_block_mapping(u32 level) +{ + /* + * Reject invalid block mappings and don't bother with 4TB mappings for + * 52-bit PAs. + */ + return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); +} + /** * struct kvm_pgtable_mm_ops - Memory management callbacks. * @zalloc_page: Allocate a single zeroed memory page. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 78f36bd5df6c..49d768b92997 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -11,16 +11,12 @@ #include #include -#define KVM_PTE_VALID BIT(0) #define KVM_PTE_TYPE BIT(1) #define KVM_PTE_TYPE_BLOCK 0 #define KVM_PTE_TYPE_PAGE 1 #define KVM_PTE_TYPE_TABLE 1 -#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) -#define KVM_PTE_ADDR_51_48 GENMASK(15, 12) - #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) @@ -61,17 +57,6 @@ struct kvm_pgtable_walk_data { u64 end; }; -static u64 kvm_granule_shift(u32 level) -{ - /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ - return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); -} - -static u64 kvm_granule_size(u32 level) -{ - return BIT(kvm_granule_shift(level)); -} - #define KVM_PHYS_INVALID (-1ULL) static bool kvm_phys_is_valid(u64 phys) @@ -79,15 +64,6 @@ static bool kvm_phys_is_valid(u64 phys) return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX)); } -static bool kvm_level_supports_block_mapping(u32 level) -{ - /* - * Reject invalid block mappings and don't bother with 4TB mappings for - * 52-bit PAs. 
- */ - return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); -} - static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) { u64 granule = kvm_granule_size(level); @@ -135,11 +111,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; } -static bool kvm_pte_valid(kvm_pte_t pte) -{ - return pte & KVM_PTE_VALID; -} - static bool kvm_pte_table(kvm_pte_t pte, u32 level) { if (level == KVM_PGTABLE_MAX_LEVELS - 1) @@ -151,16 +122,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level) return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; } -static u64 kvm_pte_to_phys(kvm_pte_t pte) -{ - u64 pa = pte & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; - - return pa; -} - static kvm_pte_t kvm_phys_to_pte(u64 pa) { kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; -- cgit v1.2.3-59-g8ed1b From c4f0935e4d957bfcea25ad76860445660a60f3fd Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:33 +0100 Subject: KVM: arm64: Optimize host memory aborts The kvm_pgtable_stage2_find_range() function is used in the host memory abort path to try and look for the largest block mapping that can be used to map the faulting address. In order to do so, the function currently walks the stage-2 page-table and looks for existing incompatible mappings within the range of the largest possible block. If incompatible mappings are found, it tries the same procedure again, but using a smaller block range, and repeats until a matching range is found (potentially up to page granularity). While this approach has benefits (mostly in the fact that it proactively coalesces host stage-2 mappings), it can be slow if the ranges are fragmented, and it isn't optimized to deal with CPUs faulting on the same IPA as all of them will do all the work every time. To avoid these issues, remove kvm_pgtable_stage2_find_range(), and walk the page-table only once in the host_mem_abort() path to find the closest leaf to the input address. With this, use the corresponding range if it is invalid and not owned by another entity. If a valid leaf is found, return -EAGAIN similar to what is done in the kvm_pgtable_stage2_map() path to optimize concurrent faults. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-7-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 30 -------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 45 ++++++++++++++++++++- arch/arm64/kvm/hyp/pgtable.c | 74 ----------------------------------- 3 files changed, 44 insertions(+), 105 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 83c21d35be10..8d6c710c1996 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -154,16 +154,6 @@ enum kvm_pgtable_prot { #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) -/** - * struct kvm_mem_range - Range of Intermediate Physical Addresses - * @start: Start of the range. - * @end: End of the range. - */ -struct kvm_mem_range { - u64 start; - u64 end; -}; - /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. 
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -491,24 +481,4 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, kvm_pte_t *ptep, u32 *level); - -/** - * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical - * Addresses with compatible permission - * attributes. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * @addr: Address that must be covered by the range. - * @prot: Protection attributes that the range must be compatible with. - * @range: Range structure used to limit the search space at call time and - * that will hold the result. - * - * The offset of @addr within a page is ignored. An IPA is compatible with @prot - * iff its corresponding stage-2 page-table entry has default ownership and, if - * valid, is mapped with protection attributes identical to @prot. - * - * Return: 0 on success, negative error code on failure. - */ -int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot, - struct kvm_mem_range *range); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 74280a753efb..2148d3968aa5 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -159,6 +159,11 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +struct kvm_mem_range { + u64 start; + u64 end; +}; + static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; @@ -227,6 +232,44 @@ static inline int __host_stage2_idmap(u64 start, u64 end, __ret; \ }) +static inline bool range_included(struct kvm_mem_range *child, + struct kvm_mem_range *parent) +{ + return parent->start <= child->start && child->end <= parent->end; +} + +static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) +{ + struct kvm_mem_range cur; + kvm_pte_t pte; + u32 level; + int ret; + + hyp_assert_lock_held(&host_kvm.lock); + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level); + if (ret) + return ret; + + if (kvm_pte_valid(pte)) + return -EAGAIN; + + if (pte) + return -EPERM; + + do { + u64 granule = kvm_granule_size(level); + cur.start = ALIGN_DOWN(addr, granule); + cur.end = cur.start + granule; + level++; + } while ((level < KVM_PGTABLE_MAX_LEVELS) && + !(kvm_level_supports_block_mapping(level) && + range_included(&cur, range))); + + *range = cur; + + return 0; +} + static int host_stage2_idmap(u64 addr) { enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; @@ -238,7 +281,7 @@ static int host_stage2_idmap(u64 addr) prot |= KVM_PGTABLE_PROT_X; hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range); + ret = host_stage2_adjust_range(addr, &range); if (ret) goto unlock; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 49d768b92997..4dff2ad39ee4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1102,77 +1102,3 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); pgt->pgd = NULL; } - -#define KVM_PTE_LEAF_S2_COMPAT_MASK (KVM_PTE_LEAF_ATTR_S2_PERMS | \ - KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \ - KVM_PTE_LEAF_ATTR_S2_IGNORED) - -static int stage2_check_permission_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - 
enum kvm_pgtable_walk_flags flag, - void * const arg) -{ - kvm_pte_t old_attr, pte = *ptep, *new_attr = arg; - - /* - * Compatible mappings are either invalid and owned by the page-table - * owner (whose id is 0), or valid with matching permission attributes. - */ - if (kvm_pte_valid(pte)) { - old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK; - if (old_attr != *new_attr) - return -EEXIST; - } else if (pte) { - return -EEXIST; - } - - return 0; -} - -int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot, - struct kvm_mem_range *range) -{ - kvm_pte_t attr; - struct kvm_pgtable_walker check_perm_walker = { - .cb = stage2_check_permission_walker, - .flags = KVM_PGTABLE_WALK_LEAF, - .arg = &attr, - }; - u64 granule, start, end; - u32 level; - int ret; - - ret = stage2_set_prot_attr(pgt, prot, &attr); - if (ret) - return ret; - attr &= KVM_PTE_LEAF_S2_COMPAT_MASK; - - for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) { - granule = kvm_granule_size(level); - start = ALIGN_DOWN(addr, granule); - end = start + granule; - - if (!kvm_level_supports_block_mapping(level)) - continue; - - if (start < range->start || range->end < end) - continue; - - /* - * Check the presence of existing mappings with incompatible - * permissions within the current block range, and try one level - * deeper if one is found. - */ - ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker); - if (ret != -EEXIST) - break; - } - - if (!ret) { - range->start = start; - range->end = end; - } - - return ret; -} -- cgit v1.2.3-59-g8ed1b From 178cac08d588e7406a09351a992f57892d8d9cc9 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:34 +0100 Subject: KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED The ignored bits for both stage-1 and stage-2 page and block descriptors are in [55:58], so rename KVM_PTE_LEAF_ATTR_S2_IGNORED to make it applicable to both. And while at it, since these bits are more commonly known as 'software' bits, rename accordingly. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-8-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 4dff2ad39ee4..59a394d82de3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -36,6 +36,8 @@ #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) +#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) + #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) @@ -44,8 +46,6 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) -#define KVM_PTE_LEAF_ATTR_S2_IGNORED GENMASK(58, 55) - #define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) #define KVM_MAX_OWNER_ID 1 -- cgit v1.2.3-59-g8ed1b From 8a0282c68121e53ab17413283cfed408a47e1a2a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:35 +0100 Subject: KVM: arm64: Don't overwrite software bits with owner id We will soon start annotating page-tables with new flags to track shared pages and such, and we will do so in valid mappings using software bits in the PTEs, as provided by the architecture. However, it is possible that we will need to use those flags to annotate invalid mappings as well in the future, similar to what we do to track page ownership in the host stage-2. 
In order to facilitate the annotation of invalid mappings with such flags, it would be preferable to re-use the same bits as for valid mappings (bits [58-55]), but these are currently used for ownership encoding. Since we have plenty of bits left to use in invalid mappings, move the ownership bits further down the PTE to avoid the conflict. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-9-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 59a394d82de3..1ee1168ac32d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -46,7 +46,7 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) -#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) #define KVM_MAX_OWNER_ID 1 struct kvm_pgtable_walk_data { -- cgit v1.2.3-59-g8ed1b From b53846c5f279cb5329b82f19a7d313f02cb9d21c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:36 +0100 Subject: KVM: arm64: Tolerate re-creating hyp mappings to set software bits The current hypervisor stage-1 mapping code doesn't allow changing an existing valid mapping. Relax this condition by allowing changes that only target software bits, as that will soon be needed to annotate shared pages. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-10-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1ee1168ac32d..2689fcb7901d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -362,6 +362,21 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) return 0; } +static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) +{ + /* + * Tolerate KVM recreating the exact same mapping, or changing software + * bits if the existing mapping was valid. + */ + if (old == new) + return false; + + if (!kvm_pte_valid(old)) + return true; + + return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW); +} + static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct hyp_map_data *data) { @@ -371,9 +386,8 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; - /* Tolerate KVM recreating the exact same mapping */ new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (old != new && !WARN_ON(kvm_pte_valid(old))) + if (hyp_pte_needs_update(old, new)) smp_store_release(ptep, new); data->phys += granule; -- cgit v1.2.3-59-g8ed1b From 5651311941105ca077d3ab74dd4a92e646ecf7fb Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:37 +0100 Subject: KVM: arm64: Enable forcing page-level stage-2 mappings Much of the stage-2 manipulation logic relies on being able to destroy block mappings if e.g. installing a smaller mapping in the range. The rationale for this behaviour is that stage-2 mappings can always be re-created lazily. However, this gets more complicated when the stage-2 page-table is used to store metadata about the underlying pages. 
In such cases, destroying a block mapping may lead to losing part of the state, and confuse the user of those metadata (such as the hypervisor in nVHE protected mode). To avoid this, introduce a callback function in the pgtable struct which is called during all map operations to determine whether the mappings can use blocks, or should be forced to page granularity. This is used by the hypervisor when creating the host stage-2 to force page-level mappings when using non-default protection attributes. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-11-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 66 ++++++++++++++++++++++------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 34 +++++++++++++++--- arch/arm64/kvm/hyp/pgtable.c | 29 ++++++++++++--- 3 files changed, 94 insertions(+), 35 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 8d6c710c1996..ca0c039547b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -115,25 +115,6 @@ enum kvm_pgtable_stage2_flags { KVM_PGTABLE_S2_IDMAP = BIT(1), }; -/** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pte_t *pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; -}; - /** * enum kvm_pgtable_prot - Page-table permissions and attributes. * @KVM_PGTABLE_PROT_X: Execute permission. @@ -149,11 +130,43 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_DEVICE = BIT(3), }; -#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) + +#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX +#define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW + +#define PAGE_HYP KVM_PGTABLE_PROT_RW #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) +typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, + enum kvm_pgtable_prot prot); + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pte_t *pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; +}; + /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. 
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -246,21 +259,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); /** - * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table. + * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. * @arch: Arch-specific KVM structure representing the guest virtual * machine. * @mm_ops: Memory management callbacks. * @flags: Stage-2 configuration flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops, - enum kvm_pgtable_stage2_flags flags); +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags, + kvm_pgtable_force_pte_cb_t force_pte_cb); #define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \ - kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0) + __kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL) /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2148d3968aa5..6fed6772c673 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -89,6 +89,8 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); + int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; @@ -101,8 +103,9 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) if (ret) return ret; - ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch, - &host_kvm.mm_ops, KVM_HOST_S2_FLAGS); + ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch, + &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, + host_stage2_force_pte_cb); if (ret) return ret; @@ -270,15 +273,36 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return 0; } +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) +{ + /* + * Block mappings must be used with care in the host stage-2 as a + * kvm_pgtable_stage2_map() operation targeting a page in the range of + * an existing block will delete the block under the assumption that + * mappings in the rest of the block range can always be rebuilt lazily. + * That assumption is correct for the host stage-2 with RWX mappings + * targeting memory or RW mappings targeting MMIO ranges (see + * host_stage2_idmap() below which implements some of the host memory + * abort logic). However, this is not safe for any other mappings where + * the host stage-2 page-table is in fact the only place where this + * state is stored. In all those cases, it is safer to use page-level + * mappings, hence avoiding to lose the state because of side-effects in + * kvm_pgtable_stage2_map(). 
+ */ + if (range_is_memory(addr, end)) + return prot != PKVM_HOST_MEM_PROT; + else + return prot != PKVM_HOST_MMIO_PROT; +} + static int host_stage2_idmap(u64 addr) { - enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); + enum kvm_pgtable_prot prot; int ret; - if (is_memory) - prot |= KVM_PGTABLE_PROT_X; + prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; hyp_spin_lock(&host_kvm.lock); ret = host_stage2_adjust_range(addr, &range); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 2689fcb7901d..e25d829587b9 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -452,6 +452,8 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; pgt->mm_ops = mm_ops; pgt->mmu = NULL; + pgt->force_pte_cb = NULL; + return 0; } @@ -489,6 +491,9 @@ struct stage2_map_data { void *memcache; struct kvm_pgtable_mm_ops *mm_ops; + + /* Force mappings to page granularity */ + bool force_pte; }; u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) @@ -602,6 +607,15 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } +static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level, + struct stage2_map_data *data) +{ + if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1))) + return false; + + return kvm_block_mapping_supported(addr, end, data->phys, level); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) @@ -611,7 +625,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!kvm_block_mapping_supported(addr, end, phys, level)) + if (!stage2_leaf_mapping_allowed(addr, end, level, data)) return -E2BIG; if (kvm_phys_is_valid(phys)) @@ -655,7 +669,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, if (data->anchor) return 0; - if (!kvm_block_mapping_supported(addr, end, data->phys, level)) + if (!stage2_leaf_mapping_allowed(addr, end, level, data)) return 0; data->childp = kvm_pte_follow(*ptep, data->mm_ops); @@ -785,6 +799,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .mmu = pgt->mmu, .memcache = mc, .mm_ops = pgt->mm_ops, + .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -816,6 +831,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .memcache = mc, .mm_ops = pgt->mm_ops, .owner_id = owner_id, + .force_pte = true, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -1057,9 +1073,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) return kvm_pgtable_walk(pgt, addr, size, &walker); } -int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops, - enum kvm_pgtable_stage2_flags flags) + +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags, + kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; u64 vtcr = arch->vtcr; @@ -1077,6 +1095,7 @@ int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch pgt->mm_ops = mm_ops; pgt->mmu = &arch->mmu; pgt->flags = flags; + 
pgt->force_pte_cb = force_pte_cb; /* Ensure zeroed PGD pages are visible to the hardware walker */ dsb(ishst); -- cgit v1.2.3-59-g8ed1b From 4505e9b624cefafa4b75d8a28e72f32076c33375 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:38 +0100 Subject: KVM: arm64: Allow populating software bits Introduce infrastructure allowing to manipulate software bits in stage-1 and stage-2 page-tables using additional entries in the kvm_pgtable_prot enum. This is heavily inspired by Marc's implementation of a similar feature in the NV patch series, but adapted to allow stage-1 changes as well: https://lore.kernel.org/kvmarm/20210510165920.1913477-56-maz@kernel.org/ Suggested-by: Marc Zyngier Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-12-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 12 +++++++++++- arch/arm64/kvm/hyp/pgtable.c | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index ca0c039547b5..bfea573703d7 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -121,6 +121,10 @@ enum kvm_pgtable_stage2_flags { * @KVM_PGTABLE_PROT_W: Write permission. * @KVM_PGTABLE_PROT_R: Read permission. * @KVM_PGTABLE_PROT_DEVICE: Device attributes. + * @KVM_PGTABLE_PROT_SW0: Software bit 0. + * @KVM_PGTABLE_PROT_SW1: Software bit 1. + * @KVM_PGTABLE_PROT_SW2: Software bit 2. + * @KVM_PGTABLE_PROT_SW3: Software bit 3. */ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -128,6 +132,11 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_R = BIT(2), KVM_PGTABLE_PROT_DEVICE = BIT(3), + + KVM_PGTABLE_PROT_SW0 = BIT(55), + KVM_PGTABLE_PROT_SW1 = BIT(56), + KVM_PGTABLE_PROT_SW2 = BIT(57), + KVM_PGTABLE_PROT_SW3 = BIT(58), }; #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) @@ -420,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); * If there is a valid, leaf page-table entry used to translate @addr, then * relax the permissions in that entry according to the read, write and * execute permissions specified by @prot. No permissions are removed, and - * TLB invalidation is performed after updating the entry. + * TLB invalidation is performed after updating the entry. Software bits cannot + * be set or cleared using kvm_pgtable_stage2_relax_perms(). * * Return: 0 on success, negative error code on failure. 
*/ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index e25d829587b9..cff744136044 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -357,6 +357,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; return 0; @@ -558,6 +559,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; return 0; @@ -1025,6 +1027,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0; + if (prot & KVM_PTE_LEAF_ATTR_HI_SW) + return -EINVAL; + if (prot & KVM_PGTABLE_PROT_R) set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; -- cgit v1.2.3-59-g8ed1b From ec250a67ea8db6209918a389554cf3aec0395b1f Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:39 +0100 Subject: KVM: arm64: Add helpers to tag shared pages in SW bits We will soon start annotating shared pages in page-tables in nVHE protected mode. Define all the states in which a page can be (owned, shared and owned, shared and borrowed), and provide helpers allowing to convert this into SW bits annotations using the matching prot attributes. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-13-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 9c227d87c36d..87b1690c439f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -12,6 +12,32 @@ #include #include +/* + * SW bits 0-1 are reserved to track the memory ownership state of each page: + * 00: The page is owned exclusively by the page-table owner. + * 01: The page is owned by the page-table owner, but is shared + * with another entity. + * 10: The page is shared with, but not owned by the page-table owner. + * 11: Reserved for future use (lending). + */ +enum pkvm_page_state { + PKVM_PAGE_OWNED = 0ULL, + PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, + PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, +}; + +#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) +static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, + enum pkvm_page_state state) +{ + return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state; +} + +static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) +{ + return prot & PKVM_PAGE_STATE_PROT_MASK; +} + struct host_kvm { struct kvm_arch arch; struct kvm_pgtable pgt; -- cgit v1.2.3-59-g8ed1b From 39257da0e04e5cdb1e4a3ca715dc3d949fe8b059 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:40 +0100 Subject: KVM: arm64: Expose host stage-2 manipulation helpers We will need to manipulate the host stage-2 page-table from outside mem_protect.c soon. Introduce two functions allowing this, and make them usable to users of mem_protect.h. 
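Both helpers assert that host_kvm.lock is already held, so callers are expected to take the host stage-2 lock themselves and may batch several updates under a single lock hold. A minimal usage sketch (illustrative only, not part of the patch):

	/* Identity-map one page of memory in the host stage-2. */
	static int __example_map_one_page(phys_addr_t phys)
	{
		int ret;

		hyp_spin_lock(&host_kvm.lock);
		ret = host_stage2_idmap_locked(phys, PAGE_SIZE, PKVM_HOST_MEM_PROT);
		hyp_spin_unlock(&host_kvm.lock);

		return ret;
	}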
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-14-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 87b1690c439f..0849ee8fa260 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,6 +49,8 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 6fed6772c673..f95a5a4aa09c 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -273,6 +273,22 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return 0; } +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, + enum kvm_pgtable_prot prot) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); +} + +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + addr, size, &host_s2_pool, owner_id); +} + static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) { /* @@ -309,7 +325,7 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot); + ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot); unlock: hyp_spin_unlock(&host_kvm.lock); -- cgit v1.2.3-59-g8ed1b From 2d77e238badb022adb364332b7d6a1d627f77145 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:41 +0100 Subject: KVM: arm64: Expose pkvm_hyp_id Allow references to the hypervisor's owner id from outside mem_protect.c. 
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-15-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0849ee8fa260..23316a021880 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -46,6 +46,8 @@ struct host_kvm { }; extern struct host_kvm host_kvm; +extern const u8 pkvm_hyp_id; + int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f95a5a4aa09c..ee255171945c 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; -static const u8 pkvm_hyp_id = 1; +const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { -- cgit v1.2.3-59-g8ed1b From e009dce1292c37cf8ee7c33e0887ad3c642f980f Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:42 +0100 Subject: KVM: arm64: Introduce addr_is_memory() Introduce a helper usable in nVHE protected mode to check whether a physical address is in a RAM region or not. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-16-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 23316a021880..49db0ec5a606 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -51,6 +51,7 @@ extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); +bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ee255171945c..cb023d31666e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -197,6 +197,13 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) return false; } +bool addr_is_memory(phys_addr_t phys) +{ + struct kvm_mem_range range; + + return find_mem_range(phys, &range); +} + static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; -- cgit v1.2.3-59-g8ed1b From 9024b3d0069ab4b8ef70cf55f0ee09e61f3a0747 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:43 +0100 Subject: KVM: arm64: Enable retrieving protections attributes of PTEs Introduce helper functions in the KVM stage-2 and stage-1 page-table manipulation library allowing to retrieve the enum kvm_pgtable_prot of a PTE. This will be useful to implement custom walkers outside of pgtable.c. 
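As an illustration of the intended use from a custom walker (the callback name is made up; its signature matches the kvm_pgtable_walker callbacks used elsewhere in this series):

	static int __example_prot_walker(u64 addr, u64 end, u32 level,
					 kvm_pte_t *ptep,
					 enum kvm_pgtable_walk_flags flag,
					 void * const arg)
	{
		kvm_pte_t pte = *ptep;
		enum kvm_pgtable_prot prot;

		if (!kvm_pte_valid(pte))
			return 0;

		prot = kvm_pgtable_stage2_pte_prot(pte);
		/* ... act on prot, e.g. test KVM_PGTABLE_PROT_W or the SW bits ... */

		return 0;
	}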
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-17-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 20 +++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index bfea573703d7..027783829584 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -507,4 +507,24 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, kvm_pte_t *ptep, u32 *level); + +/** + * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a + * stage-2 Page-Table Entry. + * @pte: Page-table entry + * + * Return: protection attributes of the page-table entry in the enum + * kvm_pgtable_prot format. + */ +enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte); + +/** + * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1 + * Page-Table Entry. + * @pte: Page-table entry + * + * Return: protection attributes of the page-table entry in the enum + * kvm_pgtable_prot format. + */ +enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cff744136044..f8ceebe4982e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -363,6 +363,26 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) return 0; } +enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) +{ + enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; + u32 ap; + + if (!kvm_pte_valid(pte)) + return prot; + + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) + prot |= KVM_PGTABLE_PROT_X; + + ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte); + if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO) + prot |= KVM_PGTABLE_PROT_R; + else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW) + prot |= KVM_PGTABLE_PROT_RW; + + return prot; +} + static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) { /* @@ -565,6 +585,23 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p return 0; } +enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) +{ + enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; + + if (!kvm_pte_valid(pte)) + return prot; + + if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R) + prot |= KVM_PGTABLE_PROT_R; + if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) + prot |= KVM_PGTABLE_PROT_W; + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) + prot |= KVM_PGTABLE_PROT_X; + + return prot; +} + static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) { if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) -- cgit v1.2.3-59-g8ed1b From 2c50166c62ba7f3c23c1bbdbb9324db462ddc97b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:44 +0100 Subject: KVM: arm64: Mark host bss and rodata section as shared As the hypervisor maps the host's .bss and .rodata sections in its stage-1, make sure to tag them as shared in hyp and host page-tables. But since the hypervisor relies on the presence of these mappings, we cannot let the host in complete control of the memory regions -- it must not unshare or donate them to another entity for example. 
To prevent this, let's transfer the ownership of those ranges to the hypervisor itself, and share the pages back with the host. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-18-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/setup.c | 82 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 8 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0b574d106519..57c27846320f 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, { void *start, *end, *virt = hyp_phys_to_virt(phys); unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT; + enum kvm_pgtable_prot prot; int ret, i; /* Recreate the hyp page-table using the early page allocator */ @@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO); - if (ret) - return ret; - ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO); if (ret) return ret; @@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO); - if (ret) - return ret; - ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP); if (ret) return ret; @@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, return ret; } + /* + * Map the host's .bss and .rodata sections RO in the hypervisor, but + * transfer the ownership from the host to the hypervisor itself to + * make sure it can't be donated or shared with another entity. + * + * The ownership transition requires matching changes in the host + * stage-2. This will be done later (see finalize_host_mappings()) once + * the hyp_vmemmap is addressable. + */ + prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); + ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot); + if (ret) + return ret; + + ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot); + if (ret) + return ret; + return 0; } @@ -148,6 +159,57 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } +static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + enum kvm_pgtable_prot prot; + enum pkvm_page_state state; + kvm_pte_t pte = *ptep; + phys_addr_t phys; + + if (!kvm_pte_valid(pte)) + return 0; + + if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) + return -EINVAL; + + phys = kvm_pte_to_phys(pte); + if (!addr_is_memory(phys)) + return 0; + + /* + * Adjust the host stage-2 mappings to match the ownership attributes + * configured in the hypervisor stage-1. 
+ */ + state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + switch (state) { + case PKVM_PAGE_OWNED: + return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); + case PKVM_PAGE_SHARED_OWNED: + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); + break; + case PKVM_PAGE_SHARED_BORROWED: + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + break; + default: + return -EINVAL; + } + + return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); +} + +static int finalize_host_mappings(void) +{ + struct kvm_pgtable_walker walker = { + .cb = finalize_host_mappings_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = finalize_host_mappings(); + if (ret) + goto out; + pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, -- cgit v1.2.3-59-g8ed1b From ad0e0139a8e163245d8f44ab4f6ec3bc9b08034d Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:45 +0100 Subject: KVM: arm64: Remove __pkvm_mark_hyp Now that we mark memory owned by the hypervisor in the host stage-2 during __pkvm_init(), we no longer need to rely on the host to explicitly mark the hyp sections later on. Remove the __pkvm_mark_hyp() hypercall altogether. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-19-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 3 +- arch/arm64/kvm/arm.c | 46 --------------------------- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 - arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 ------ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 19 ----------- 5 files changed, 1 insertion(+), 77 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9f0bf2109be7..432a9ea1f02e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -63,8 +63,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 -#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20 -#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 21 +#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..2f378482471b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1954,57 +1954,11 @@ static void _kvm_host_prot_finalize(void *discard) WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); } -static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) -{ - return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end); -} - -#define pkvm_mark_hyp_section(__section) \ - pkvm_mark_hyp(__pa_symbol(__section##_start), \ - __pa_symbol(__section##_end)) - static int finalize_hyp_mode(void) { - int cpu, ret; - if (!is_protected_kvm_enabled()) return 0; - ret = pkvm_mark_hyp_section(__hyp_idmap_text); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_text); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_rodata); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_bss); - if (ret) - return ret; - - ret = pkvm_mark_hyp(hyp_mem_base, 
hyp_mem_base + hyp_mem_size); - if (ret) - return ret; - - for_each_possible_cpu(cpu) { - phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]); - phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order()); - - ret = pkvm_mark_hyp(start, end); - if (ret) - return ret; - - start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu)); - end = start + PAGE_SIZE; - ret = pkvm_mark_hyp(start, end); - if (ret) - return ret; - } - /* * Flip the static key upfront as that may no longer be possible * once the host stage 2 is installed. diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 49db0ec5a606..0118527b07b0 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,7 +49,6 @@ extern struct host_kvm host_kvm; extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); -int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 1632f001f4ed..7900d5b66ba3 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -163,14 +163,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) { cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } - -static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt) -{ - DECLARE_REG(phys_addr_t, start, host_ctxt, 1); - DECLARE_REG(phys_addr_t, end, host_ctxt, 2); - - cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end); -} typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -196,7 +188,6 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_create_mappings), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), - HANDLE_FUNC(__pkvm_mark_hyp), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index cb023d31666e..2991dc6996b9 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -339,25 +339,6 @@ unlock: return ret; } -int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) -{ - int ret; - - /* - * host_stage2_unmap_dev_all() currently relies on MMIO mappings being - * non-persistent, so don't allow changing page ownership in MMIO range. - */ - if (!range_is_memory(start, end)) - return -EINVAL; - - hyp_spin_lock(&host_kvm.lock); - ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, - start, end - start, &host_s2_pool, pkvm_hyp_id); - hyp_spin_unlock(&host_kvm.lock); - - return ret != -EAGAIN ? ret : 0; -} - void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; -- cgit v1.2.3-59-g8ed1b From f9370010e92638f66473baf342e19de940403362 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:46 +0100 Subject: KVM: arm64: Refactor protected nVHE stage-1 locking Refactor the hypervisor stage-1 locking in nVHE protected mode to expose a new pkvm_create_mappings_locked() function. This will be used in later patches to allow walking and changing the hypervisor stage-1 without releasing the lock. 
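For illustration, a caller that needs to inspect and then extend the hypervisor stage-1 under a single critical section would look roughly like the sketch below. This is a minimal sketch rather than code from the series, and annotate_and_map() is a hypothetical helper; only pkvm_pgd_lock, hyp_spin_lock()/hyp_spin_unlock() and the new pkvm_create_mappings_locked() are taken from the patches.

	/*
	 * Hypothetical caller: hold pkvm_pgd_lock across a read-modify-write
	 * of the hyp stage-1 so the walk and the new mapping cannot race with
	 * another CPU, and use the _locked variant to avoid re-taking the lock.
	 */
	static int annotate_and_map(void *from, void *to, enum kvm_pgtable_prot prot)
	{
		int ret;

		hyp_spin_lock(&pkvm_pgd_lock);
		/* ... walk or check the existing hyp stage-1 mappings here ... */
		ret = pkvm_create_mappings_locked(from, to, prot);
		hyp_spin_unlock(&pkvm_pgd_lock);

		return ret;
	}
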
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-20-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 1 + arch/arm64/kvm/hyp/nvhe/mm.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 8ec3a5a7744b..c76d7136ed9b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -23,6 +23,7 @@ int hyp_map_vectors(void); int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot); unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index a8efdf0f9003..6fbe8e8030f6 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -67,13 +67,15 @@ out: return addr; } -int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) +int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot) { unsigned long start = (unsigned long)from; unsigned long end = (unsigned long)to; unsigned long virt_addr; phys_addr_t phys; + hyp_assert_lock_held(&pkvm_pgd_lock); + start = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -81,7 +83,8 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) int err; phys = hyp_virt_to_phys((void *)virt_addr); - err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot); + err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE, + phys, prot); if (err) return err; } @@ -89,6 +92,17 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return 0; } +int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) +{ + int ret; + + hyp_spin_lock(&pkvm_pgd_lock); + ret = pkvm_create_mappings_locked(from, to, prot); + hyp_spin_unlock(&pkvm_pgd_lock); + + return ret; +} + int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) { unsigned long start, end; -- cgit v1.2.3-59-g8ed1b From 66c57edd3bc79e3527daaae8123f72ecd1e3fa25 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:47 +0100 Subject: KVM: arm64: Restrict EL2 stage-1 changes in protected mode The host kernel is currently able to change EL2 stage-1 mappings without restrictions thanks to the __pkvm_create_mappings() hypercall. But in a world where the host is no longer part of the TCB, this clearly poses a problem. To fix this, introduce a new hypercall to allow the host to share a physical memory page with the hypervisor, and remove the __pkvm_create_mappings() variant. The new hypercall implements ownership and permission checks before allowing the sharing operation, and it annotates the shared page in the hypervisor stage-1 and host stage-2 page-tables. 
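The page-state annotations mentioned above are the software-bit encoding introduced earlier in this series. A sketch of that encoding is reproduced below as a reminder; the exact definitions live in the nVHE mem_protect.h header and may differ in detail, so treat this as a paraphrase rather than an authoritative copy.

	/*
	 * Sketch: the pKVM page state is carried in two software bits of the
	 * page-table prot, so it is stored in the PTE alongside the access
	 * permissions. pkvm_mkstate() packs a state into a prot value and
	 * pkvm_getstate() extracts it again.
	 */
	enum pkvm_page_state {
		PKVM_PAGE_OWNED			= 0ULL,
		PKVM_PAGE_SHARED_OWNED		= KVM_PGTABLE_PROT_SW0,
		PKVM_PAGE_SHARED_BORROWED	= KVM_PGTABLE_PROT_SW1,
	};

	#define PKVM_PAGE_STATE_PROT_MASK	(KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)

	static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
							 enum pkvm_page_state state)
	{
		return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
	}

	static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
	{
		return prot & PKVM_PAGE_STATE_PROT_MASK;
	}
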
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-21-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 ++-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 88 +++++++++++++++++++++++++++ arch/arm64/kvm/mmu.c | 28 +++++++-- 5 files changed, 118 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 432a9ea1f02e..aed2aa61766a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -59,7 +59,7 @@ #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 #define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 -#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16 +#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0118527b07b0..03e604f842e2 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,6 +49,7 @@ extern struct host_kvm host_kvm; extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); +int __pkvm_host_share_hyp(u64 pfn); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 7900d5b66ba3..2da6aa8da868 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -140,14 +140,11 @@ static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot); } -static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt) +static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(unsigned long, start, host_ctxt, 1); - DECLARE_REG(unsigned long, size, host_ctxt, 2); - DECLARE_REG(unsigned long, phys, host_ctxt, 3); - DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4); + DECLARE_REG(u64, pfn, host_ctxt, 1); - cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot); + cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn); } static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt) @@ -185,7 +182,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__vgic_v3_restore_aprs), HANDLE_FUNC(__pkvm_init), HANDLE_FUNC(__pkvm_cpu_set_vector), - HANDLE_FUNC(__pkvm_create_mappings), + HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), }; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2991dc6996b9..8165390d3ec9 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -339,6 +339,94 @@ unlock: return ret; } +static inline bool check_prot(enum kvm_pgtable_prot prot, + enum kvm_pgtable_prot required, + enum kvm_pgtable_prot denied) +{ + return (prot & (required | denied)) == required; +} + +int __pkvm_host_share_hyp(u64 pfn) +{ + phys_addr_t addr = hyp_pfn_to_phys(pfn); + enum kvm_pgtable_prot prot, cur; + void *virt = 
__hyp_va(addr); + enum pkvm_page_state state; + kvm_pte_t pte; + int ret; + + if (!addr_is_memory(addr)) + return -EINVAL; + + hyp_spin_lock(&host_kvm.lock); + hyp_spin_lock(&pkvm_pgd_lock); + + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); + if (ret) + goto unlock; + if (!pte) + goto map_shared; + + /* + * Check attributes in the host stage-2 PTE. We need the page to be: + * - mapped RWX as we're sharing memory; + * - not borrowed, as that implies absence of ownership. + * Otherwise, we can't let it got through + */ + cur = kvm_pgtable_stage2_pte_prot(pte); + prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) { + ret = -EPERM; + goto unlock; + } + + state = pkvm_getstate(cur); + if (state == PKVM_PAGE_OWNED) + goto map_shared; + + /* + * Tolerate double-sharing the same page, but this requires + * cross-checking the hypervisor stage-1. + */ + if (state != PKVM_PAGE_SHARED_OWNED) { + ret = -EPERM; + goto unlock; + } + + ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL); + if (ret) + goto unlock; + + /* + * If the page has been shared with the hypervisor, it must be + * already mapped as SHARED_BORROWED in its stage-1. + */ + cur = kvm_pgtable_hyp_pte_prot(pte); + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, prot, ~prot)) + ret = EPERM; + goto unlock; + +map_shared: + /* + * If the page is not yet shared, adjust mappings in both page-tables + * while both locks are held. + */ + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot); + BUG_ON(ret); + + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot); + BUG_ON(ret); + +unlock: + hyp_spin_unlock(&pkvm_pgd_lock); + hyp_spin_unlock(&host_kvm.lock); + + return ret; +} + void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..cbab146cda6a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -259,10 +259,8 @@ static int __create_hyp_mappings(unsigned long start, unsigned long size, { int err; - if (!kvm_host_owns_hyp_mappings()) { - return kvm_call_hyp_nvhe(__pkvm_create_mappings, - start, size, phys, prot); - } + if (WARN_ON(!kvm_host_owns_hyp_mappings())) + return -EINVAL; mutex_lock(&kvm_hyp_pgd_mutex); err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot); @@ -282,6 +280,21 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) } } +static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end) +{ + phys_addr_t addr; + int ret; + + for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) { + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, + __phys_to_pfn(addr)); + if (ret) + return ret; + } + + return 0; +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -302,6 +315,13 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) if (is_kernel_in_hyp_mode()) return 0; + if (!kvm_host_owns_hyp_mappings()) { + if (WARN_ON(prot != PAGE_HYP)) + return -EPERM; + return pkvm_share_hyp(kvm_kaddr_to_phys(from), + kvm_kaddr_to_phys(to)); + } + start = start & PAGE_MASK; end = PAGE_ALIGN(end); -- cgit v1.2.3-59-g8ed1b From 64a80fb766f9a91e26930bfc56d8e7c12425df12 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:48 +0100 
Subject: KVM: arm64: Make __pkvm_create_mappings static The __pkvm_create_mappings() function is no longer used outside of nvhe/mm.c, make it static. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-22-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 2 -- arch/arm64/kvm/hyp/nvhe/mm.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index c76d7136ed9b..c9a8f535212e 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -24,8 +24,6 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); -int __pkvm_create_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot); unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 6fbe8e8030f6..2fabeceb889a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -23,8 +23,8 @@ u64 __io_map_base; struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; unsigned int hyp_memblock_nr; -int __pkvm_create_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot) +static int __pkvm_create_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot) { int err; -- cgit v1.2.3-59-g8ed1b From 12593568d7319c34c72038ea799ab1bd0f0eb01c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 11 Aug 2021 18:36:25 +0100 Subject: KVM: arm64: Return -EPERM from __pkvm_host_share_hyp() Fix the error code returned by __pkvm_host_share_hyp() when the host attempts to share with EL2 a page that has already been shared with another entity. Reported-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210811173630.2536721-1-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8165390d3ec9..6ec695311498 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -404,7 +404,7 @@ int __pkvm_host_share_hyp(u64 pfn) cur = kvm_pgtable_hyp_pte_prot(pte); prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); if (!check_prot(cur, prot, ~prot)) - ret = EPERM; + ret = -EPERM; goto unlock; map_shared: -- cgit v1.2.3-59-g8ed1b From ccac96977243d7916053550f62e6489760ad0adc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Aug 2021 14:03:36 +0100 Subject: KVM: arm64: Make hyp_panic() more robust when protected mode is enabled When protected mode is enabled, the host is unable to access most parts of the EL2 hypervisor image, including 'hyp_physvirt_offset' and the contents of the hypervisor's '.rodata.str' section. 
Unfortunately, nvhe_hyp_panic_handler() tries to read from both of these locations when handling a BUG() triggered at EL2; the former for converting the ELR to a physical address and the latter for displaying the name of the source file where the BUG() occurred. Hack the EL2 panic asm to pass both physical and virtual ELR values to the host and utilise the newly introduced CONFIG_NVHE_EL2_DEBUG so that we disable stage-2 protection for the host before returning to the EL1 panic handler. If the debug option is not enabled, display the address instead of the source file:line information. Cc: Andrew Scull Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210813130336.8139-1-will@kernel.org --- arch/arm64/kvm/handle_exit.c | 23 ++++++++++++++--------- arch/arm64/kvm/hyp/nvhe/host.S | 21 +++++++++++++++++---- 2 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 6f48336b1d86..04ebab299aa4 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -292,11 +292,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } -void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, +void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, + u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar) { - u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr)); - u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr; + u64 elr_in_kimg = __phys_to_kimg(elr_phys); + u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt; u64 mode = spsr & PSR_MODE_MASK; /* @@ -309,20 +310,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, kvm_err("Invalid host exception to nVHE hyp!\n"); } else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) { - struct bug_entry *bug = find_bug(elr_in_kimg); const char *file = NULL; unsigned int line = 0; /* All hyp bugs, including warnings, are treated as fatal. 
*/ - if (bug) - bug_get_file_line(bug, &file, &line); + if (!is_protected_kvm_enabled() || + IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { + struct bug_entry *bug = find_bug(elr_in_kimg); + + if (bug) + bug_get_file_line(bug, &file, &line); + } if (file) kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else - kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset); + kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset); } else { - kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset); + kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset); } /* @@ -334,5 +339,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, kvm_err("Hyp Offset: 0x%llx\n", hyp_offset); panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n", - spsr, elr, esr, far, hpfar, par, vcpu); + spsr, elr_virt, esr, far, hpfar, par, vcpu); } diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 2b23400e0fb3..4b652ffb591d 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -85,12 +86,24 @@ SYM_FUNC_START(__hyp_do_panic) mov x29, x0 +#ifdef CONFIG_NVHE_EL2_DEBUG + /* Ensure host stage-2 is disabled */ + mrs x0, hcr_el2 + bic x0, x0, #HCR_VM + msr hcr_el2, x0 + isb + tlbi vmalls12e1 + dsb nsh +#endif + /* Load the panic arguments into x0-7 */ mrs x0, esr_el2 - get_vcpu_ptr x4, x5 - mrs x5, far_el2 - mrs x6, hpfar_el2 - mov x7, xzr // Unused argument + mov x4, x3 + mov x3, x2 + hyp_pa x3, x6 + get_vcpu_ptr x5, x6 + mrs x6, far_el2 + mrs x7, hpfar_el2 /* Enter the host, conditionally restoring the host context. */ cbz x29, __host_enter_without_restoring -- cgit v1.2.3-59-g8ed1b From 14ecf075fe5be01860927fdf3aa11d7b18023ab2 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 28 Jul 2021 15:32:32 +0000 Subject: KVM: arm64: Minor optimization of range_is_memory Currently range_is_memory finds the corresponding struct memblock_region for both the lower and upper bounds of the given address range with two rounds of binary search, and then checks that the two memblocks are the same. Simplify this by only doing binary search on the lower bound and then checking that the upper bound is in the same memblock. Signed-off-by: David Brazdil Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210728153232.1018911-3-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c11b50dd0050..5af2e28b9cd7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -204,16 +204,19 @@ bool addr_is_memory(phys_addr_t phys) return find_mem_range(phys, &range); } +static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) +{ + return range->start <= addr && addr < range->end; +} + static bool range_is_memory(u64 start, u64 end) { - struct kvm_mem_range r1, r2; + struct kvm_mem_range r; - if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) - return false; - if (r1.start != r2.start) + if (!find_mem_range(start, &r)) return false; - return true; + return is_in_mem_range(end - 1, &r); } static inline int __host_stage2_idmap(u64 start, u64 end, -- cgit v1.2.3-59-g8ed1b
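
For context, both range_is_memory() changes in this log lean on find_mem_range(), which is not shown in the hunks above. A simplified sketch of its contract follows, assuming hyp_memory[] is sorted by base address and its regions do not overlap; the real helper also fills *range with the surrounding gap when the lookup misses, which this sketch omits.

	/*
	 * Simplified sketch, not the in-tree implementation: binary-search the
	 * sorted hyp_memory[] array. On a hit, fill *range with the bounds of
	 * the enclosing memblock and return true; on a miss (addr is not RAM),
	 * return false.
	 */
	static bool find_mem_range_sketch(phys_addr_t addr, struct kvm_mem_range *range)
	{
		int lo = 0, hi = (int)hyp_memblock_nr - 1;

		while (lo <= hi) {
			int mid = lo + (hi - lo) / 2;
			struct memblock_region *reg = &hyp_memory[mid];

			if (addr < reg->base) {
				hi = mid - 1;
			} else if (addr >= reg->base + reg->size) {
				lo = mid + 1;
			} else {
				range->start = reg->base;
				range->end = reg->base + reg->size;
				return true;
			}
		}

		return false;
	}

With that contract in mind, the optimization above simply reuses the range found for the lower bound and checks that end - 1 still falls inside it, instead of running a second binary search for the upper bound.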