author:    Marc Zyngier <maz@kernel.org>  2021-08-20 12:17:13 +0100
committer: Marc Zyngier <maz@kernel.org>  2021-08-20 12:22:35 +0100
commit:    cf0c7125d5783f93b78921845d4b101c65a7998b
tree:      9da6db673d97940138a204ae77a80c26021f04d4 /arch/arm64/kvm
parent:    Merge branch kvm-arm64/mmu/kmemleak-pkvm into kvmarm-master/next
parent:    KVM: arm64: Minor optimization of range_is_memory
Merge branch kvm-arm64/mmu/el2-tracking into kvmarm-master/next
* kvm-arm64/mmu/el2-tracking: (25 commits)
  : Enable tracking of page sharing between host EL1 and EL2
  KVM: arm64: Minor optimization of range_is_memory
  KVM: arm64: Make hyp_panic() more robust when protected mode is enabled
  KVM: arm64: Return -EPERM from __pkvm_host_share_hyp()
  KVM: arm64: Make __pkvm_create_mappings static
  KVM: arm64: Restrict EL2 stage-1 changes in protected mode
  KVM: arm64: Refactor protected nVHE stage-1 locking
  KVM: arm64: Remove __pkvm_mark_hyp
  KVM: arm64: Mark host bss and rodata section as shared
  KVM: arm64: Enable retrieving protections attributes of PTEs
  KVM: arm64: Introduce addr_is_memory()
  KVM: arm64: Expose pkvm_hyp_id
  KVM: arm64: Expose host stage-2 manipulation helpers
  KVM: arm64: Add helpers to tag shared pages in SW bits
  KVM: arm64: Allow populating software bits
  KVM: arm64: Enable forcing page-level stage-2 mappings
  KVM: arm64: Tolerate re-creating hyp mappings to set software bits
  KVM: arm64: Don't overwrite software bits with owner id
  KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED
  KVM: arm64: Optimize host memory aborts
  KVM: arm64: Expose page-table helpers
  ...

Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--  arch/arm64/kvm/Kconfig                           9
-rw-r--r--  arch/arm64/kvm/arm.c                            57
-rw-r--r--  arch/arm64/kvm/handle_exit.c                    23
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/mem_protect.h   33
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/mm.h             3
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/spinlock.h      25
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/host.S                  21
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c              20
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mem_protect.c          238
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mm.c                    22
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c                 82
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c                   208
-rw-r--r--  arch/arm64/kvm/mmu.c                            28
13 files changed, 513 insertions, 256 deletions
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a4eba0908bfa..9b9721895e5c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -46,6 +46,15 @@ if KVM
source "virt/kvm/Kconfig"
+config NVHE_EL2_DEBUG
+ bool "Debug mode for non-VHE EL2 object"
+ help
+ Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
+ Failure reports will BUG() in the hypervisor. This is intended for
+ local EL2 hypervisor development.
+
+ If unsure, say N.
+
endif # KVM
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9eb80a34b4ae..fd9a6bcf797f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -91,10 +91,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.return_nisv_io_abort_to_user = true;
break;
case KVM_CAP_ARM_MTE:
- if (!system_supports_mte() || kvm->created_vcpus)
- return -EINVAL;
- r = 0;
- kvm->arch.mte_enabled = true;
+ mutex_lock(&kvm->lock);
+ if (!system_supports_mte() || kvm->created_vcpus) {
+ r = -EINVAL;
+ } else {
+ r = 0;
+ kvm->arch.mte_enabled = true;
+ }
+ mutex_unlock(&kvm->lock);
break;
default:
r = -EINVAL;
@@ -1946,62 +1950,17 @@ static void _kvm_host_prot_finalize(void *discard)
WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
}
-static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
-{
- return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
-}
-
-#define pkvm_mark_hyp_section(__section) \
- pkvm_mark_hyp(__pa_symbol(__section##_start), \
- __pa_symbol(__section##_end))
-
static int finalize_hyp_mode(void)
{
- int cpu, ret;
-
if (!is_protected_kvm_enabled())
return 0;
- ret = pkvm_mark_hyp_section(__hyp_idmap_text);
- if (ret)
- return ret;
-
- ret = pkvm_mark_hyp_section(__hyp_text);
- if (ret)
- return ret;
-
- ret = pkvm_mark_hyp_section(__hyp_rodata);
- if (ret)
- return ret;
-
/*
* Exclude HYP BSS from kmemleak so that it doesn't get peeked
* at, which would end badly once the section is inaccessible.
* None of the other sections should ever be introspected.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
- ret = pkvm_mark_hyp_section(__hyp_bss);
- if (ret)
- return ret;
-
- ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
- if (ret)
- return ret;
-
- for_each_possible_cpu(cpu) {
- phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
- phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
-
- ret = pkvm_mark_hyp(start, end);
- if (ret)
- return ret;
-
- start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
- end = start + PAGE_SIZE;
- ret = pkvm_mark_hyp(start, end);
- if (ret)
- return ret;
- }
/*
* Flip the static key upfront as that may no longer be possible
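
The KVM_CAP_ARM_MTE hunk above closes a race: the created_vcpus check and the mte_enabled write now happen atomically under kvm->lock, so a vCPU can no longer be created between the check and the flag being set. A minimal standalone sketch of that pattern, with types and error values simplified:

#include <assert.h>
#include <pthread.h>

struct vm {
	pthread_mutex_t lock;
	int created_vcpus;
	int mte_enabled;
};

/* Enable the capability only while no vCPUs exist, atomically. */
static int enable_mte(struct vm *vm)
{
	int ret;

	pthread_mutex_lock(&vm->lock);
	if (vm->created_vcpus) {
		ret = -1; /* -EINVAL in the kernel */
	} else {
		vm->mte_enabled = 1;
		ret = 0;
	}
	pthread_mutex_unlock(&vm->lock);

	return ret;
}

int main(void)
{
	struct vm vm = { .lock = PTHREAD_MUTEX_INITIALIZER };

	assert(enable_mte(&vm) == 0);
	vm.created_vcpus = 1;
	vm.mte_enabled = 0;
	assert(enable_mte(&vm) == -1);
	return 0;
}
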
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 6f48336b1d86..04ebab299aa4 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -292,11 +292,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
}
-void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
+ u64 elr_virt, u64 elr_phys,
u64 par, uintptr_t vcpu,
u64 far, u64 hpfar)
{
- u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));
- u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr;
+ u64 elr_in_kimg = __phys_to_kimg(elr_phys);
+ u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
u64 mode = spsr & PSR_MODE_MASK;
/*
@@ -309,20 +310,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
kvm_err("Invalid host exception to nVHE hyp!\n");
} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
(esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
- struct bug_entry *bug = find_bug(elr_in_kimg);
const char *file = NULL;
unsigned int line = 0;
/* All hyp bugs, including warnings, are treated as fatal. */
- if (bug)
- bug_get_file_line(bug, &file, &line);
+ if (!is_protected_kvm_enabled() ||
+ IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+ struct bug_entry *bug = find_bug(elr_in_kimg);
+
+ if (bug)
+ bug_get_file_line(bug, &file, &line);
+ }
if (file)
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
else
- kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset);
+ kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
} else {
- kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset);
+ kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
}
/*
@@ -334,5 +339,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
- spsr, elr, esr, far, hpfar, par, vcpu);
+ spsr, elr_virt, esr, far, hpfar, par, vcpu);
}
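
With both the virtual and physical ELR now passed up from EL2, the handler can translate the faulting hyp PC into a kernel-image address without trusting hyp pointers. A toy model of the offset arithmetic, using invented layout constants:

#include <assert.h>
#include <stdint.h>

/* Invented layout constants, for illustration only. */
#define PHYS_BASE 0x40000000ULL           /* kernel image base in RAM */
#define KIMG_BASE 0xffff800010000000ULL   /* link-time kernel VA base */
#define KASLR_OFF 0x200000ULL             /* runtime KASLR slide */

/* Model of __phys_to_kimg(): physical address to kernel-image VA. */
static uint64_t phys_to_kimg(uint64_t phys)
{
	return phys - PHYS_BASE + KIMG_BASE + KASLR_OFF;
}

int main(void)
{
	uint64_t elr_phys = PHYS_BASE + 0x1234;    /* faulting PC, PA */
	uint64_t elr_virt = 0xffffc00000001234ULL; /* same PC, hyp VA */

	uint64_t elr_in_kimg = phys_to_kimg(elr_phys);
	uint64_t hyp_offset  = elr_in_kimg - KASLR_OFF - elr_virt;

	/* hyp VA + offset yields the un-slid (kallsyms-friendly) address. */
	assert(elr_virt + hyp_offset == KIMG_BASE + 0x1234);
	return 0;
}
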
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 9c227d87c36d..03e604f842e2 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -12,6 +12,32 @@
#include <asm/virt.h>
#include <nvhe/spinlock.h>
+/*
+ * SW bits 0-1 are reserved to track the memory ownership state of each page:
+ * 00: The page is owned exclusively by the page-table owner.
+ * 01: The page is owned by the page-table owner, but is shared
+ * with another entity.
+ * 10: The page is shared with, but not owned by the page-table owner.
+ * 11: Reserved for future use (lending).
+ */
+enum pkvm_page_state {
+ PKVM_PAGE_OWNED = 0ULL,
+ PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
+ PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
+};
+
+#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
+static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
+ enum pkvm_page_state state)
+{
+ return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
+}
+
+static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
+{
+ return prot & PKVM_PAGE_STATE_PROT_MASK;
+}
+
struct host_kvm {
struct kvm_arch arch;
struct kvm_pgtable pgt;
@@ -20,9 +46,14 @@ struct host_kvm {
};
extern struct host_kvm host_kvm;
+extern const u8 pkvm_hyp_id;
+
int __pkvm_prot_finalize(void);
-int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
+int __pkvm_host_share_hyp(u64 pfn);
+bool addr_is_memory(phys_addr_t phys);
+int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
+int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
int kvm_host_prepare_stage2(void *pgt_pool_base);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
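
pkvm_mkstate() and pkvm_getstate() above fold the two-bit ownership state into software bits of a prot value without disturbing the other attribute bits. A minimal standalone sketch of the encode/decode round-trip; the bit positions here are illustrative stand-ins, not the kernel's SW0/SW1:

#include <assert.h>
#include <stdint.h>

/* Illustrative software bits; the kernel uses PTE bits 55/56 instead. */
#define PROT_SW0        (1u << 3)
#define PROT_SW1        (1u << 4)
#define PAGE_STATE_MASK (PROT_SW0 | PROT_SW1)

enum page_state {
	PAGE_OWNED           = 0,
	PAGE_SHARED_OWNED    = PROT_SW0,
	PAGE_SHARED_BORROWED = PROT_SW1,
};

/* Replace only the state bits, preserving R/W/X and friends. */
static uint32_t mkstate(uint32_t prot, enum page_state state)
{
	return (prot & ~PAGE_STATE_MASK) | state;
}

static enum page_state getstate(uint32_t prot)
{
	return prot & PAGE_STATE_MASK;
}

int main(void)
{
	uint32_t rwx = 0x7; /* pretend the low bits encode R, W, X */
	uint32_t prot = mkstate(rwx, PAGE_SHARED_OWNED);

	assert(getstate(prot) == PAGE_SHARED_OWNED);
	assert((prot & 0x7) == rwx); /* permission bits untouched */

	prot = mkstate(prot, PAGE_SHARED_BORROWED);
	assert(getstate(prot) == PAGE_SHARED_BORROWED);
	return 0;
}
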
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 8ec3a5a7744b..c9a8f535212e 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -23,8 +23,7 @@ int hyp_map_vectors(void);
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
-int __pkvm_create_mappings(unsigned long start, unsigned long size,
- unsigned long phys, enum kvm_pgtable_prot prot);
+int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 76b537f8d1c6..4652fd04bdbe 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -15,6 +15,7 @@
#include <asm/alternative.h>
#include <asm/lse.h>
+#include <asm/rwonce.h>
typedef union hyp_spinlock {
u32 __val;
@@ -89,4 +90,28 @@ static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
: "memory");
}
+static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
+{
+ hyp_spinlock_t lockval = READ_ONCE(*lock);
+
+ return lockval.owner != lockval.next;
+}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock)
+{
+ /*
+ * The __pkvm_init() path accesses protected data-structures without
+ * holding locks as the other CPUs are guaranteed to not enter EL2
+ * concurrently at this point in time. The point by which EL2 is
+ * initialized on all CPUs is reflected in the pkvm static key, so
+ * wait until it is set before checking the lock state.
+ */
+ if (static_branch_likely(&kvm_protected_mode_initialized))
+ BUG_ON(!hyp_spin_is_locked(lock));
+}
+#else
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { }
+#endif
+
#endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
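
hyp_spin_is_locked() works because the hyp lock is a ticket lock: it is held exactly when the ticket currently being served differs from the next ticket to be handed out. A small self-contained model of that check; the owner/next layout mirrors hyp_spinlock_t but is a simplification:

#include <assert.h>
#include <stdint.h>

/* Modeled after hyp_spinlock_t: a u32 split into two u16 tickets. */
typedef union {
	uint32_t __val;
	struct {
		uint16_t owner; /* ticket currently being served */
		uint16_t next;  /* next ticket to hand out */
	};
} ticket_lock_t;

static int is_locked(const ticket_lock_t *lock)
{
	/* Single snapshot of both fields, like READ_ONCE(*lock). */
	ticket_lock_t v = *lock;

	/* Held iff someone took a ticket that hasn't been served yet. */
	return v.owner != v.next;
}

int main(void)
{
	ticket_lock_t lock = { .__val = 0 };

	assert(!is_locked(&lock));
	lock.next++;  /* lock(): take a ticket */
	assert(is_locked(&lock));
	lock.owner++; /* unlock(): serve the ticket */
	assert(!is_locked(&lock));
	return 0;
}
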
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 2b23400e0fb3..4b652ffb591d 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -7,6 +7,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
@@ -85,12 +86,24 @@ SYM_FUNC_START(__hyp_do_panic)
mov x29, x0
+#ifdef CONFIG_NVHE_EL2_DEBUG
+ /* Ensure host stage-2 is disabled */
+ mrs x0, hcr_el2
+ bic x0, x0, #HCR_VM
+ msr hcr_el2, x0
+ isb
+ tlbi vmalls12e1
+ dsb nsh
+#endif
+
/* Load the panic arguments into x0-7 */
mrs x0, esr_el2
- get_vcpu_ptr x4, x5
- mrs x5, far_el2
- mrs x6, hpfar_el2
- mov x7, xzr // Unused argument
+ mov x4, x3
+ mov x3, x2
+ hyp_pa x3, x6
+ get_vcpu_ptr x5, x6
+ mrs x6, far_el2
+ mrs x7, hpfar_el2
/* Enter the host, conditionally restoring the host context. */
cbz x29, __host_enter_without_restoring
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 1632f001f4ed..2da6aa8da868 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -140,14 +140,11 @@ static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot);
}
-static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt)
+static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
{
- DECLARE_REG(unsigned long, start, host_ctxt, 1);
- DECLARE_REG(unsigned long, size, host_ctxt, 2);
- DECLARE_REG(unsigned long, phys, host_ctxt, 3);
- DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
+ DECLARE_REG(u64, pfn, host_ctxt, 1);
- cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot);
+ cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
@@ -163,14 +160,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
-
-static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt)
-{
- DECLARE_REG(phys_addr_t, start, host_ctxt, 1);
- DECLARE_REG(phys_addr_t, end, host_ctxt, 2);
-
- cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end);
-}
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -193,10 +182,9 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__vgic_v3_restore_aprs),
HANDLE_FUNC(__pkvm_init),
HANDLE_FUNC(__pkvm_cpu_set_vector),
- HANDLE_FUNC(__pkvm_create_mappings),
+ HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_prot_finalize),
- HANDLE_FUNC(__pkvm_mark_hyp),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
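
The host_hcall[] table dispatches hypercalls through designated initializers, so each __KVM_HOST_SMCCC_FUNC_* ID indexes straight to its handler; this patch swaps the __pkvm_create_mappings slot for __pkvm_host_share_hyp. A toy dispatcher in the same style, with invented IDs and a simplified context:

#include <stdio.h>

/* Invented hypercall IDs, standing in for __KVM_HOST_SMCCC_FUNC_*. */
enum { FUNC_share_hyp = 0, FUNC_prot_finalize = 1, NR_FUNCS };

struct cpu_context { unsigned long regs[8]; };

typedef void (*hcall_t)(struct cpu_context *);

static void handle_share_hyp(struct cpu_context *ctxt)
{
	/* Argument arrives in reg 1; the return value goes back there. */
	printf("share pfn %lu\n", ctxt->regs[1]);
	ctxt->regs[1] = 0;
}

static void handle_prot_finalize(struct cpu_context *ctxt)
{
	ctxt->regs[1] = 0;
}

#define HANDLE_FUNC(x) [FUNC_##x] = handle_##x

static const hcall_t host_hcall[] = {
	HANDLE_FUNC(share_hyp),
	HANDLE_FUNC(prot_finalize),
};

static void dispatch(unsigned long id, struct cpu_context *ctxt)
{
	if (id < NR_FUNCS && host_hcall[id])
		host_hcall[id](ctxt);
	else
		ctxt->regs[0] = (unsigned long)-1; /* not supported */
}

int main(void)
{
	struct cpu_context ctxt = { .regs = { 0, 42 } };

	dispatch(FUNC_share_hyp, &ctxt);
	return (int)ctxt.regs[1];
}
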
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d938ce95d3bd..5af2e28b9cd7 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool;
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;
-static const u8 pkvm_hyp_id = 1;
+const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
{
@@ -89,6 +89,8 @@ static void prepare_host_vtcr(void)
id_aa64mmfr1_el1_sys_val, phys_shift);
}
+static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
+
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
@@ -101,8 +103,9 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
if (ret)
return ret;
- ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch,
- &host_kvm.mm_ops, KVM_HOST_S2_FLAGS);
+ ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
+ &host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
+ host_stage2_force_pte_cb);
if (ret)
return ret;
@@ -159,6 +162,11 @@ static int host_stage2_unmap_dev_all(void)
return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}
+struct kvm_mem_range {
+ u64 start;
+ u64 end;
+};
+
static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
int cur, left = 0, right = hyp_memblock_nr;
@@ -189,16 +197,26 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
return false;
}
+bool addr_is_memory(phys_addr_t phys)
+{
+ struct kvm_mem_range range;
+
+ return find_mem_range(phys, &range);
+}
+
+static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
+{
+ return range->start <= addr && addr < range->end;
+}
+
static bool range_is_memory(u64 start, u64 end)
{
- struct kvm_mem_range r1, r2;
+ struct kvm_mem_range r;
- if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2))
- return false;
- if (r1.start != r2.start)
+ if (!find_mem_range(start, &r))
return false;
- return true;
+ return is_in_mem_range(end - 1, &r);
}
static inline int __host_stage2_idmap(u64 start, u64 end,
@@ -208,60 +226,208 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
prot, &host_s2_pool);
}
+/*
+ * The pool has been provided with enough pages to cover all of memory with
+ * page granularity, but it is difficult to know how much of the MMIO range
+ * we will need to cover upfront, so we may need to 'recycle' the pages if we
+ * run out.
+ */
+#define host_stage2_try(fn, ...) \
+ ({ \
+ int __ret; \
+ hyp_assert_lock_held(&host_kvm.lock); \
+ __ret = fn(__VA_ARGS__); \
+ if (__ret == -ENOMEM) { \
+ __ret = host_stage2_unmap_dev_all(); \
+ if (!__ret) \
+ __ret = fn(__VA_ARGS__); \
+ } \
+ __ret; \
+ })
+
+static inline bool range_included(struct kvm_mem_range *child,
+ struct kvm_mem_range *parent)
+{
+ return parent->start <= child->start && child->end <= parent->end;
+}
+
+static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
+{
+ struct kvm_mem_range cur;
+ kvm_pte_t pte;
+ u32 level;
+ int ret;
+
+ hyp_assert_lock_held(&host_kvm.lock);
+ ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
+ if (ret)
+ return ret;
+
+ if (kvm_pte_valid(pte))
+ return -EAGAIN;
+
+ if (pte)
+ return -EPERM;
+
+ do {
+ u64 granule = kvm_granule_size(level);
+ cur.start = ALIGN_DOWN(addr, granule);
+ cur.end = cur.start + granule;
+ level++;
+ } while ((level < KVM_PGTABLE_MAX_LEVELS) &&
+ !(kvm_level_supports_block_mapping(level) &&
+ range_included(&cur, range)));
+
+ *range = cur;
+
+ return 0;
+}
+
+int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
+ enum kvm_pgtable_prot prot)
+{
+ hyp_assert_lock_held(&host_kvm.lock);
+
+ return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
+}
+
+int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
+{
+ hyp_assert_lock_held(&host_kvm.lock);
+
+ return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
+ addr, size, &host_s2_pool, owner_id);
+}
+
+static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
+{
+ /*
+ * Block mappings must be used with care in the host stage-2 as a
+ * kvm_pgtable_stage2_map() operation targeting a page in the range of
+ * an existing block will delete the block under the assumption that
+ * mappings in the rest of the block range can always be rebuilt lazily.
+ * That assumption is correct for the host stage-2 with RWX mappings
+ * targeting memory or RW mappings targeting MMIO ranges (see
+ * host_stage2_idmap() below which implements some of the host memory
+ * abort logic). However, this is not safe for any other mappings where
+ * the host stage-2 page-table is in fact the only place where this
+ * state is stored. In all those cases, it is safer to use page-level
+ * mappings, hence avoiding losing the state because of side-effects in
+ * kvm_pgtable_stage2_map().
+ */
+ if (range_is_memory(addr, end))
+ return prot != PKVM_HOST_MEM_PROT;
+ else
+ return prot != PKVM_HOST_MMIO_PROT;
+}
+
static int host_stage2_idmap(u64 addr)
{
- enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
struct kvm_mem_range range;
bool is_memory = find_mem_range(addr, &range);
+ enum kvm_pgtable_prot prot;
int ret;
- if (is_memory)
- prot |= KVM_PGTABLE_PROT_X;
+ prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
hyp_spin_lock(&host_kvm.lock);
- ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range);
+ ret = host_stage2_adjust_range(addr, &range);
if (ret)
goto unlock;
- ret = __host_stage2_idmap(range.start, range.end, prot);
- if (ret != -ENOMEM)
+ ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
+unlock:
+ hyp_spin_unlock(&host_kvm.lock);
+
+ return ret;
+}
+
+static inline bool check_prot(enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_prot required,
+ enum kvm_pgtable_prot denied)
+{
+ return (prot & (required | denied)) == required;
+}
+
+int __pkvm_host_share_hyp(u64 pfn)
+{
+ phys_addr_t addr = hyp_pfn_to_phys(pfn);
+ enum kvm_pgtable_prot prot, cur;
+ void *virt = __hyp_va(addr);
+ enum pkvm_page_state state;
+ kvm_pte_t pte;
+ int ret;
+
+ if (!addr_is_memory(addr))
+ return -EINVAL;
+
+ hyp_spin_lock(&host_kvm.lock);
+ hyp_spin_lock(&pkvm_pgd_lock);
+
+ ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
+ if (ret)
goto unlock;
+ if (!pte)
+ goto map_shared;
/*
- * The pool has been provided with enough pages to cover all of memory
- * with page granularity, but it is difficult to know how much of the
- * MMIO range we will need to cover upfront, so we may need to 'recycle'
- * the pages if we run out.
+ * Check attributes in the host stage-2 PTE. We need the page to be:
+ * - mapped RWX as we're sharing memory;
+ * - not borrowed, as that implies absence of ownership.
+ * Otherwise, we can't let it go through.
*/
- ret = host_stage2_unmap_dev_all();
- if (ret)
+ cur = kvm_pgtable_stage2_pte_prot(pte);
+ prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
+ if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
+ ret = -EPERM;
goto unlock;
+ }
- ret = __host_stage2_idmap(range.start, range.end, prot);
+ state = pkvm_getstate(cur);
+ if (state == PKVM_PAGE_OWNED)
+ goto map_shared;
-unlock:
- hyp_spin_unlock(&host_kvm.lock);
+ /*
+ * Tolerate double-sharing the same page, but this requires
+ * cross-checking the hypervisor stage-1.
+ */
+ if (state != PKVM_PAGE_SHARED_OWNED) {
+ ret = -EPERM;
+ goto unlock;
+ }
- return ret;
-}
+ ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
+ if (ret)
+ goto unlock;
-int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
-{
- int ret;
+ /*
+ * If the page has been shared with the hypervisor, it must be
+ * already mapped as SHARED_BORROWED in its stage-1.
+ */
+ cur = kvm_pgtable_hyp_pte_prot(pte);
+ prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
+ if (!check_prot(cur, prot, ~prot))
+ ret = -EPERM;
+ goto unlock;
+map_shared:
/*
- * host_stage2_unmap_dev_all() currently relies on MMIO mappings being
- * non-persistent, so don't allow changing page ownership in MMIO range.
+ * If the page is not yet shared, adjust mappings in both page-tables
+ * while both locks are held.
*/
- if (!range_is_memory(start, end))
- return -EINVAL;
+ prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
+ ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
+ BUG_ON(ret);
- hyp_spin_lock(&host_kvm.lock);
- ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
- &host_s2_pool, pkvm_hyp_id);
+ prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+ ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
+ BUG_ON(ret);
+
+unlock:
+ hyp_spin_unlock(&pkvm_pgd_lock);
hyp_spin_unlock(&host_kvm.lock);
- return ret != -EAGAIN ? ret : 0;
+ return ret;
}
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
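
check_prot() above reduces the sharing-permission test to a single mask comparison: every 'required' bit must be set and no 'denied' bit may be set. A sketch of the predicate with illustrative bit values:

#include <assert.h>
#include <stdint.h>

#define PROT_R   (1u << 0)
#define PROT_W   (1u << 1)
#define PROT_X   (1u << 2)
#define PROT_SW0 (1u << 3) /* stand-in for a 'shared/borrowed' SW bit */

/* True iff all 'required' bits are set and no 'denied' bit is set. */
static int check_prot(uint32_t prot, uint32_t required, uint32_t denied)
{
	return (prot & (required | denied)) == required;
}

int main(void)
{
	uint32_t rwx = PROT_R | PROT_W | PROT_X;

	/* Host PTE must be RWX and must not carry the borrowed state. */
	assert(check_prot(rwx, rwx, PROT_SW0));
	assert(!check_prot(rwx | PROT_SW0, rwx, PROT_SW0)); /* denied bit */
	assert(!check_prot(PROT_R, rwx, PROT_SW0));         /* missing bits */
	return 0;
}
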
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index a8efdf0f9003..2fabeceb889a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -23,8 +23,8 @@ u64 __io_map_base;
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
-int __pkvm_create_mappings(unsigned long start, unsigned long size,
- unsigned long phys, enum kvm_pgtable_prot prot)
+static int __pkvm_create_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, enum kvm_pgtable_prot prot)
{
int err;
@@ -67,13 +67,15 @@ out:
return addr;
}
-int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
+int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
{
unsigned long start = (unsigned long)from;
unsigned long end = (unsigned long)to;
unsigned long virt_addr;
phys_addr_t phys;
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@@ -81,7 +83,8 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
int err;
phys = hyp_virt_to_phys((void *)virt_addr);
- err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot);
+ err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE,
+ phys, prot);
if (err)
return err;
}
@@ -89,6 +92,17 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return 0;
}
+int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
+{
+ int ret;
+
+ hyp_spin_lock(&pkvm_pgd_lock);
+ ret = pkvm_create_mappings_locked(from, to, prot);
+ hyp_spin_unlock(&pkvm_pgd_lock);
+
+ return ret;
+}
+
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
{
unsigned long start, end;
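
pkvm_create_mappings() is now a thin wrapper that takes pkvm_pgd_lock around pkvm_create_mappings_locked(), so callers that already hold the lock (such as __pkvm_host_share_hyp()) can use the _locked variant directly. The idiom in miniature, with a pthread mutex standing in for the hyp spinlock:

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t pgd_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_mappings;

/* Caller must hold pgd_lock; mirrors the hyp_assert_lock_held() contract. */
static int create_mappings_locked(void *from, void *to)
{
	/* In the kernel this asserts hyp_assert_lock_held(&pkvm_pgd_lock). */
	nr_mappings++;
	(void)from; (void)to;
	return 0;
}

/* Unlocked convenience wrapper for callers that don't hold the lock. */
static int create_mappings(void *from, void *to)
{
	int ret;

	pthread_mutex_lock(&pgd_lock);
	ret = create_mappings_locked(from, to);
	pthread_mutex_unlock(&pgd_lock);

	return ret;
}

int main(void)
{
	char buf[64];

	assert(create_mappings(buf, buf + sizeof(buf)) == 0);
	assert(nr_mappings == 1);
	return 0;
}
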
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0b574d106519..57c27846320f 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
{
void *start, *end, *virt = hyp_phys_to_virt(phys);
unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
+ enum kvm_pgtable_prot prot;
int ret, i;
/* Recreate the hyp page-table using the early page allocator */
@@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
- ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
- if (ret)
- return ret;
-
ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
if (ret)
return ret;
@@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
- ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
- if (ret)
- return ret;
-
ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
if (ret)
return ret;
@@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
+ /*
+ * Map the host's .bss and .rodata sections RO in the hypervisor, but
+ * transfer the ownership from the host to the hypervisor itself to
+ * make sure it can't be donated or shared with another entity.
+ *
+ * The ownership transition requires matching changes in the host
+ * stage-2. This will be done later (see finalize_host_mappings()) once
+ * the hyp_vmemmap is addressable.
+ */
+ prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
+ ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
+ if (ret)
+ return ret;
+
+ ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
+ if (ret)
+ return ret;
+
return 0;
}
@@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
hyp_put_page(&hpool, addr);
}
+static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
+ kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag,
+ void * const arg)
+{
+ enum kvm_pgtable_prot prot;
+ enum pkvm_page_state state;
+ kvm_pte_t pte = *ptep;
+ phys_addr_t phys;
+
+ if (!kvm_pte_valid(pte))
+ return 0;
+
+ if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
+ return -EINVAL;
+
+ phys = kvm_pte_to_phys(pte);
+ if (!addr_is_memory(phys))
+ return 0;
+
+ /*
+ * Adjust the host stage-2 mappings to match the ownership attributes
+ * configured in the hypervisor stage-1.
+ */
+ state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+ switch (state) {
+ case PKVM_PAGE_OWNED:
+ return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
+ case PKVM_PAGE_SHARED_OWNED:
+ prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
+ break;
+ case PKVM_PAGE_SHARED_BORROWED:
+ prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
+}
+
+static int finalize_host_mappings(void)
+{
+ struct kvm_pgtable_walker walker = {
+ .cb = finalize_host_mappings_walker,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
+}
+
void __noreturn __pkvm_init_finalise(void)
{
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
+ ret = finalize_host_mappings();
+ if (ret)
+ goto out;
+
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
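
finalize_host_mappings_walker() mirrors each hypervisor stage-1 page state into the host stage-2: pages the hypervisor owns outright are re-owned by pkvm_hyp_id in the host table, while shared pages get the complementary state on the host side. A sketch of that state flip over a toy page array; the enum values and page struct are illustrative:

#include <assert.h>
#include <stddef.h>

enum page_state { OWNED, SHARED_OWNED, SHARED_BORROWED };

struct page { enum page_state hyp; enum page_state host; int host_owner; };

#define HYP_ID 1

/* Mirror one hyp stage-1 state into the host stage-2, as the walker does. */
static int finalize_one(struct page *p)
{
	switch (p->hyp) {
	case OWNED:
		p->host_owner = HYP_ID;    /* host loses access entirely */
		return 0;
	case SHARED_OWNED:
		p->host = SHARED_BORROWED; /* hyp owns, host borrows */
		return 0;
	case SHARED_BORROWED:
		p->host = SHARED_OWNED;    /* host owns, hyp borrows */
		return 0;
	}
	return -1;
}

int main(void)
{
	struct page pages[] = {
		{ .hyp = OWNED },
		{ .hyp = SHARED_OWNED },
		{ .hyp = SHARED_BORROWED },
	};

	for (size_t i = 0; i < sizeof(pages) / sizeof(pages[0]); i++)
		assert(finalize_one(&pages[i]) == 0);

	assert(pages[0].host_owner == HYP_ID);
	assert(pages[1].host == SHARED_BORROWED);
	assert(pages[2].host == SHARED_OWNED);
	return 0;
}
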
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 78f36bd5df6c..f8ceebe4982e 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -11,16 +11,12 @@
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
-#define KVM_PTE_VALID BIT(0)
#define KVM_PTE_TYPE BIT(1)
#define KVM_PTE_TYPE_BLOCK 0
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
-#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
-#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
-
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
@@ -40,6 +36,8 @@
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51)
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
@@ -48,9 +46,7 @@
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
KVM_PTE_LEAF_ATTR_HI_S2_XN)
-#define KVM_PTE_LEAF_ATTR_S2_IGNORED GENMASK(58, 55)
-
-#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56)
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
#define KVM_MAX_OWNER_ID 1
struct kvm_pgtable_walk_data {
@@ -61,17 +57,6 @@ struct kvm_pgtable_walk_data {
u64 end;
};
-static u64 kvm_granule_shift(u32 level)
-{
- /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
- return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
-}
-
-static u64 kvm_granule_size(u32 level)
-{
- return BIT(kvm_granule_shift(level));
-}
-
#define KVM_PHYS_INVALID (-1ULL)
static bool kvm_phys_is_valid(u64 phys)
@@ -79,15 +64,6 @@ static bool kvm_phys_is_valid(u64 phys)
return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
}
-static bool kvm_level_supports_block_mapping(u32 level)
-{
- /*
- * Reject invalid block mappings and don't bother with 4TB mappings for
- * 52-bit PAs.
- */
- return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
-}
-
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
u64 granule = kvm_granule_size(level);
@@ -135,11 +111,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}
-static bool kvm_pte_valid(kvm_pte_t pte)
-{
- return pte & KVM_PTE_VALID;
-}
-
static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
@@ -151,16 +122,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level)
return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}
-static u64 kvm_pte_to_phys(kvm_pte_t pte)
-{
- u64 pa = pte & KVM_PTE_ADDR_MASK;
-
- if (PAGE_SHIFT == 16)
- pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
-
- return pa;
-}
-
static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
@@ -396,11 +357,47 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
+ attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
return 0;
}
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
+{
+ enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
+ u32 ap;
+
+ if (!kvm_pte_valid(pte))
+ return prot;
+
+ if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
+ prot |= KVM_PGTABLE_PROT_X;
+
+ ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
+ if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
+ prot |= KVM_PGTABLE_PROT_R;
+ else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
+ prot |= KVM_PGTABLE_PROT_RW;
+
+ return prot;
+}
+
+static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
+{
+ /*
+ * Tolerate KVM recreating the exact same mapping, or changing software
+ * bits if the existing mapping was valid.
+ */
+ if (old == new)
+ return false;
+
+ if (!kvm_pte_valid(old))
+ return true;
+
+ return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
+}
+
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
@@ -410,9 +407,8 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
- /* Tolerate KVM recreating the exact same mapping */
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
- if (old != new && !WARN_ON(kvm_pte_valid(old)))
+ if (hyp_pte_needs_update(old, new))
smp_store_release(ptep, new);
data->phys += granule;
@@ -477,6 +473,8 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
pgt->mm_ops = mm_ops;
pgt->mmu = NULL;
+ pgt->force_pte_cb = NULL;
+
return 0;
}
@@ -514,6 +512,9 @@ struct stage2_map_data {
void *memcache;
struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Force mappings to page granularity */
+ bool force_pte;
};
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
@@ -578,11 +579,29 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+ attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
return 0;
}
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
+{
+ enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
+
+ if (!kvm_pte_valid(pte))
+ return prot;
+
+ if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
+ prot |= KVM_PGTABLE_PROT_R;
+ if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
+ prot |= KVM_PGTABLE_PROT_W;
+ if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
+ prot |= KVM_PGTABLE_PROT_X;
+
+ return prot;
+}
+
static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
{
if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
@@ -627,6 +646,15 @@ static bool stage2_pte_executable(kvm_pte_t pte)
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
+static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
+ struct stage2_map_data *data)
+{
+ if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
+ return false;
+
+ return kvm_block_mapping_supported(addr, end, data->phys, level);
+}
+
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
struct stage2_map_data *data)
@@ -636,7 +664,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
struct kvm_pgtable *pgt = data->mmu->pgt;
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
- if (!kvm_block_mapping_supported(addr, end, phys, level))
+ if (!stage2_leaf_mapping_allowed(addr, end, level, data))
return -E2BIG;
if (kvm_phys_is_valid(phys))
@@ -680,7 +708,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
if (data->anchor)
return 0;
- if (!kvm_block_mapping_supported(addr, end, data->phys, level))
+ if (!stage2_leaf_mapping_allowed(addr, end, level, data))
return 0;
data->childp = kvm_pte_follow(*ptep, data->mm_ops);
@@ -810,6 +838,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
.mmu = pgt->mmu,
.memcache = mc,
.mm_ops = pgt->mm_ops,
+ .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@@ -841,6 +870,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
.memcache = mc,
.mm_ops = pgt->mm_ops,
.owner_id = owner_id,
+ .force_pte = true,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@@ -1034,6 +1064,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
u32 level;
kvm_pte_t set = 0, clr = 0;
+ if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
+ return -EINVAL;
+
if (prot & KVM_PGTABLE_PROT_R)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -1082,9 +1115,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
-int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
- struct kvm_pgtable_mm_ops *mm_ops,
- enum kvm_pgtable_stage2_flags flags)
+
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+ struct kvm_pgtable_mm_ops *mm_ops,
+ enum kvm_pgtable_stage2_flags flags,
+ kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
u64 vtcr = arch->vtcr;
@@ -1102,6 +1137,7 @@ int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch
pgt->mm_ops = mm_ops;
pgt->mmu = &arch->mmu;
pgt->flags = flags;
+ pgt->force_pte_cb = force_pte_cb;
/* Ensure zeroed PGD pages are visible to the hardware walker */
dsb(ishst);
@@ -1141,77 +1177,3 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
pgt->pgd = NULL;
}
-
-#define KVM_PTE_LEAF_S2_COMPAT_MASK (KVM_PTE_LEAF_ATTR_S2_PERMS | \
- KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \
- KVM_PTE_LEAF_ATTR_S2_IGNORED)
-
-static int stage2_check_permission_walker(u64 addr, u64 end, u32 level,
- kvm_pte_t *ptep,
- enum kvm_pgtable_walk_flags flag,
- void * const arg)
-{
- kvm_pte_t old_attr, pte = *ptep, *new_attr = arg;
-
- /*
- * Compatible mappings are either invalid and owned by the page-table
- * owner (whose id is 0), or valid with matching permission attributes.
- */
- if (kvm_pte_valid(pte)) {
- old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK;
- if (old_attr != *new_attr)
- return -EEXIST;
- } else if (pte) {
- return -EEXIST;
- }
-
- return 0;
-}
-
-int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
- enum kvm_pgtable_prot prot,
- struct kvm_mem_range *range)
-{
- kvm_pte_t attr;
- struct kvm_pgtable_walker check_perm_walker = {
- .cb = stage2_check_permission_walker,
- .flags = KVM_PGTABLE_WALK_LEAF,
- .arg = &attr,
- };
- u64 granule, start, end;
- u32 level;
- int ret;
-
- ret = stage2_set_prot_attr(pgt, prot, &attr);
- if (ret)
- return ret;
- attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;
-
- for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) {
- granule = kvm_granule_size(level);
- start = ALIGN_DOWN(addr, granule);
- end = start + granule;
-
- if (!kvm_level_supports_block_mapping(level))
- continue;
-
- if (start < range->start || range->end < end)
- continue;
-
- /*
- * Check the presence of existing mappings with incompatible
- * permissions within the current block range, and try one level
- * deeper if one is found.
- */
- ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker);
- if (ret != -EEXIST)
- break;
- }
-
- if (!ret) {
- range->start = start;
- range->end = end;
- }
-
- return ret;
-}
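
kvm_pgtable_stage2_pte_prot() recovers a prot value from raw PTE bits: R/W from the S2AP field, X from the inverted XN bit, and the software bits pass through even for invalid PTEs. A userspace sketch of the same decode; the bit positions are assumptions based on the KVM_PTE_LEAF_ATTR_* definitions in this file, and the PROT_* encoding is invented:

#include <assert.h>
#include <stdint.h>

/* Assumed bit layout, following the definitions above. */
#define PTE_VALID  (1ULL << 0)
#define PTE_S2AP_R (1ULL << 6)
#define PTE_S2AP_W (1ULL << 7)
#define PTE_S2_XN  (1ULL << 54)
#define PTE_HI_SW  (0xfULL << 55) /* GENMASK(58, 55) */

#define PROT_R (1ULL << 0)
#define PROT_W (1ULL << 1)
#define PROT_X (1ULL << 2)

static uint64_t stage2_pte_prot(uint64_t pte)
{
	uint64_t prot = pte & PTE_HI_SW; /* SW bits survive even if invalid */

	if (!(pte & PTE_VALID))
		return prot;

	if (pte & PTE_S2AP_R)
		prot |= PROT_R;
	if (pte & PTE_S2AP_W)
		prot |= PROT_W;
	if (!(pte & PTE_S2_XN)) /* XN clear => executable */
		prot |= PROT_X;

	return prot;
}

int main(void)
{
	uint64_t pte = PTE_VALID | PTE_S2AP_R | PTE_S2AP_W; /* RW, XN clear */

	assert(stage2_pte_prot(pte) == (PROT_R | PROT_W | PROT_X));
	assert(stage2_pte_prot(1ULL << 55) == (1ULL << 55)); /* SW bits only */
	return 0;
}
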
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index f446f55b6832..3f0aabb3a1e4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -260,10 +260,8 @@ static int __create_hyp_mappings(unsigned long start, unsigned long size,
{
int err;
- if (!kvm_host_owns_hyp_mappings()) {
- return kvm_call_hyp_nvhe(__pkvm_create_mappings,
- start, size, phys, prot);
- }
+ if (WARN_ON(!kvm_host_owns_hyp_mappings()))
+ return -EINVAL;
mutex_lock(&kvm_hyp_pgd_mutex);
err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
@@ -283,6 +281,21 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
}
}
+static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
+{
+ phys_addr_t addr;
+ int ret;
+
+ for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ __phys_to_pfn(addr));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
@@ -303,6 +316,13 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
if (is_kernel_in_hyp_mode())
return 0;
+ if (!kvm_host_owns_hyp_mappings()) {
+ if (WARN_ON(prot != PAGE_HYP))
+ return -EPERM;
+ return pkvm_share_hyp(kvm_kaddr_to_phys(from),
+ kvm_kaddr_to_phys(to));
+ }
+
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
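
In protected mode, create_hyp_mappings() no longer builds hyp mappings from the host; it issues one __pkvm_host_share_hyp hypercall per page of the range. The page-by-page walk, extracted into a standalone sketch with the hypercall stubbed out:

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE  4096ULL
#define PAGE_SHIFT 12
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

static int shared_pfns;

/* Stand-in for kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn). */
static int hyp_share(uint64_t pfn)
{
	(void)pfn;
	shared_pfns++;
	return 0;
}

static int share_range(uint64_t start, uint64_t end)
{
	uint64_t addr;
	int ret;

	/* Round down so a partially covered first page is still shared. */
	for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
		ret = hyp_share(addr >> PAGE_SHIFT);
		if (ret)
			return ret;
	}

	return 0;
}

int main(void)
{
	/* A range straddling three pages triggers three hypercalls. */
	assert(share_range(PAGE_SIZE + 100, 3 * PAGE_SIZE + 1) == 0);
	assert(shared_pfns == 3);
	return 0;
}
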