Diffstat (limited to 'arch/arm64/kvm/pkvm.c')
-rw-r--r-- | arch/arm64/kvm/pkvm.c | 294
1 file changed, 238 insertions, 56 deletions
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index b7be96a53597..fcd70bfe44fb 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -5,11 +5,12 @@
  */
 
 #include <linux/init.h>
+#include <linux/interval_tree_generic.h>
 #include <linux/kmemleak.h>
 #include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
 #include <linux/memblock.h>
 #include <linux/mutex.h>
-#include <linux/sort.h>
 
 #include <asm/kvm_pkvm.h>
 
@@ -23,23 +24,6 @@ static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
 phys_addr_t hyp_mem_base;
 phys_addr_t hyp_mem_size;
 
-static int cmp_hyp_memblock(const void *p1, const void *p2)
-{
-	const struct memblock_region *r1 = p1;
-	const struct memblock_region *r2 = p2;
-
-	return r1->base < r2->base ? -1 : (r1->base > r2->base);
-}
-
-static void __init sort_memblock_regions(void)
-{
-	sort(hyp_memory,
-	     *hyp_memblock_nr_ptr,
-	     sizeof(struct memblock_region),
-	     cmp_hyp_memblock,
-	     NULL);
-}
-
 static int __init register_memblock_regions(void)
 {
 	struct memblock_region *reg;
@@ -51,7 +35,6 @@ static int __init register_memblock_regions(void)
 		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
 		(*hyp_memblock_nr_ptr)++;
 	}
-	sort_memblock_regions();
 
 	return 0;
 }
@@ -78,6 +61,7 @@ void __init kvm_hyp_reserve(void)
 	hyp_mem_pages += host_s2_pgtable_pages();
 	hyp_mem_pages += hyp_vm_table_pages();
 	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
+	hyp_mem_pages += pkvm_selftest_pages();
 	hyp_mem_pages += hyp_ffa_proxy_pages();
 
 	/*
@@ -110,6 +94,29 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 
 	host_kvm->arch.pkvm.handle = 0;
 	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
+}
+
+static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
+{
+	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
+	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
+	void *hyp_vcpu;
+	int ret;
+
+	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+
+	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
+	if (!hyp_vcpu)
+		return -ENOMEM;
+
+	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
+	if (!ret)
+		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
+	else
+		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
+
+	return ret;
 }
 
 /*
@@ -124,11 +131,8 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
  */
 static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
 {
-	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
-	struct kvm_vcpu *host_vcpu;
-	pkvm_handle_t handle;
+	size_t pgd_sz, hyp_vm_sz;
 	void *pgd, *hyp_vm;
-	unsigned long idx;
 	int ret;
 
 	if (host_kvm->created_vcpus < 1)
@@ -160,40 +164,11 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
 	if (ret < 0)
 		goto free_vm;
 
-	handle = ret;
-
-	host_kvm->arch.pkvm.handle = handle;
-
-	/* Donate memory for the vcpus at hyp and initialize it. */
-	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
-	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
-		void *hyp_vcpu;
-
-		/* Indexing of the vcpus to be sequential starting at 0. */
-		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
-			ret = -EINVAL;
-			goto destroy_vm;
-		}
-
-		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
-		if (!hyp_vcpu) {
-			ret = -ENOMEM;
-			goto destroy_vm;
-		}
-
-		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
-					hyp_vcpu);
-		if (ret) {
-			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
-			goto destroy_vm;
-		}
-	}
+	host_kvm->arch.pkvm.handle = ret;
+	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
 
 	return 0;
-
-destroy_vm:
-	__pkvm_destroy_hyp_vm(host_kvm);
-	return ret;
 free_vm:
 	free_pages_exact(hyp_vm, hyp_vm_sz);
 free_pgd:
@@ -213,6 +188,18 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
 	return ret;
 }
 
+int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
+{
+	int ret = 0;
+
+	mutex_lock(&vcpu->kvm->arch.config_lock);
+	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
+		ret = __pkvm_create_hyp_vcpu(vcpu);
+	mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+	return ret;
+}
+
 void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 {
 	mutex_lock(&host_kvm->arch.config_lock);
@@ -222,7 +209,6 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 
 int pkvm_init_host_vm(struct kvm *host_kvm)
 {
-	mutex_init(&host_kvm->lock);
 	return 0;
 }
 
@@ -259,6 +245,8 @@ static int __init finalize_pkvm(void)
 	 * at, which would end badly once inaccessible.
 	 */
 	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
+	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
 	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
 
 	ret = pkvm_drop_host_privileges();
@@ -268,3 +256,197 @@ static int __init finalize_pkvm(void)
 	return ret;
 }
 device_initcall_sync(finalize_pkvm);
+
+static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
+{
+	return m->gfn * PAGE_SIZE;
+}
+
+static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
+{
+	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
+		     __pkvm_mapping_start, __pkvm_mapping_end, static,
+		     pkvm_mapping);
+
+/*
+ * __tmp is updated to iter_first(pkvm_mappings) *before* entering the body of the loop to allow
+ * freeing of __map inline.
+ */
+#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
+	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings,	\
+								  __start, __end - 1);		\
+	     __tmp && ({									\
+			__map = __tmp;								\
+			__tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);		\
+			true;									\
+		       });									\
+	    )
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+			     struct kvm_pgtable_mm_ops *mm_ops)
+{
+	pgt->pkvm_mappings	= RB_ROOT_CACHED;
+	pgt->mmu		= mmu;
+
+	return 0;
+}
+
+static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
+{
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+	pkvm_handle_t handle = kvm->arch.pkvm.handle;
+	struct pkvm_mapping *mapping;
+	int ret;
+
+	if (!handle)
+		return 0;
+
+	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
+		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
+					mapping->nr_pages);
+		if (WARN_ON(ret))
+			return ret;
+		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
+		kfree(mapping);
+	}
+
+	return 0;
+}
+
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
+}
+
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+			    u64 phys, enum kvm_pgtable_prot prot,
+			    void *mc, enum kvm_pgtable_walk_flags flags)
+{
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+	struct pkvm_mapping *mapping = NULL;
+	struct kvm_hyp_memcache *cache = mc;
+	u64 gfn = addr >> PAGE_SHIFT;
+	u64 pfn = phys >> PAGE_SHIFT;
+	int ret;
+
+	if (size != PAGE_SIZE && size != PMD_SIZE)
+		return -EINVAL;
+
+	lockdep_assert_held_write(&kvm->mmu_lock);
+
+	/*
+	 * Calling stage2_map() on top of existing mappings is either happening because of a race
+	 * with another vCPU, or because we're changing between page and block mappings. As per
+	 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
+	 */
+	mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
+	if (mapping) {
+		if (size == (mapping->nr_pages * PAGE_SIZE))
+			return -EAGAIN;
+
+		/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
+		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+		if (ret)
+			return ret;
+		mapping = NULL;
+	}
+
+	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
+	if (WARN_ON(ret))
+		return ret;
+
+	swap(mapping, cache->mapping);
+	mapping->gfn = gfn;
+	mapping->pfn = pfn;
+	mapping->nr_pages = size / PAGE_SIZE;
+	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
+
+	return ret;
+}
+
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
+
+	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+}
+
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+	pkvm_handle_t handle = kvm->arch.pkvm.handle;
+	struct pkvm_mapping *mapping;
+	int ret = 0;
+
+	lockdep_assert_held(&kvm->mmu_lock);
+	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
+					mapping->nr_pages);
+		if (WARN_ON(ret))
+			break;
+	}
+
+	return ret;
+}
+
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+	struct pkvm_mapping *mapping;
+
+	lockdep_assert_held(&kvm->mmu_lock);
+	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
+					  PAGE_SIZE * mapping->nr_pages);
+
+	return 0;
+}
+
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
+{
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+	pkvm_handle_t handle = kvm->arch.pkvm.handle;
+	struct pkvm_mapping *mapping;
+	bool young = false;
+
+	lockdep_assert_held(&kvm->mmu_lock);
+	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
+					   mapping->nr_pages, mkold);
+
+	return young;
+}
+
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+				    enum kvm_pgtable_walk_flags flags)
+{
+	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
+}
+
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+				 enum kvm_pgtable_walk_flags flags)
+{
+	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
+}
+
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
+{
+	WARN_ON_ONCE(1);
+}
+
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
+{
+	WARN_ON_ONCE(1);
+	return NULL;
+}
+
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+			      struct kvm_mmu_memory_cache *mc)
+{
+	WARN_ON_ONCE(1);
+	return -EINVAL;
+}
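A note on the iteration idiom in the diff above: for_each_mapping_in_range_safe() fetches the successor with pkvm_mapping_iter_next() *before* the loop body runs, which is what lets the unmap path unshare, remove and kfree() the current pkvm_mapping without invalidating the walk. Below is a minimal, userspace-only sketch of that idiom, assuming a plain doubly-linked list in place of the INTERVAL_TREE_DEFINE()-generated tree; struct mapping, iter_next() and for_each_mapping_safe() are hypothetical stand-ins for illustration, not kernel APIs.

/* Illustrative sketch only; build with: cc -std=gnu99 -Wall sketch.c */
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct pkvm_mapping; a list replaces the interval tree. */
struct mapping {
	unsigned long gfn;
	unsigned long nr_pages;
	struct mapping *prev, *next;
};

/* Circular list with a sentinel head, so removal never needs the list root. */
static struct mapping head = { .prev = &head, .next = &head };

static void insert_mapping(struct mapping *m)
{
	m->prev = head.prev;
	m->next = &head;
	head.prev->next = m;
	head.prev = m;
}

static void remove_mapping(struct mapping *m)
{
	m->prev->next = m->next;
	m->next->prev = m->prev;
}

/* Next element after @m whose gfn range overlaps [start, end]; NULL when exhausted. */
static struct mapping *iter_next(struct mapping *m, unsigned long start, unsigned long end)
{
	for (m = m->next; m != &head; m = m->next)
		if (m->gfn <= end && m->gfn + m->nr_pages - 1 >= start)
			return m;
	return NULL;
}

/*
 * Same shape as for_each_mapping_in_range_safe(): the successor is looked up *before*
 * the body runs, so the body may remove and free the current element.
 */
#define for_each_mapping_safe(__start, __end, __map)					\
	for (struct mapping *__tmp = iter_next(&head, __start, __end);			\
	     __tmp && ({ __map = __tmp; __tmp = iter_next(__map, __start, __end); 1; }); \
	    )

int main(void)
{
	struct mapping *m;

	for (unsigned long gfn = 0; gfn < 8; gfn += 2) {
		m = calloc(1, sizeof(*m));
		m->gfn = gfn;
		m->nr_pages = 1;
		insert_mapping(m);
	}

	/* "Unmap" gfns 2..5: freeing the current element inside the body is safe. */
	for_each_mapping_safe(2, 5, m) {
		printf("removing gfn %lu\n", m->gfn);
		remove_mapping(m);
		free(m);
	}

	for_each_mapping_safe(0, ~0UL, m)
		printf("still mapped: gfn %lu\n", m->gfn);

	return 0;
}

The real macro works the same way: because __tmp already points past __map when the body executes, the unshare/remove/kfree sequence in __pkvm_pgtable_stage2_unmap() never touches freed memory.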