From 97418e968b01ba8e3ad41c38b42106c48bc19544 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 6 Dec 2019 10:08:02 +0800 Subject: KVM: arm/arm64: Remove excessive permission check in kvm_arch_prepare_memory_region In kvm_arch_prepare_memory_region, arm kvm regards the memory region as writable if the flag has no KVM_MEM_READONLY, and the vm is readonly if !VM_WRITE. But there is common usage for setting kvm memory region as follows: e.g. qemu side (see the PROT_NONE flag) 1. mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); memory_region_init_ram_ptr() 2. re mmap the above area with read/write authority. Such example is used in virtio-fs qemu codes which hasn't been upstreamed [1]. But seems we can't forbid this example. Without this patch, it will cause an EPERM during kvm_set_memory_region() and cause qemu boot crash. As told by Ard, "the underlying assumption is incorrect, i.e., that the value of vm_flags at this point in time defines how the VMA is used during its lifetime. There may be other cases where a VMA is created with VM_READ vm_flags that are changed to VM_READ|VM_WRITE later, and we are currently rejecting this use case as well." [1] https://gitlab.com/virtio-fs/qemu/blob/5a356e/hw/virtio/vhost-user-fs.c#L488 Suggested-by: Ard Biesheuvel Signed-off-by: Jia He Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Link: https://lore.kernel.org/r/20191206020802.196108-1-justin.he@arm.com --- virt/kvm/arm/mmu.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'virt/kvm/arm/mmu.c') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 38b4c910b6c3..a48994af70b8 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2301,15 +2301,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma || vma->vm_start >= reg_end) break; - /* - * Mapping a read-only VMA is only allowed if the - * memory region is configured as read-only. - */ - if (writable && !(vma->vm_flags & VM_WRITE)) { - ret = -EPERM; - break; - } - /* * Take the intersection of this VMA with the memory region */ -- cgit v1.2.3-59-g8ed1b From 6d674e28f642e3ff676fbae2d8d1b872814d32b6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 11 Dec 2019 16:56:48 +0000 Subject: KVM: arm/arm64: Properly handle faulting of device mappings A device mapping is normally always mapped at Stage-2, since there is very little gain in having it faulted in. Nonetheless, it is possible to end-up in a situation where the device mapping has been removed from Stage-2 (userspace munmaped the VFIO region, and the MMU notifier did its job), but present in a userspace mapping (userpace has mapped it back at the same address). In such a situation, the device mapping will be demand-paged as the guest performs memory accesses. This requires to be careful when dealing with mapping size, cache management, and to handle potential execution of a device mapping. Reported-by: Alexandru Elisei Signed-off-by: Marc Zyngier Tested-by: Alexandru Elisei Reviewed-by: James Morse Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191211165651.7889-2-maz@kernel.org --- virt/kvm/arm/mmu.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'virt/kvm/arm/mmu.c') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index a48994af70b8..0b32a904a1bb 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -38,6 +38,11 @@ static unsigned long io_map_base; #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) #define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) +static bool is_iomap(unsigned long flags) +{ + return flags & KVM_S2PTE_FLAG_IS_IOMAP; +} + static bool memslot_is_logging(struct kvm_memory_slot *memslot) { return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); @@ -1698,6 +1703,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_pagesize = vma_kernel_pagesize(vma); if (logging_active || + (vma->vm_flags & VM_PFNMAP) || !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { force_pte = true; vma_pagesize = PAGE_SIZE; @@ -1760,6 +1766,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, writable = false; } + if (exec_fault && is_iomap(flags)) + return -ENOEXEC; + spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; @@ -1781,7 +1790,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) kvm_set_pfn_dirty(pfn); - if (fault_status != FSC_PERM) + if (fault_status != FSC_PERM && !is_iomap(flags)) clean_dcache_guest_page(pfn, vma_pagesize); if (exec_fault) @@ -1948,9 +1957,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; - goto out_unlock; + ret = -ENOEXEC; + goto out; } /* @@ -1992,6 +2000,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; +out: + if (ret == -ENOEXEC) { + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + } out_unlock: srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; -- cgit v1.2.3-59-g8ed1b