From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 17:28:31 -0500 Subject: signal: Deliver all of the siginfo perf data in _perf Don't abuse si_errno and deliver all of the perf data in _perf member of siginfo_t. Note: The data field in the perf data structures in a u64 to allow a pointer to be encoded without needed to implement a 32bit and 64bit version of the same structure. There already exists a 32bit and 64bit versions siginfo_t, and the 32bit version can not include a 64bit member as it only has 32bit alignment. So unsigned long is used in siginfo_t instead of a u64 as unsigned long can encode a pointer on all architectures linux supports. v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/perf_event.h | 2 +- include/uapi/linux/signalfd.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e54e639248c8..7b14753b3d38 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index 7e333042c7e3..e78dddf433fc 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,8 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 __pad3; - __u64 ssi_perf; + __u32 ssi_perf_type; + __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the -- cgit v1.2.3-59-g8ed1b From 922e3013046b79b444c87eda5baf43afae1326a8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 May 2021 12:52:43 -0500 Subject: signalfd: Remove SIL_PERF_EVENT fields from signalfd_siginfo With the addition of ssi_perf_data and ssi_perf_type struct signalfd_siginfo is dangerously close to running out of space. All that remains is just enough space for two additional 64bit fields. A practice of adding all possible siginfo_t fields into struct singalfd_siginfo can not be supported as adding the missing fields ssi_lower, ssi_upper, and ssi_pkey would require two 64bit fields and one 32bit fields. In practice the fields ssi_perf_data and ssi_perf_type can never be used by signalfd as the signal that generates them always delivers them synchronously to the thread that triggers them. Therefore until someone actually needs the fields ssi_perf_data and ssi_perf_type in signalfd_siginfo remove them. This leaves a bit more room for future expansion. v1: https://lkml.kernel.org/r/20210503203814.25487-12-ebiederm@xmission.com v2: https://lkml.kernel.org/r/20210505141101.11519-12-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-5-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- fs/signalfd.c | 16 ++++++---------- include/uapi/linux/signalfd.h | 4 +--- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/signalfd.c b/fs/signalfd.c index 373df2f12415..167b5889db4b 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -114,12 +114,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, break; case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: + case SIL_PERF_EVENT: /* - * Fall through to the SIL_FAULT case. Both SIL_FAULT_BNDERR - * and SIL_FAULT_PKUERR are only generated by faults that - * deliver them synchronously to userspace. In case someone - * injects one of these signals and signalfd catches it treat - * it as SIL_FAULT. + * Fall through to the SIL_FAULT case. SIL_FAULT_BNDERR, + * SIL_FAULT_PKUERR, and SIL_PERF_EVENT are only + * generated by faults that deliver them synchronously to + * userspace. In case someone injects one of these signals + * and signalfd catches it treat it as SIL_FAULT. */ case SIL_FAULT: new.ssi_addr = (long) kinfo->si_addr; @@ -132,11 +133,6 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, new.ssi_addr = (long) kinfo->si_addr; new.ssi_addr_lsb = (short) kinfo->si_addr_lsb; break; - case SIL_PERF_EVENT: - new.ssi_addr = (long) kinfo->si_addr; - new.ssi_perf_type = kinfo->si_perf_type; - new.ssi_perf_data = kinfo->si_perf_data; - break; case SIL_CHLD: new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index e78dddf433fc..83429a05b698 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,6 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 ssi_perf_type; - __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the @@ -51,7 +49,7 @@ struct signalfd_siginfo { * comes out of a read(2) and we really don't want to have * a compat on read(2). */ - __u8 __pad[16]; + __u8 __pad[28]; }; -- cgit v1.2.3-59-g8ed1b From ea7fc1bb1cd1b92b42b1d9273ce7e231d3dc9321 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:12 +0100 Subject: KVM: arm64: Introduce MTE VM feature Add a new VM feature 'KVM_ARM_CAP_MTE' which enables memory tagging for a VM. This will expose the feature to the guest and automatically tag memory pages touched by the VM as PG_mte_tagged (and clear the tag storage) to ensure that the guest cannot see stale tags, and so that the tags are correctly saved/restored across swap. Actually exposing the new capability to user space happens in a later patch. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price [maz: move VM_SHARED sampling into the critical section] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-3-steven.price@arm.com --- arch/arm64/include/asm/kvm_emulate.h | 3 ++ arch/arm64/include/asm/kvm_host.h | 4 +++ arch/arm64/kvm/hyp/exception.c | 3 +- arch/arm64/kvm/mmu.c | 67 +++++++++++++++++++++++++++++++++++- arch/arm64/kvm/sys_regs.c | 7 ++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 83 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 01b9857757f2..fd418955e31e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 |= HCR_TID2; + + if (kvm_has_mte(vcpu->kvm)) + vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cd7d5c8c4bc..1c4293c46ef6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -132,6 +132,9 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + + /* Memory Tagging Extension enabled for the guest */ + bool mte_enabled; }; struct kvm_vcpu_fault_info { @@ -769,6 +772,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 11541b94b328..0418399e0a20 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= (old & PSR_C_BIT); new |= (old & PSR_V_BIT); - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + if (kvm_has_mte(vcpu->kvm)) + new |= PSR_TCO_BIT; new |= (old & PSR_DIT_BIT); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..c6a97d463892 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -822,6 +822,45 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return PAGE_SIZE; } +/* + * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be + * able to see the page's tags and therefore they must be initialised first. If + * PG_mte_tagged is set, tags have already been initialised. + * + * The race in the test/set of the PG_mte_tagged flag is handled by: + * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs + * racing to santise the same page + * - mmap_lock protects between a VM faulting a page in and the VMM performing + * an mprotect() to add VM_MTE + */ +static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) +{ + unsigned long i, nr_pages = size >> PAGE_SHIFT; + struct page *page; + + if (!kvm_has_mte(kvm)) + return 0; + + /* + * pfn_to_online_page() is used to reject ZONE_DEVICE pages + * that may not support tags. + */ + page = pfn_to_online_page(pfn); + + if (!page) + return -EFAULT; + + for (i = 0; i < nr_pages; i++, page++) { + if (!test_bit(PG_mte_tagged, &page->flags)) { + mte_clear_page_tags(page_address(page)); + set_bit(PG_mte_tagged, &page->flags); + } + } + + return 0; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -830,6 +869,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; + bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -873,6 +913,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = PAGE_SHIFT; } + shared = (vma->vm_flags & VM_PFNMAP); + switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -971,8 +1013,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) + if (fault_status != FSC_PERM && !device) { + /* Check the VMM hasn't introduced a new VM_SHARED VMA */ + if (kvm_has_mte(kvm) && shared) { + ret = -EFAULT; + goto out_unlock; + } + ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); + if (ret) + goto out_unlock; + clean_dcache_guest_page(pfn, vma_pagesize); + } if (exec_fault) { prot |= KVM_PGTABLE_PROT_X; @@ -1168,12 +1220,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); + int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); + ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); + if (ret) + return false; + /* * We've moved a page around, probably through CoW, so let's treat it * just like a translation fault and clean the cache to the PoC. @@ -1381,6 +1438,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) break; + /* + * VM_SHARED mappings are not allowed with MTE to avoid races + * when updating the PG_mte_tagged page flag, see + * sanitise_mte_tags for more details. + */ + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) + return -EINVAL; + /* * Take the intersection of this VMA with the memory region */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1a7968ad078c..36f67f8deae1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64PFR1_EL1: val &= ~FEATURE(ID_AA64PFR1_MTE); + if (kvm_has_mte(vcpu->kvm)) { + u64 pfr, mte; + + pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); + val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte); + } break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 79d9c44d1ad7..d4da58ddcad7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_ARM_MTE 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-59-g8ed1b From f0376edb1ddcab19a473b4bf1fbd5b6bbed3705b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:15 +0100 Subject: KVM: arm64: Add ioctl to fetch/store tags in a guest The VMM may not wish to have it's own mapping of guest memory mapped with PROT_MTE because this causes problems if the VMM has tag checking enabled (the guest controls the tags in physical RAM and it's unlikely the tags are correct for the VMM). Instead add a new ioctl which allows the VMM to easily read/write the tags from guest memory, allowing the VMM's mapping to be non-PROT_MTE while the VMM can still read/write the tags for the purpose of migration. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-6-steven.price@arm.com --- arch/arm64/include/asm/kvm_host.h | 3 ++ arch/arm64/include/asm/mte-def.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 11 ++++++ arch/arm64/kvm/arm.c | 7 ++++ arch/arm64/kvm/guest.c | 82 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 105 insertions(+) (limited to 'include/uapi/linux') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 74a7447a83a1..c93a7198c242 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -730,6 +730,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags); + /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index cf241b0f0a42..626d359b396e 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -7,6 +7,7 @@ #define MTE_GRANULE_SIZE UL(16) #define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) +#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE) #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 24223adae150..b3edde68bc3e 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -184,6 +184,17 @@ struct kvm_vcpu_events { __u32 reserved[12]; }; +struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void __user *addr; + __u64 flags; + __u64 reserved[2]; +}; + +#define KVM_ARM_TAGS_TO_GUEST 0 +#define KVM_ARM_TAGS_FROM_GUEST 1 + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 28ce26a68f09..511f3716fe33 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1359,6 +1359,13 @@ long kvm_arch_vm_ioctl(struct file *filp, return 0; } + case KVM_ARM_MTE_COPY_TAGS: { + struct kvm_arm_copy_mte_tags copy_tags; + + if (copy_from_user(©_tags, argp, sizeof(copy_tags))) + return -EFAULT; + return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); + } default: return -EINVAL; } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 5cb4a1cd5603..4ddb20017b2f 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, return ret; } + +long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags) +{ + gpa_t guest_ipa = copy_tags->guest_ipa; + size_t length = copy_tags->length; + void __user *tags = copy_tags->addr; + gpa_t gfn; + bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST); + int ret = 0; + + if (!kvm_has_mte(kvm)) + return -EINVAL; + + if (copy_tags->reserved[0] || copy_tags->reserved[1]) + return -EINVAL; + + if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST) + return -EINVAL; + + if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK) + return -EINVAL; + + gfn = gpa_to_gfn(guest_ipa); + + mutex_lock(&kvm->slots_lock); + + while (length > 0) { + kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL); + void *maddr; + unsigned long num_tags; + struct page *page; + + if (is_error_noslot_pfn(pfn)) { + ret = -EFAULT; + goto out; + } + + page = pfn_to_online_page(pfn); + if (!page) { + /* Reject ZONE_DEVICE memory */ + ret = -EFAULT; + goto out; + } + maddr = page_address(page); + + if (!write) { + if (test_bit(PG_mte_tagged, &page->flags)) + num_tags = mte_copy_tags_to_user(tags, maddr, + MTE_GRANULES_PER_PAGE); + else + /* No tags in memory, so write zeros */ + num_tags = MTE_GRANULES_PER_PAGE - + clear_user(tags, MTE_GRANULES_PER_PAGE); + kvm_release_pfn_clean(pfn); + } else { + num_tags = mte_copy_tags_from_user(maddr, tags, + MTE_GRANULES_PER_PAGE); + kvm_release_pfn_dirty(pfn); + } + + if (num_tags != MTE_GRANULES_PER_PAGE) { + ret = -EFAULT; + goto out; + } + + /* Set the flag after checking the write completed fully */ + if (write) + set_bit(PG_mte_tagged, &page->flags); + + gfn++; + tags += num_tags; + length -= PAGE_SIZE; + } + +out: + mutex_unlock(&kvm->slots_lock); + /* If some data has been copied report the number of bytes copied */ + if (length != copy_tags->length) + return copy_tags->length - length; + return ret; +} diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d4da58ddcad7..da1edd2b4046 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1429,6 +1429,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3-59-g8ed1b