From fd3bc912d3d1ca3049beb9f905ee68df3b82282b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 28 Sep 2018 14:39:26 +0100 Subject: KVM: Documentation: Document arm64 core registers in detail Since the the sizes of individual members of the core arm64 registers vary, the list of register encodings that make sense is not a simple linear sequence. To clarify which encodings to use, this patch adds a brief list to the documentation. Signed-off-by: Dave Martin Reviewed-by: Julien Grall Reviewed-by: Peter Maydell Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7de9eee73fcd..2d4f7ce5e967 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2107,6 +2107,30 @@ contains elements ranging from 32 to 128 bits. The index is a 32bit value in the kvm_regs structure seen as a 32bit array. 0x60x0 0000 0010 +Specifically: + Encoding Register Bits kvm_regs member +---------------------------------------------------------------- + 0x6030 0000 0010 0000 X0 64 regs.regs[0] + 0x6030 0000 0010 0002 X1 64 regs.regs[1] + ... + 0x6030 0000 0010 003c X30 64 regs.regs[30] + 0x6030 0000 0010 003e SP 64 regs.sp + 0x6030 0000 0010 0040 PC 64 regs.pc + 0x6030 0000 0010 0042 PSTATE 64 regs.pstate + 0x6030 0000 0010 0044 SP_EL1 64 sp_el1 + 0x6030 0000 0010 0046 ELR_EL1 64 elr_el1 + 0x6030 0000 0010 0048 SPSR_EL1 64 spsr[KVM_SPSR_EL1] (alias SPSR_SVC) + 0x6030 0000 0010 004a SPSR_ABT 64 spsr[KVM_SPSR_ABT] + 0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND] + 0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ] + 0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] + 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] + 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] + ... + 0x6040 0000 0010 00d0 V31 128 fp_regs.vregs[31] + 0x6020 0000 0010 00d4 FPSR 32 fp_regs.fpsr + 0x6020 0000 0010 00d5 FPCR 32 fp_regs.fpcr + arm64 CCSIDR registers are demultiplexed by CSSELR value: 0x6020 0000 0011 00 -- cgit v1.2.3-59-g8ed1b From 395f562f2b4cf9aef0db540d460b859fcde110b6 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 15 Jan 2019 17:02:08 +0000 Subject: KVM: Document errors for KVM_GET_ONE_REG and KVM_SET_ONE_REG KVM_GET_ONE_REG and KVM_SET_ONE_REG return some error codes that are not documented (but hopefully not surprising either). To give an indication of what these may mean, this patch adds brief documentation. Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 2d4f7ce5e967..cd920dd1195c 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1871,6 +1871,9 @@ Architectures: all Type: vcpu ioctl Parameters: struct kvm_one_reg (in) Returns: 0 on success, negative value on failure +Errors: +  ENOENT:   no such register +  EINVAL:   other errors, such as bad size encoding for a known register struct kvm_one_reg { __u64 id; @@ -2192,6 +2195,9 @@ Architectures: all Type: vcpu ioctl Parameters: struct kvm_one_reg (in and out) Returns: 0 on success, negative value on failure +Errors: +  ENOENT:   no such register +  EINVAL:   other errors, such as bad size encoding for a known register This ioctl allows to receive the value of a single register implemented in a vcpu. The register to read is indicated by the "id" field of the -- cgit v1.2.3-59-g8ed1b From 50036ad06b7f31f7312b43752185e37cf1d0b663 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 28 Sep 2018 14:39:27 +0100 Subject: KVM: arm64/sve: Document KVM API extensions for SVE This patch adds sections to the KVM API documentation describing the extensions for supporting the Scalable Vector Extension (SVE) in guests. Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 132 +++++++++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index cd920dd1195c..68509dee23e8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1873,6 +1873,7 @@ Parameters: struct kvm_one_reg (in) Returns: 0 on success, negative value on failure Errors:  ENOENT:   no such register +  EPERM:    register access forbidden for architecture-dependent reasons  EINVAL:   other errors, such as bad size encoding for a known register struct kvm_one_reg { @@ -2127,13 +2128,20 @@ Specifically: 0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND] 0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ] 0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] - 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] - 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] + 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] (*) + 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] (*) ... - 0x6040 0000 0010 00d0 V31 128 fp_regs.vregs[31] + 0x6040 0000 0010 00d0 V31 128 fp_regs.vregs[31] (*) 0x6020 0000 0010 00d4 FPSR 32 fp_regs.fpsr 0x6020 0000 0010 00d5 FPCR 32 fp_regs.fpcr +(*) These encodings are not accepted for SVE-enabled vcpus. See + KVM_ARM_VCPU_INIT. + + The equivalent register content can be accessed via bits [127:0] of + the corresponding SVE Zn registers instead for vcpus that have SVE + enabled (see below). + arm64 CCSIDR registers are demultiplexed by CSSELR value: 0x6020 0000 0011 00 @@ -2143,6 +2151,61 @@ arm64 system registers have the following id bit patterns: arm64 firmware pseudo-registers have the following bit pattern: 0x6030 0000 0014 +arm64 SVE registers have the following bit patterns: + 0x6080 0000 0015 00 Zn bits[2048*slice + 2047 : 2048*slice] + 0x6050 0000 0015 04 Pn bits[256*slice + 255 : 256*slice] + 0x6050 0000 0015 060 FFR bits[256*slice + 255 : 256*slice] + 0x6060 0000 0015 ffff KVM_REG_ARM64_SVE_VLS pseudo-register + +Access to slices beyond the maximum vector length configured for the +vcpu (i.e., where 16 * slice >= max_vq (**)) will fail with ENOENT. + +These registers are only accessible on vcpus for which SVE is enabled. +See KVM_ARM_VCPU_INIT for details. + +In addition, except for KVM_REG_ARM64_SVE_VLS, these registers are not +accessible until the vcpu's SVE configuration has been finalized +using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE). See KVM_ARM_VCPU_INIT +and KVM_ARM_VCPU_FINALIZE for more information about this procedure. + +KVM_REG_ARM64_SVE_VLS is a pseudo-register that allows the set of vector +lengths supported by the vcpu to be discovered and configured by +userspace. When transferred to or from user memory via KVM_GET_ONE_REG +or KVM_SET_ONE_REG, the value of this register is of type __u64[8], and +encodes the set of vector lengths as follows: + +__u64 vector_lengths[8]; + +if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX && + ((vector_lengths[(vq - 1) / 64] >> ((vq - 1) % 64)) & 1)) + /* Vector length vq * 16 bytes supported */ +else + /* Vector length vq * 16 bytes not supported */ + +(**) The maximum value vq for which the above condition is true is +max_vq. This is the maximum vector length available to the guest on +this vcpu, and determines which register slices are visible through +this ioctl interface. + +(See Documentation/arm64/sve.txt for an explanation of the "vq" +nomenclature.) + +KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT. +KVM_ARM_VCPU_INIT initialises it to the best set of vector lengths that +the host supports. + +Userspace may subsequently modify it if desired until the vcpu's SVE +configuration is finalized using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE). + +Apart from simply removing all vector lengths from the host set that +exceed some value, support for arbitrarily chosen sets of vector lengths +is hardware-dependent and may not be available. Attempting to configure +an invalid set of vector lengths via KVM_SET_ONE_REG will fail with +EINVAL. + +After the vcpu's SVE configuration is finalized, further attempts to +write this register will fail with EPERM. + MIPS registers are mapped using the lower 32 bits. The upper 16 of that is the register group type: @@ -2197,6 +2260,7 @@ Parameters: struct kvm_one_reg (in and out) Returns: 0 on success, negative value on failure Errors:  ENOENT:   no such register +  EPERM:    register access forbidden for architecture-dependent reasons  EINVAL:   other errors, such as bad size encoding for a known register This ioctl allows to receive the value of a single register implemented @@ -2690,6 +2754,33 @@ Possible features: - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU. Depends on KVM_CAP_ARM_PMU_V3. + - KVM_ARM_VCPU_SVE: Enables SVE for the CPU (arm64 only). + Depends on KVM_CAP_ARM_SVE. + Requires KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE): + + * After KVM_ARM_VCPU_INIT: + + - KVM_REG_ARM64_SVE_VLS may be read using KVM_GET_ONE_REG: the + initial value of this pseudo-register indicates the best set of + vector lengths possible for a vcpu on this host. + + * Before KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE): + + - KVM_RUN and KVM_GET_REG_LIST are not available; + + - KVM_GET_ONE_REG and KVM_SET_ONE_REG cannot be used to access + the scalable archietctural SVE registers + KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() or + KVM_REG_ARM64_SVE_FFR; + + - KVM_REG_ARM64_SVE_VLS may optionally be written using + KVM_SET_ONE_REG, to modify the set of vector lengths available + for the vcpu. + + * After KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE): + + - the KVM_REG_ARM64_SVE_VLS pseudo-register is immutable, and can + no longer be written using KVM_SET_ONE_REG. 4.83 KVM_ARM_PREFERRED_TARGET @@ -3904,6 +3995,41 @@ number of valid entries in the 'entries' array, which is then filled. 'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved, userspace should not expect to get any particular value there. +4.119 KVM_ARM_VCPU_FINALIZE + +Capability: KVM_CAP_ARM_SVE +Architectures: arm, arm64 +Type: vcpu ioctl +Parameters: int feature (in) +Returns: 0 on success, -1 on error +Errors: + EPERM: feature not enabled, needs configuration, or already finalized + EINVAL: unknown feature + +Recognised values for feature: + arm64 KVM_ARM_VCPU_SVE + +Finalizes the configuration of the specified vcpu feature. + +The vcpu must already have been initialised, enabling the affected feature, by +means of a successful KVM_ARM_VCPU_INIT call with the appropriate flag set in +features[]. + +For affected vcpu features, this is a mandatory step that must be performed +before the vcpu is fully usable. + +Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be +configured by use of ioctls such as KVM_SET_ONE_REG. The exact configuration +that should be performaned and how to do it are feature-dependent. + +Other calls that depend on a particular feature being finalized, such as +KVM_RUN, KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with +-EPERM unless the feature has already been finalized by means of a +KVM_ARM_VCPU_FINALIZE call. + +See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization +using this ioctl. + 5. The kvm_run structure ------------------------ -- cgit v1.2.3-59-g8ed1b From c110ae578ca0a10064dfbda3d786d6a733b9fe69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Mar 2019 17:24:03 +0100 Subject: kvm: move KVM_CAP_NR_MEMSLOTS to common code All architectures except MIPS were defining it in the same way, and memory slots are handled entirely by common code so there is no point in keeping the definition per-architecture. Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 5 ++--- arch/powerpc/kvm/powerpc.c | 3 --- arch/s390/kvm/kvm-s390.c | 3 --- arch/x86/kvm/x86.c | 3 --- virt/kvm/arm/arm.c | 3 --- virt/kvm/kvm_main.c | 2 ++ 6 files changed, 4 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 67068c47c591..b62ad0d94234 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1117,9 +1117,8 @@ struct kvm_userspace_memory_region { This ioctl allows the user to create, modify or delete a guest physical memory slot. Bits 0-15 of "slot" specify the slot id and this value should be less than the maximum number of user memory slots supported per -VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS, -if this capability is supported by the architecture. Slots may not -overlap in guest physical address space. +VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS. +Slots may not overlap in guest physical address space. If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" specifies the address space which is being modified. They must be diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8885377ec3e0..92910b7c5bcc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -644,9 +644,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = num_online_cpus(); break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4638303ba6a8..28f35d2b06cb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -513,9 +513,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else if (sclp.has_esca && sclp.has_64bscao) r = KVM_S390_ESCA_CPU_SLOTS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 38440316a806..6c27d224f744 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3093,9 +3093,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; break; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 99c37384ba7b..be4ec5f3ba5f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -224,9 +224,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dc8edc97ba85..684b67252cd5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3063,6 +3063,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif case KVM_CAP_MAX_VCPU_ID: return KVM_MAX_VCPU_ID; + case KVM_CAP_NR_MEMSLOTS: + return KVM_USER_MEM_SLOTS; default: break; } -- cgit v1.2.3-59-g8ed1b From 13209ad0395c4de7fa48108b1dac72e341d5c089 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 28 Dec 2018 09:33:35 +0100 Subject: KVM: s390: add MSA9 to cpumodel This enables stfle.155 and adds the subfunctions for KDSA. Bit 155 is added to the list of facilities that will be enabled when there is no cpu model involved as MSA9 requires no additional handling from userspace, e.g. for migration. Please note that a cpu model enabled user space can and will have the final decision on the facility bits for a guests. Signed-off-by: Christian Borntraeger Acked-by: Janosch Frank Reviewed-by: Collin Walling Reviewed-by: David Hildenbrand --- Documentation/virtual/kvm/devices/vm.txt | 3 ++- arch/s390/include/asm/cpacf.h | 1 + arch/s390/include/uapi/asm/kvm.h | 3 ++- arch/s390/kvm/kvm-s390.c | 13 +++++++++++++ arch/s390/tools/gen_facilities.c | 1 + tools/arch/s390/include/uapi/asm/kvm.h | 3 ++- 6 files changed, 21 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 95ca68d663a4..4ffb82b02468 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -141,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc { u8 pcc[16]; # valid with Message-Security-Assist-Extension 4 u8 ppno[16]; # valid with Message-Security-Assist-Extension 5 u8 kma[16]; # valid with Message-Security-Assist-Extension 8 - u8 reserved[1808]; # reserved for future instructions + u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9 + u8 reserved[1792]; # reserved for future instructions }; Parameters: address of a buffer to load the subfunction blocks from. diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 3cc52e37b4b2..ce2770743cc5 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -28,6 +28,7 @@ #define CPACF_KMCTR 0xb92d /* MSA4 */ #define CPACF_PRNO 0xb93c /* MSA5 */ #define CPACF_KMA 0xb929 /* MSA8 */ +#define CPACF_KDSA 0xb93a /* MSA9 */ /* * En/decryption modifier bits diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 16511d97e8dc..09652eabe769 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -152,7 +152,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 pcc[16]; /* with MSA4 */ __u8 ppno[16]; /* with MSA5 */ __u8 kma[16]; /* with MSA8 */ - __u8 reserved[1808]; + __u8 kdsa[16]; /* with MSA9 */ + __u8 reserved[1792]; }; /* kvm attributes for crypto */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d3f3e63bb164..0dad61ccde3d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -368,6 +368,10 @@ static void kvm_s390_cpu_feat_init(void) __cpacf_query(CPACF_KMA, (cpacf_mask_t *) kvm_s390_available_subfunc.kma); + if (test_facility(155)) /* MSA9 */ + __cpacf_query(CPACF_KDSA, (cpacf_mask_t *) + kvm_s390_available_subfunc.kdsa); + if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* @@ -1331,6 +1335,9 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm, VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); + VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); return 0; } @@ -1499,6 +1506,9 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm, VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); + VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); return 0; } @@ -1554,6 +1564,9 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm, VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); + VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); return 0; } diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index fd788e0f2e5b..e952cb3b75b2 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -93,6 +93,7 @@ static struct facility_def facility_defs[] = { 131, /* enhanced-SOP 2 and side-effect */ 139, /* multiple epoch facility */ 146, /* msa extension 8 */ + 155, /* msa extension 9 */ -1 /* END */ } }, diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 16511d97e8dc..09652eabe769 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -152,7 +152,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 pcc[16]; /* with MSA4 */ __u8 ppno[16]; /* with MSA5 */ __u8 kma[16]; /* with MSA8 */ - __u8 reserved[1808]; + __u8 kdsa[16]; /* with MSA9 */ + __u8 reserved[1792]; }; /* kvm attributes for crypto */ -- cgit v1.2.3-59-g8ed1b From 4bd774e57b29f5bbf296d1daf69cc761e1e75fa8 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 11 Apr 2019 17:09:59 +0100 Subject: KVM: arm64/sve: Simplify KVM_REG_ARM64_SVE_VLS array sizing A complicated DIV_ROUND_UP() expression is currently written out explicitly in multiple places in order to specify the size of the bitmap exchanged with userspace to represent the value of the KVM_REG_ARM64_SVE_VLS pseudo-register. Userspace currently has no direct way to work this out either: for documentation purposes, the size is just quoted as 8 u64s. To make this more intuitive, this patch replaces these with a single define, which is also exported to userspace as KVM_ARM64_SVE_VLS_WORDS. Since the number of words in a bitmap is just the index of the last word used + 1, this patch expresses the bound that way instead. This should make it clearer what is being expressed. For userspace convenience, the minimum and maximum possible vector lengths relevant to the KVM ABI are exposed to UAPI as KVM_ARM64_SVE_VQ_MIN, KVM_ARM64_SVE_VQ_MAX. Since the only direct use for these at present is manipulation of KVM_REG_ARM64_SVE_VLS, no corresponding _VL_ macros are defined. They could be added later if a need arises. Since use of DIV_ROUND_UP() was the only reason for including in guest.c, this patch also removes that #include. Suggested-by: Andrew Jones Signed-off-by: Dave Martin Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 10 ++++++---- arch/arm64/include/uapi/asm/kvm.h | 5 +++++ arch/arm64/kvm/guest.c | 7 +++---- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 68509dee23e8..03df379a02b0 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2171,13 +2171,15 @@ and KVM_ARM_VCPU_FINALIZE for more information about this procedure. KVM_REG_ARM64_SVE_VLS is a pseudo-register that allows the set of vector lengths supported by the vcpu to be discovered and configured by userspace. When transferred to or from user memory via KVM_GET_ONE_REG -or KVM_SET_ONE_REG, the value of this register is of type __u64[8], and -encodes the set of vector lengths as follows: +or KVM_SET_ONE_REG, the value of this register is of type +__u64[KVM_ARM64_SVE_VLS_WORDS], and encodes the set of vector lengths as +follows: -__u64 vector_lengths[8]; +__u64 vector_lengths[KVM_ARM64_SVE_VLS_WORDS]; if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX && - ((vector_lengths[(vq - 1) / 64] >> ((vq - 1) % 64)) & 1)) + ((vector_lengths[(vq - KVM_ARM64_SVE_VQ_MIN) / 64] >> + ((vq - KVM_ARM64_SVE_VQ_MIN) % 64)) & 1)) /* Vector length vq * 16 bytes supported */ else /* Vector length vq * 16 bytes not supported */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 2a04ef015469..edd2db8e5160 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -258,9 +258,14 @@ struct kvm_vcpu_events { KVM_REG_SIZE_U256 | \ ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) +#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN +#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX + /* Vector lengths pseudo-register: */ #define KVM_REG_ARM64_SVE_VLS (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | \ KVM_REG_SIZE_U512 | 0xffff) +#define KVM_ARM64_SVE_VLS_WORDS \ + ((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1) /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 73044e3f8706..5bb909c3ff7c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -210,7 +209,7 @@ out: #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64) static bool vq_present( - const u64 (*const vqs)[DIV_ROUND_UP(SVE_VQ_MAX - SVE_VQ_MIN + 1, 64)], + const u64 (*const vqs)[KVM_ARM64_SVE_VLS_WORDS], unsigned int vq) { return (*vqs)[vq_word(vq)] & vq_mask(vq); @@ -219,7 +218,7 @@ static bool vq_present( static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned int max_vq, vq; - u64 vqs[DIV_ROUND_UP(SVE_VQ_MAX - SVE_VQ_MIN + 1, 64)]; + u64 vqs[KVM_ARM64_SVE_VLS_WORDS]; if (!vcpu_has_sve(vcpu)) return -ENOENT; @@ -243,7 +242,7 @@ static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned int max_vq, vq; - u64 vqs[DIV_ROUND_UP(SVE_VQ_MAX - SVE_VQ_MIN + 1, 64)]; + u64 vqs[KVM_ARM64_SVE_VLS_WORDS]; if (!vcpu_has_sve(vcpu)) return -ENOENT; -- cgit v1.2.3-59-g8ed1b From 9df2d660c7f37aed7244ec0b920c0749dbb69167 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 12 Apr 2019 13:28:05 +0100 Subject: KVM: Clarify capability requirements for KVM_ARM_VCPU_FINALIZE Userspace is only supposed to use KVM_ARM_VCPU_FINALIZE when there is some vcpu feature that can actually be finalized. This means that documenting KVM_ARM_VCPU_FINALIZE as available or not depending on the capabilities present is not helpful. This patch amends the documentation to describe availability in terms of which capability is required for each finalizable feature instead. In any case, userspace sees the same error (EINVAL) regardless of whether the given feature is not present or KVM_ARM_VCPU_FINALIZE is not implemented at all. No functional change. Suggested-by: Andrew Jones Signed-off-by: Dave Martin Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 03df379a02b0..5519df0d3ed0 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3999,17 +3999,16 @@ userspace should not expect to get any particular value there. 4.119 KVM_ARM_VCPU_FINALIZE -Capability: KVM_CAP_ARM_SVE Architectures: arm, arm64 Type: vcpu ioctl Parameters: int feature (in) Returns: 0 on success, -1 on error Errors: EPERM: feature not enabled, needs configuration, or already finalized - EINVAL: unknown feature + EINVAL: feature unknown or not present Recognised values for feature: - arm64 KVM_ARM_VCPU_SVE + arm64 KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE) Finalizes the configuration of the specified vcpu feature. -- cgit v1.2.3-59-g8ed1b From fe365b4ea6c0df3eb44d636c32c5210ae1e58364 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 12 Apr 2019 12:59:47 +0100 Subject: KVM: Clarify KVM_{SET,GET}_ONE_REG error code documentation The current error code documentation for KVM_GET_ONE_REG and KVM_SET_ONE_REG could be read as implying that all architectures implement these error codes, or that KVM guarantees which error code is returned in a particular situation. Because this is not really the case, this patch waters down the documentation explicitly to remove such guarantees. EPERM is marked as arm64-specific, since for now arm64 really is the only architecture that yields this error code for the finalization-required case. Keeping this as a distinct error code is useful however for debugging due to the statefulness of the API in this instance. No functional change. Suggested-by: Andrew Jones Fixes: 395f562f2b4c ("KVM: Document errors for KVM_GET_ONE_REG and KVM_SET_ONE_REG") Fixes: 50036ad06b7f ("KVM: arm64/sve: Document KVM API extensions for SVE") Signed-off-by: Dave Martin Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5519df0d3ed0..818ac97fdabc 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1873,8 +1873,10 @@ Parameters: struct kvm_one_reg (in) Returns: 0 on success, negative value on failure Errors:  ENOENT:   no such register -  EPERM:    register access forbidden for architecture-dependent reasons -  EINVAL:   other errors, such as bad size encoding for a known register +  EINVAL:   invalid register ID, or no such register +  EPERM:    (arm64) register access not allowed before vcpu finalization +(These error codes are indicative only: do not rely on a specific error +code being returned in a specific situation.) struct kvm_one_reg { __u64 id; @@ -2260,10 +2262,12 @@ Architectures: all Type: vcpu ioctl Parameters: struct kvm_one_reg (in and out) Returns: 0 on success, negative value on failure -Errors: +Errors include:  ENOENT:   no such register -  EPERM:    register access forbidden for architecture-dependent reasons -  EINVAL:   other errors, such as bad size encoding for a known register +  EINVAL:   invalid register ID, or no such register +  EPERM:    (arm64) register access not allowed before vcpu finalization +(These error codes are indicative only: do not rely on a specific error +code being returned in a specific situation.) This ioctl allows to receive the value of a single register implemented in a vcpu. The register to read is indicated by the "id" field of the -- cgit v1.2.3-59-g8ed1b From 43b8e1f08938c0fd3b8924e846dba89863badc2f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 12 Apr 2019 13:25:38 +0100 Subject: KVM: arm64: Clarify access behaviour for out-of-range SVE register slice IDs The existing documentation for which SVE register slice IDs are considered out-of-range, and what happens when userspace tries to access them, is cryptic. This patch rewords the text with the aim of making it a bit easier to understand. No functional change. Suggested-by: Andrew Jones Signed-off-by: Dave Martin Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 818ac97fdabc..e410a9f0f0d4 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2159,8 +2159,9 @@ arm64 SVE registers have the following bit patterns: 0x6050 0000 0015 060 FFR bits[256*slice + 255 : 256*slice] 0x6060 0000 0015 ffff KVM_REG_ARM64_SVE_VLS pseudo-register -Access to slices beyond the maximum vector length configured for the -vcpu (i.e., where 16 * slice >= max_vq (**)) will fail with ENOENT. +Access to register IDs where 2048 * slice >= 128 * max_vq will fail with +ENOENT. max_vq is the vcpu's maximum supported vector length in 128-bit +quadwords: see (**) below. These registers are only accessible on vcpus for which SVE is enabled. See KVM_ARM_VCPU_INIT for details. -- cgit v1.2.3-59-g8ed1b From a22fa321d13b0264976cbbc1d22f4c27c41d3642 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Tue, 23 Apr 2019 10:12:36 +0530 Subject: KVM: arm64: Add userspace flag to enable pointer authentication Now that the building blocks of pointer authentication are present, lets add userspace flags KVM_ARM_VCPU_PTRAUTH_ADDRESS and KVM_ARM_VCPU_PTRAUTH_GENERIC. These flags will enable pointer authentication for the KVM guest on a per-vcpu basis through the ioctl KVM_ARM_VCPU_INIT. This features will allow the KVM guest to allow the handling of pointer authentication instructions or to treat them as undefined if not set. Necessary documentations are added to reflect the changes done. Reviewed-by: Dave Martin Signed-off-by: Amit Daniel Kachhap Cc: Mark Rutland Cc: Marc Zyngier Cc: Christoffer Dall Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Marc Zyngier --- Documentation/arm64/pointer-authentication.txt | 22 +++++++++++++++++---- Documentation/virtual/kvm/api.txt | 10 ++++++++++ arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/uapi/asm/kvm.h | 2 ++ arch/arm64/kvm/reset.c | 27 ++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arm64/pointer-authentication.txt b/Documentation/arm64/pointer-authentication.txt index 5baca42ba146..fc71b33de87e 100644 --- a/Documentation/arm64/pointer-authentication.txt +++ b/Documentation/arm64/pointer-authentication.txt @@ -87,7 +87,21 @@ used to get and set the keys for a thread. Virtualization -------------- -Pointer authentication is not currently supported in KVM guests. KVM -will mask the feature bits from ID_AA64ISAR1_EL1, and attempted use of -the feature will result in an UNDEFINED exception being injected into -the guest. +Pointer authentication is enabled in KVM guest when each virtual cpu is +initialised by passing flags KVM_ARM_VCPU_PTRAUTH_[ADDRESS/GENERIC] and +requesting these two separate cpu features to be enabled. The current KVM +guest implementation works by enabling both features together, so both +these userspace flags are checked before enabling pointer authentication. +The separate userspace flag will allow to have no userspace ABI changes +if support is added in the future to allow these two features to be +enabled independently of one another. + +As Arm Architecture specifies that Pointer Authentication feature is +implemented along with the VHE feature so KVM arm64 ptrauth code relies +on VHE mode to be present. + +Additionally, when these vcpu feature flags are not set then KVM will +filter out the Pointer Authentication system key registers from +KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID +register. Any attempt to use the Pointer Authentication instructions will +result in an UNDEFINED exception being injected into the guest. diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e410a9f0f0d4..32afe7f5c35a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2761,6 +2761,16 @@ Possible features: - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU. Depends on KVM_CAP_ARM_PMU_V3. + - KVM_ARM_VCPU_PTRAUTH_ADDRESS: Enables Address Pointer authentication + for arm64 only. + Both KVM_ARM_VCPU_PTRAUTH_ADDRESS and KVM_ARM_VCPU_PTRAUTH_GENERIC + must be requested or neither must be requested. + + - KVM_ARM_VCPU_PTRAUTH_GENERIC: Enables Generic Pointer authentication + for arm64 only. + Both KVM_ARM_VCPU_PTRAUTH_ADDRESS and KVM_ARM_VCPU_PTRAUTH_GENERIC + must be requested or neither must be requested. + - KVM_ARM_VCPU_SVE: Enables SVE for the CPU (arm64 only). Depends on KVM_CAP_ARM_SVE. Requires KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE): diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7eebea7059c6..f772ac2fb3e9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -49,7 +49,7 @@ #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS -#define KVM_VCPU_MAX_FEATURES 5 +#define KVM_VCPU_MAX_FEATURES 7 #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index edd2db8e5160..7b7ac0f6cec9 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -104,6 +104,8 @@ struct kvm_regs { #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ #define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */ #define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */ +#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ +#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ struct kvm_vcpu_init { __u32 target; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 3402543fdcd3..028d0c604652 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -221,6 +221,27 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu)); } +static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) +{ + /* Support ptrauth only if the system supports these capabilities. */ + if (!has_vhe()) + return -EINVAL; + + if (!system_supports_address_auth() || + !system_supports_generic_auth()) + return -EINVAL; + /* + * For now make sure that both address/generic pointer authentication + * features are requested by the userspace together. + */ + if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || + !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) + return -EINVAL; + + vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; + return 0; +} + /** * kvm_reset_vcpu - sets core registers and sys_regs to reset value * @vcpu: The VCPU pointer @@ -261,6 +282,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_vcpu_reset_sve(vcpu); } + if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || + test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { + if (kvm_vcpu_enable_ptrauth(vcpu)) + goto out; + } + switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { -- cgit v1.2.3-59-g8ed1b From a243c16d18be130b17cf1064e9115de73bfdff5a Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Tue, 23 Apr 2019 10:12:37 +0530 Subject: KVM: arm64: Add capability to advertise ptrauth for guest This patch advertises the capability of two cpu feature called address pointer authentication and generic pointer authentication. These capabilities depend upon system support for pointer authentication and VHE mode. The current arm64 KVM partially implements pointer authentication and support of address/generic authentication are tied together. However, separate ABI requirements for both of them is added so that any future isolated implementation will not require any ABI changes. Signed-off-by: Amit Daniel Kachhap Cc: Mark Rutland Cc: Marc Zyngier Cc: Christoffer Dall Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 14 ++++++++++---- arch/arm64/kvm/reset.c | 5 +++++ include/uapi/linux/kvm.h | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 32afe7f5c35a..fac1887f25b5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2763,13 +2763,19 @@ Possible features: - KVM_ARM_VCPU_PTRAUTH_ADDRESS: Enables Address Pointer authentication for arm64 only. - Both KVM_ARM_VCPU_PTRAUTH_ADDRESS and KVM_ARM_VCPU_PTRAUTH_GENERIC - must be requested or neither must be requested. + Depends on KVM_CAP_ARM_PTRAUTH_ADDRESS. + If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are + both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and + KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be + requested. - KVM_ARM_VCPU_PTRAUTH_GENERIC: Enables Generic Pointer authentication for arm64 only. - Both KVM_ARM_VCPU_PTRAUTH_ADDRESS and KVM_ARM_VCPU_PTRAUTH_GENERIC - must be requested or neither must be requested. + Depends on KVM_CAP_ARM_PTRAUTH_GENERIC. + If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are + both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and + KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be + requested. - KVM_ARM_VCPU_SVE: Enables SVE for the CPU (arm64 only). Depends on KVM_CAP_ARM_SVE. diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 028d0c604652..f0faf54f5857 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -101,6 +101,11 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_SVE: r = system_supports_sve(); break; + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + r = has_vhe() && system_supports_address_auth() && + system_supports_generic_auth(); + break; default: r = 0; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1d564445b515..4dc34f8e29f6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -989,6 +989,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 #define KVM_CAP_HYPERV_CPUID 167 #define KVM_CAP_ARM_SVE 168 +#define KVM_CAP_ARM_PTRAUTH_ADDRESS 169 +#define KVM_CAP_ARM_PTRAUTH_GENERIC 170 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-59-g8ed1b From a9bf3130ebfe0378df449a1b13997c47a58e661a Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 20:22:17 +0100 Subject: arm64: docs: Document perf event attributes The interaction between the exclude_{host,guest} flags, exclude_{user,kernel,hv} flags and presence of VHE can result in different exception levels being filtered by the ARMv8 PMU. As this can be confusing let's document how they work on arm64. Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- Documentation/arm64/perf.txt | 85 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 Documentation/arm64/perf.txt (limited to 'Documentation') diff --git a/Documentation/arm64/perf.txt b/Documentation/arm64/perf.txt new file mode 100644 index 000000000000..0d6a7d87d49e --- /dev/null +++ b/Documentation/arm64/perf.txt @@ -0,0 +1,85 @@ +Perf Event Attributes +===================== + +Author: Andrew Murray +Date: 2019-03-06 + +exclude_user +------------ + +This attribute excludes userspace. + +Userspace always runs at EL0 and thus this attribute will exclude EL0. + + +exclude_kernel +-------------- + +This attribute excludes the kernel. + +The kernel runs at EL2 with VHE and EL1 without. Guest kernels always run +at EL1. + +For the host this attribute will exclude EL1 and additionally EL2 on a VHE +system. + +For the guest this attribute will exclude EL1. Please note that EL2 is +never counted within a guest. + + +exclude_hv +---------- + +This attribute excludes the hypervisor. + +For a VHE host this attribute is ignored as we consider the host kernel to +be the hypervisor. + +For a non-VHE host this attribute will exclude EL2 as we consider the +hypervisor to be any code that runs at EL2 which is predominantly used for +guest/host transitions. + +For the guest this attribute has no effect. Please note that EL2 is +never counted within a guest. + + +exclude_host / exclude_guest +---------------------------- + +These attributes exclude the KVM host and guest, respectively. + +The KVM host may run at EL0 (userspace), EL1 (non-VHE kernel) and EL2 (VHE +kernel or non-VHE hypervisor). + +The KVM guest may run at EL0 (userspace) and EL1 (kernel). + +Due to the overlapping exception levels between host and guests we cannot +exclusively rely on the PMU's hardware exception filtering - therefore we +must enable/disable counting on the entry and exit to the guest. This is +performed differently on VHE and non-VHE systems. + +For non-VHE systems we exclude EL2 for exclude_host - upon entering and +exiting the guest we disable/enable the event as appropriate based on the +exclude_host and exclude_guest attributes. + +For VHE systems we exclude EL1 for exclude_guest and exclude both EL0,EL2 +for exclude_host. Upon entering and exiting the guest we modify the event +to include/exclude EL0 as appropriate based on the exclude_host and +exclude_guest attributes. + +The statements above also apply when these attributes are used within a +non-VHE guest however please note that EL2 is never counted within a guest. + + +Accuracy +-------- + +On non-VHE hosts we enable/disable counters on the entry/exit of host/guest +transition at EL2 - however there is a period of time between +enabling/disabling the counters and entering/exiting the guest. We are +able to eliminate counters counting host events on the boundaries of guest +entry/exit when counting guest events by filtering out EL2 for +exclude_host. However when using !exclude_hv there is a small blackout +window at the guest entry/exit where host events are not captured. + +On VHE systems there are no blackout windows. -- cgit v1.2.3-59-g8ed1b From 90c73795afa24890bd2ae4f3b359de04b4147d37 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:27 +0200 Subject: KVM: PPC: Book3S HV: Add a new KVM device for the XIVE native exploitation mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the basic framework for the new KVM device supporting the XIVE native exploitation mode. The user interface exposes a new KVM device to be created by QEMU, only available when running on a L0 hypervisor. Support for nested guests is not available yet. The XIVE device reuses the device structure of the XICS-on-XIVE device as they have a lot in common. That could possibly change in the future if the need arise. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 19 +++ arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 8 ++ arch/powerpc/include/uapi/asm/kvm.h | 3 + arch/powerpc/kvm/Makefile | 2 +- arch/powerpc/kvm/book3s.c | 7 +- arch/powerpc/kvm/book3s_xive_native.c | 179 +++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 + 8 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/xive.txt create mode 100644 arch/powerpc/kvm/book3s_xive_native.c (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt new file mode 100644 index 000000000000..fdbd2ff92a88 --- /dev/null +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -0,0 +1,19 @@ +POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1) +========================================================== + +Device types supported: + KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1 + +This device acts as a VM interrupt controller. It provides the KVM +interface to configure the interrupt sources of a VM in the underlying +POWER9 XIVE interrupt controller. + +Only one XIVE instance may be instantiated. A guest XIVE device +requires a POWER9 host and the guest OS should have support for the +XIVE native exploitation interrupt mode. If not, it should run using +the legacy interrupt mode, referred as XICS (POWER7/8). + +* Groups: + + 1. KVM_DEV_XIVE_GRP_CTRL + Provides global controls on the device diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 07cefa53222e..9074f1d7613c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -225,6 +225,7 @@ extern struct kvm_device_ops kvm_xics_ops; struct kvmppc_xive; struct kvmppc_xive_vcpu; extern struct kvm_device_ops kvm_xive_ops; +extern struct kvm_device_ops kvm_xive_native_ops; struct kvmppc_passthru_irqmap; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0e99a6f8066f..f746d85f36c7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -593,6 +593,10 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); + +extern void kvmppc_xive_native_init_module(void); +extern void kvmppc_xive_native_exit_module(void); + #else static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) { return -1; } @@ -616,6 +620,10 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { return -ENODEV; } static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } + +static inline void kvmppc_xive_native_init_module(void) { } +static inline void kvmppc_xive_native_exit_module(void) { } + #endif /* CONFIG_KVM_XIVE */ #if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 26ca425f4c2c..be0ce1f17625 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -677,4 +677,7 @@ struct kvm_ppc_cpu_char { #define KVM_XICS_PRESENTED (1ULL << 43) #define KVM_XICS_QUEUED (1ULL << 44) +/* POWER9 XIVE Native Interrupt Controller */ +#define KVM_DEV_XIVE_GRP_CTRL 1 + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3223aec88b2c..4c67cc79de7c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -94,7 +94,7 @@ endif kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o -kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o +kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o kvm-book3s_64-module-objs := \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 10c5579d20ce..7c3348fa27e1 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1050,6 +1050,9 @@ static int kvmppc_book3s_init(void) if (xics_on_xive()) { kvmppc_xive_init_module(); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); + kvmppc_xive_native_init_module(); + kvm_register_device_ops(&kvm_xive_native_ops, + KVM_DEV_TYPE_XIVE); } else #endif kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); @@ -1060,8 +1063,10 @@ static int kvmppc_book3s_init(void) static void kvmppc_book3s_exit(void) { #ifdef CONFIG_KVM_XICS - if (xics_on_xive()) + if (xics_on_xive()) { kvmppc_xive_exit_module(); + kvmppc_xive_native_exit_module(); + } #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kvmppc_book3s_exit_pr(); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c new file mode 100644 index 000000000000..751259394150 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2017-2019, IBM Corporation. + */ + +#define pr_fmt(fmt) "xive-kvm: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "book3s_xive.h" + +static int kvmppc_xive_native_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_XIVE_GRP_CTRL: + break; + } + return -ENXIO; +} + +static int kvmppc_xive_native_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int kvmppc_xive_native_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_XIVE_GRP_CTRL: + break; + } + return -ENXIO; +} + +static void kvmppc_xive_native_free(struct kvm_device *dev) +{ + struct kvmppc_xive *xive = dev->private; + struct kvm *kvm = xive->kvm; + + debugfs_remove(xive->dentry); + + pr_devel("Destroying xive native device\n"); + + if (kvm) + kvm->arch.xive = NULL; + + if (xive->vp_base != XIVE_INVALID_VP) + xive_native_free_vp_block(xive->vp_base); + + kfree(xive); + kfree(dev); +} + +static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) +{ + struct kvmppc_xive *xive; + struct kvm *kvm = dev->kvm; + int ret = 0; + + pr_devel("Creating xive native device\n"); + + if (kvm->arch.xive) + return -EEXIST; + + xive = kzalloc(sizeof(*xive), GFP_KERNEL); + if (!xive) + return -ENOMEM; + + dev->private = xive; + xive->dev = dev; + xive->kvm = kvm; + kvm->arch.xive = xive; + + /* + * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for + * a default. Getting the max number of CPUs the VM was + * configured with would improve our usage of the XIVE VP space. + */ + xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); + pr_devel("VP_Base=%x\n", xive->vp_base); + + if (xive->vp_base == XIVE_INVALID_VP) + ret = -ENXIO; + + xive->single_escalation = xive_native_has_single_escalation(); + + if (ret) + kfree(xive); + + return ret; +} + +static int xive_native_debug_show(struct seq_file *m, void *private) +{ + struct kvmppc_xive *xive = m->private; + struct kvm *kvm = xive->kvm; + + if (!kvm) + return 0; + + return 0; +} + +static int xive_native_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, xive_native_debug_show, inode->i_private); +} + +static const struct file_operations xive_native_debug_fops = { + .open = xive_native_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void xive_native_debugfs_init(struct kvmppc_xive *xive) +{ + char *name; + + name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); + if (!name) { + pr_err("%s: no memory for name\n", __func__); + return; + } + + xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xive, &xive_native_debug_fops); + + pr_debug("%s: created %s\n", __func__, name); + kfree(name); +} + +static void kvmppc_xive_native_init(struct kvm_device *dev) +{ + struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + + /* Register some debug interfaces */ + xive_native_debugfs_init(xive); +} + +struct kvm_device_ops kvm_xive_native_ops = { + .name = "kvm-xive-native", + .create = kvmppc_xive_native_create, + .init = kvmppc_xive_native_init, + .destroy = kvmppc_xive_native_free, + .set_attr = kvmppc_xive_native_set_attr, + .get_attr = kvmppc_xive_native_get_attr, + .has_attr = kvmppc_xive_native_has_attr, +}; + +void kvmppc_xive_native_init_module(void) +{ + ; +} + +void kvmppc_xive_native_exit_module(void) +{ + ; +} diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6d4ea4b6c922..e6368163d3a0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1211,6 +1211,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_ITS, #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS + KVM_DEV_TYPE_XIVE, +#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_MAX, }; -- cgit v1.2.3-59-g8ed1b From eacc56bb9de3e6830ddc169553772cd6de59ee4c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:28 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Introduce a new capability KVM_CAP_PPC_IRQ_XIVE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user interface exposes a new capability KVM_CAP_PPC_IRQ_XIVE to let QEMU connect the vCPU presenters to the XIVE KVM device if required. The capability is not advertised for now as the full support for the XIVE native exploitation mode is not yet available. When this is case, the capability will be advertised on PowerNV Hypervisors only. Nested guests (pseries KVM Hypervisor) are not supported. Internally, the interface to the new KVM device is protected with a new interrupt mode: KVMPPC_IRQ_XIVE. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 9 ++ arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 13 +++ arch/powerpc/kvm/book3s_xive.c | 88 ++++++++++---------- arch/powerpc/kvm/book3s_xive.h | 11 +++ arch/powerpc/kvm/book3s_xive_native.c | 150 ++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/powerpc.c | 36 ++++++++ include/uapi/linux/kvm.h | 1 + 8 files changed, 268 insertions(+), 41 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 67068c47c591..e38eb17b7be6 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4504,6 +4504,15 @@ struct kvm_sync_regs { struct kvm_vcpu_events events; }; +6.75 KVM_CAP_PPC_IRQ_XIVE + +Architectures: ppc +Target: vcpu +Parameters: args[0] is the XIVE device fd + args[1] is the XIVE CPU number (server ID) for this vcpu + +This capability connects the vcpu to an in-kernel XIVE device. + 7. Capabilities that can be enabled on VMs ------------------------------------------ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9074f1d7613c..eac25fd7e631 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -453,6 +453,7 @@ struct kvmppc_passthru_irqmap { #define KVMPPC_IRQ_DEFAULT 0 #define KVMPPC_IRQ_MPIC 1 #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ +#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */ #define MMIO_HPTE_CACHE_SIZE 4 diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f746d85f36c7..9fc2753e516e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -594,6 +594,14 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE; +} + +extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu); +extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); extern void kvmppc_xive_native_init_module(void); extern void kvmppc_xive_native_exit_module(void); @@ -621,6 +629,11 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir int level, bool line_status) { return -ENODEV; } static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) + { return 0; } +static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } +static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { } static inline void kvmppc_xive_native_init_module(void) { } static inline void kvmppc_xive_native_exit_module(void) { } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f78d002f0fe0..e7f1ada1c3de 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -380,11 +380,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) return -EBUSY; } -static u32 xive_vp(struct kvmppc_xive *xive, u32 server) -{ - return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); -} - static u8 xive_lock_and_mask(struct kvmppc_xive *xive, struct kvmppc_xive_src_block *sb, struct kvmppc_xive_irq_state *state) @@ -430,8 +425,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, */ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { xive_native_configure_irq(hw_num, - xive_vp(xive, state->act_server), - MASKED, state->number); + kvmppc_xive_vp(xive, state->act_server), + MASKED, state->number); /* set old_p so we can track if an H_EOI was done */ state->old_p = true; state->old_q = false; @@ -486,8 +481,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, */ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { xive_native_configure_irq(hw_num, - xive_vp(xive, state->act_server), - state->act_priority, state->number); + kvmppc_xive_vp(xive, state->act_server), + state->act_priority, state->number); /* If an EOI is needed, do it here */ if (!state->old_p) xive_vm_source_eoi(hw_num, xd); @@ -563,7 +558,7 @@ static int xive_target_interrupt(struct kvm *kvm, kvmppc_xive_select_irq(state, &hw_num, NULL); return xive_native_configure_irq(hw_num, - xive_vp(xive, server), + kvmppc_xive_vp(xive, server), prio, state->number); } @@ -951,7 +946,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, * which is fine for a never started interrupt. */ xive_native_configure_irq(hw_irq, - xive_vp(xive, state->act_server), + kvmppc_xive_vp(xive, state->act_server), state->act_priority, state->number); /* @@ -1027,7 +1022,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, /* Reconfigure the IPI */ xive_native_configure_irq(state->ipi_number, - xive_vp(xive, state->act_server), + kvmppc_xive_vp(xive, state->act_server), state->act_priority, state->number); /* @@ -1049,7 +1044,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, } EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); -static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; struct kvm *kvm = vcpu->kvm; @@ -1166,7 +1161,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, xc->xive = xive; xc->vcpu = vcpu; xc->server_num = cpu; - xc->vp_id = xive_vp(xive, cpu); + xc->vp_id = kvmppc_xive_vp(xive, cpu); xc->mfrr = 0xff; xc->valid = true; @@ -1883,6 +1878,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) return 0; } +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int i; + + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; + u32 i0, i1, idx; + + if (!q->qpage && !xc->esc_virq[i]) + continue; + + seq_printf(m, " [q%d]: ", i); + + if (q->qpage) { + idx = q->idx; + i0 = be32_to_cpup(q->qpage + idx); + idx = (idx + 1) & q->msk; + i1 = be32_to_cpup(q->qpage + idx); + seq_printf(m, "T=%d %08x %08x...\n", q->toggle, + i0, i1); + } + if (xc->esc_virq[i]) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); + struct xive_irq_data *xd = + irq_data_get_irq_handler_data(d); + u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + + seq_printf(m, "E:%c%c I(%d:%llx:%llx)", + (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', + (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', + xc->esc_virq[i], pq, xd->eoi_page); + seq_puts(m, "\n"); + } + } + return 0; +} static int xive_debug_show(struct seq_file *m, void *private) { @@ -1908,7 +1940,6 @@ static int xive_debug_show(struct seq_file *m, void *private) kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; - unsigned int i; if (!xc) continue; @@ -1918,33 +1949,8 @@ static int xive_debug_show(struct seq_file *m, void *private) xc->server_num, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); - for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { - struct xive_q *q = &xc->queues[i]; - u32 i0, i1, idx; - if (!q->qpage && !xc->esc_virq[i]) - continue; - - seq_printf(m, " [q%d]: ", i); - - if (q->qpage) { - idx = q->idx; - i0 = be32_to_cpup(q->qpage + idx); - idx = (idx + 1) & q->msk; - i1 = be32_to_cpup(q->qpage + idx); - seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); - } - if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', - (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', - xc->esc_virq[i], pq, xd->eoi_page); - seq_printf(m, "\n"); - } - } + kvmppc_xive_debug_show_queues(m, vcpu); t_rm_h_xirr += xc->stat_rm_h_xirr; t_rm_h_ipoll += xc->stat_rm_h_ipoll; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index a08ae6fd4c51..d366df69b9cb 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -198,6 +198,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp return xive->src_blocks[bid]; } +static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) +{ + return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); +} + /* * Mapping between guest priorities and host priorities * is as follow. @@ -248,5 +253,11 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); +/* + * Common Xive routines for XICS-over-XIVE and XIVE native + */ +void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); +int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); + #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 751259394150..6fa73cfd9d9c 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -26,6 +26,134 @@ #include "book3s_xive.h" +static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct xive_q *q = &xc->queues[prio]; + + xive_native_disable_queue(xc->vp_id, q, prio); + if (q->qpage) { + put_page(virt_to_page(q->qpage)); + q->qpage = NULL; + } +} + +void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + int i; + + if (!kvmppc_xive_enabled(vcpu)) + return; + + if (!xc) + return; + + pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num); + + /* Ensure no interrupt is still routed to that VP */ + xc->valid = false; + kvmppc_xive_disable_vcpu_interrupts(vcpu); + + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); + + /* Free the queues & associated interrupts */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + /* Free the escalation irq */ + if (xc->esc_virq[i]) { + free_irq(xc->esc_virq[i], vcpu); + irq_dispose_mapping(xc->esc_virq[i]); + kfree(xc->esc_virq_names[i]); + xc->esc_virq[i] = 0; + } + + /* Free the queue */ + kvmppc_xive_native_cleanup_queue(vcpu, i); + } + + /* Free the VP */ + kfree(xc); + + /* Cleanup the vcpu */ + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; + vcpu->arch.xive_vcpu = NULL; +} + +int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 server_num) +{ + struct kvmppc_xive *xive = dev->private; + struct kvmppc_xive_vcpu *xc = NULL; + int rc; + + pr_devel("native_connect_vcpu(server=%d)\n", server_num); + + if (dev->ops != &kvm_xive_native_ops) { + pr_devel("Wrong ops !\n"); + return -EPERM; + } + if (xive->kvm != vcpu->kvm) + return -EPERM; + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) + return -EBUSY; + if (server_num >= KVM_MAX_VCPUS) { + pr_devel("Out of bounds !\n"); + return -EINVAL; + } + + mutex_lock(&vcpu->kvm->lock); + + if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { + pr_devel("Duplicate !\n"); + rc = -EEXIST; + goto bail; + } + + xc = kzalloc(sizeof(*xc), GFP_KERNEL); + if (!xc) { + rc = -ENOMEM; + goto bail; + } + + vcpu->arch.xive_vcpu = xc; + xc->xive = xive; + xc->vcpu = vcpu; + xc->server_num = server_num; + + xc->vp_id = kvmppc_xive_vp(xive, server_num); + xc->valid = true; + vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; + + rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); + if (rc) { + pr_err("Failed to get VP info from OPAL: %d\n", rc); + goto bail; + } + + /* + * Enable the VP first as the single escalation mode will + * affect escalation interrupts numbering + */ + rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + if (rc) { + pr_err("Failed to enable VP in OPAL: %d\n", rc); + goto bail; + } + + /* Configure VCPU fields for use by assembly push/pull */ + vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); + vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); + + /* TODO: reset all queues to a clean state ? */ +bail: + mutex_unlock(&vcpu->kvm->lock); + if (rc) + kvmppc_xive_native_cleanup_vcpu(vcpu); + + return rc; +} + static int kvmppc_xive_native_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -114,10 +242,32 @@ static int xive_native_debug_show(struct seq_file *m, void *private) { struct kvmppc_xive *xive = m->private; struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + unsigned int i; if (!kvm) return 0; + seq_puts(m, "=========\nVCPU state\n=========\n"); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + + if (!xc) + continue; + + seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, + vcpu->arch.xive_saved_state.nsr, + vcpu->arch.xive_saved_state.cppr, + vcpu->arch.xive_saved_state.ipb, + vcpu->arch.xive_saved_state.pipr, + vcpu->arch.xive_saved_state.w01, + (u32) vcpu->arch.xive_cam_word); + + kvmppc_xive_debug_show_queues(m, vcpu); + } + return 0; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8885377ec3e0..b0858ee61460 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -570,6 +570,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PPC_GET_CPU_CHAR: r = 1; break; +#ifdef CONFIG_KVM_XIVE + case KVM_CAP_PPC_IRQ_XIVE: + /* + * Return false until all the XIVE infrastructure is + * in place including support for migration. + */ + r = 0; + break; +#endif case KVM_CAP_PPC_ALLOC_HTAB: r = hv_enabled; @@ -753,6 +762,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) else kvmppc_xics_free_icp(vcpu); break; + case KVMPPC_IRQ_XIVE: + kvmppc_xive_native_cleanup_vcpu(vcpu); + break; } kvmppc_core_vcpu_free(vcpu); @@ -1941,6 +1953,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_CAP_PPC_IRQ_XIVE: { + struct fd f; + struct kvm_device *dev; + + r = -EBADF; + f = fdget(cap->args[0]); + if (!f.file) + break; + + r = -ENXIO; + if (!xive_enabled()) + break; + + r = -EPERM; + dev = kvm_device_from_filp(f.file); + if (dev) + r = kvmppc_xive_native_connect_vcpu(dev, vcpu, + cap->args[1]); + + fdput(f); + break; + } +#endif /* CONFIG_KVM_XIVE */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_FWNMI: r = -EINVAL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6368163d3a0..52bf74a1616e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_VM_IPA_SIZE 165 #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_PPC_IRQ_XIVE 168 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-59-g8ed1b From 4131f83c3d64e591014dad14c7f8070c538b9422 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:29 +0200 Subject: KVM: PPC: Book3S HV: XIVE: add a control to initialize a source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XIVE KVM device maintains a list of interrupt sources for the VM which are allocated in the pool of generic interrupts (IPIs) of the main XIVE IC controller. These are used for the CPU IPIs as well as for virtual device interrupts. The IRQ number space is defined by QEMU. The XIVE device reuses the source structures of the XICS-on-XIVE device for the source blocks (2-level tree) and for the source interrupts. Under XIVE native, the source interrupt caches mostly configuration information and is less used than under the XICS-on-XIVE device in which hcalls are still necessary at run-time. When a source is initialized in KVM, an IPI interrupt source is simply allocated at the OPAL level and then MASKED. KVM only needs to know about its type: LSI or MSI. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 15 ++++ arch/powerpc/include/uapi/asm/kvm.h | 5 ++ arch/powerpc/kvm/book3s_xive.c | 8 +-- arch/powerpc/kvm/book3s_xive.h | 10 +++ arch/powerpc/kvm/book3s_xive_native.c | 106 +++++++++++++++++++++++++++++ 5 files changed, 140 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index fdbd2ff92a88..cd8bfc37b72e 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -17,3 +17,18 @@ the legacy interrupt mode, referred as XICS (POWER7/8). 1. KVM_DEV_XIVE_GRP_CTRL Provides global controls on the device + + 2. KVM_DEV_XIVE_GRP_SOURCE (write only) + Initializes a new source in the XIVE device and mask it. + Attributes: + Interrupt source number (64-bit) + The kvm_device_attr.addr points to a __u64 value: + bits: | 63 .... 2 | 1 | 0 + values: | unused | level | type + - type: 0:MSI 1:LSI + - level: assertion level in case of an LSI. + Errors: + -E2BIG: Interrupt source number is out of range + -ENOMEM: Could not create a new source block + -EFAULT: Invalid user pointer for attr->addr. + -ENXIO: Could not allocate underlying HW interrupt diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index be0ce1f17625..d468294c2a67 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -679,5 +679,10 @@ struct kvm_ppc_cpu_char { /* POWER9 XIVE Native Interrupt Controller */ #define KVM_DEV_XIVE_GRP_CTRL 1 +#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ + +/* Layout of 64-bit XIVE source attribute values */ +#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) +#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e7f1ada1c3de..6c9f9fd0855f 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1480,8 +1480,8 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) return 0; } -static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive, - int irq) +struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( + struct kvmppc_xive *xive, int irq) { struct kvm *kvm = xive->kvm; struct kvmppc_xive_src_block *sb; @@ -1560,7 +1560,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) sb = kvmppc_xive_find_source(xive, irq, &idx); if (!sb) { pr_devel("No source, creating source block...\n"); - sb = xive_create_src_block(xive, irq); + sb = kvmppc_xive_create_src_block(xive, irq); if (!sb) { pr_devel("Failed to create block...\n"); return -ENOMEM; @@ -1784,7 +1784,7 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) xive_cleanup_irq_data(xd); } -static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) +void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) { int i; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index d366df69b9cb..1be921cb5dcb 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -12,6 +12,13 @@ #ifdef CONFIG_KVM_XICS #include "book3s_xics.h" +/* + * The XIVE Interrupt source numbers are within the range 0 to + * KVMPPC_XICS_NR_IRQS. + */ +#define KVMPPC_XIVE_FIRST_IRQ 0 +#define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS + /* * State for one guest irq source. * @@ -258,6 +265,9 @@ extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); */ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); +struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( + struct kvmppc_xive *xive, int irq); +void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 6fa73cfd9d9c..5f2bd6c137b7 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -26,6 +26,17 @@ #include "book3s_xive.h" +static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) +{ + u64 val; + + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) + offset |= offset << 4; + + val = in_be64(xd->eoi_mmio + offset); + return (u8)val; +} + static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -154,12 +165,94 @@ bail: return rc; } +static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, + u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + u64 __user *ubufp = (u64 __user *) addr; + u64 val; + u16 idx; + int rc; + + pr_devel("%s irq=0x%lx\n", __func__, irq); + + if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS) + return -E2BIG; + + sb = kvmppc_xive_find_source(xive, irq, &idx); + if (!sb) { + pr_debug("No source, creating source block...\n"); + sb = kvmppc_xive_create_src_block(xive, irq); + if (!sb) { + pr_err("Failed to create block...\n"); + return -ENOMEM; + } + } + state = &sb->irq_state[idx]; + + if (get_user(val, ubufp)) { + pr_err("fault getting user info !\n"); + return -EFAULT; + } + + arch_spin_lock(&sb->lock); + + /* + * If the source doesn't already have an IPI, allocate + * one and get the corresponding data + */ + if (!state->ipi_number) { + state->ipi_number = xive_native_alloc_irq(); + if (state->ipi_number == 0) { + pr_err("Failed to allocate IRQ !\n"); + rc = -ENXIO; + goto unlock; + } + xive_native_populate_irq_data(state->ipi_number, + &state->ipi_data); + pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__, + state->ipi_number, irq); + } + + /* Restore LSI state */ + if (val & KVM_XIVE_LEVEL_SENSITIVE) { + state->lsi = true; + if (val & KVM_XIVE_LEVEL_ASSERTED) + state->asserted = true; + pr_devel(" LSI ! Asserted=%d\n", state->asserted); + } + + /* Mask IRQ to start with */ + state->act_server = 0; + state->act_priority = MASKED; + xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); + xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); + + /* Increment the number of valid sources and mark this one valid */ + if (!state->valid) + xive->src_count++; + state->valid = true; + + rc = 0; + +unlock: + arch_spin_unlock(&sb->lock); + + return rc; +} + static int kvmppc_xive_native_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + struct kvmppc_xive *xive = dev->private; + switch (attr->group) { case KVM_DEV_XIVE_GRP_CTRL: break; + case KVM_DEV_XIVE_GRP_SOURCE: + return kvmppc_xive_native_set_source(xive, attr->attr, + attr->addr); } return -ENXIO; } @@ -176,6 +269,11 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_XIVE_GRP_CTRL: break; + case KVM_DEV_XIVE_GRP_SOURCE: + if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && + attr->attr < KVMPPC_XIVE_NR_IRQS) + return 0; + break; } return -ENXIO; } @@ -184,6 +282,7 @@ static void kvmppc_xive_native_free(struct kvm_device *dev) { struct kvmppc_xive *xive = dev->private; struct kvm *kvm = xive->kvm; + int i; debugfs_remove(xive->dentry); @@ -192,6 +291,13 @@ static void kvmppc_xive_native_free(struct kvm_device *dev) if (kvm) kvm->arch.xive = NULL; + for (i = 0; i <= xive->max_sbid; i++) { + if (xive->src_blocks[i]) + kvmppc_xive_free_sources(xive->src_blocks[i]); + kfree(xive->src_blocks[i]); + xive->src_blocks[i] = NULL; + } + if (xive->vp_base != XIVE_INVALID_VP) xive_native_free_vp_block(xive->vp_base); -- cgit v1.2.3-59-g8ed1b From e8676ce50e224d507946b1c535bc13584e6b49ff Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:30 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a control to configure a source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This control will be used by the H_INT_SET_SOURCE_CONFIG hcall from QEMU to configure the target of a source and also to restore the configuration of a source when migrating the VM. The XIVE source interrupt structure is extended with the value of the Effective Interrupt Source Number. The EISN is the interrupt number pushed in the event queue that the guest OS will use to dispatch events internally. Caching the EISN value in KVM eases the test when checking if a reconfiguration is indeed needed. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 21 +++++++ arch/powerpc/include/uapi/asm/kvm.h | 11 ++++ arch/powerpc/kvm/book3s_xive.c | 5 +- arch/powerpc/kvm/book3s_xive.h | 4 ++ arch/powerpc/kvm/book3s_xive_native.c | 97 ++++++++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index cd8bfc37b72e..33c64b2cdbe8 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -32,3 +32,24 @@ the legacy interrupt mode, referred as XICS (POWER7/8). -ENOMEM: Could not create a new source block -EFAULT: Invalid user pointer for attr->addr. -ENXIO: Could not allocate underlying HW interrupt + + 3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only) + Configures source targeting + Attributes: + Interrupt source number (64-bit) + The kvm_device_attr.addr points to a __u64 value: + bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0 + values: | eisn | mask | server | priority + - priority: 0-7 interrupt priority level + - server: CPU number chosen to handle the interrupt + - mask: mask flag (unused) + - eisn: Effective Interrupt Source Number + Errors: + -ENOENT: Unknown source number + -EINVAL: Not initialized source number + -EINVAL: Invalid priority + -EINVAL: Invalid CPU number. + -EFAULT: Invalid user pointer for attr->addr. + -ENXIO: CPU event queues not configured or configuration of the + underlying HW interrupt failed + -EBUSY: No CPU available to serve interrupt diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index d468294c2a67..e8161e21629b 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -680,9 +680,20 @@ struct kvm_ppc_cpu_char { /* POWER9 XIVE Native Interrupt Controller */ #define KVM_DEV_XIVE_GRP_CTRL 1 #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ +#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ /* Layout of 64-bit XIVE source attribute values */ #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) #define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1) +/* Layout of 64-bit XIVE source configuration attribute values */ +#define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0 +#define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7 +#define KVM_XIVE_SOURCE_SERVER_SHIFT 3 +#define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL +#define KVM_XIVE_SOURCE_MASKED_SHIFT 32 +#define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL +#define KVM_XIVE_SOURCE_EISN_SHIFT 33 +#define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6c9f9fd0855f..e09f3addffe5 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -342,7 +342,7 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio) return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY; } -static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) +int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio) { struct kvm_vcpu *vcpu; int i, rc; @@ -530,7 +530,7 @@ static int xive_target_interrupt(struct kvm *kvm, * priority. The count for that new target will have * already been incremented. */ - rc = xive_select_target(kvm, &server, prio); + rc = kvmppc_xive_select_target(kvm, &server, prio); /* * We failed to find a target ? Not much we can do @@ -1504,6 +1504,7 @@ struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i; + sb->irq_state[i].eisn = 0; sb->irq_state[i].guest_priority = MASKED; sb->irq_state[i].saved_priority = MASKED; sb->irq_state[i].act_priority = MASKED; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 1be921cb5dcb..ae26fe653d98 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -61,6 +61,9 @@ struct kvmppc_xive_irq_state { bool saved_p; bool saved_q; u8 saved_scan_prio; + + /* Xive native */ + u32 eisn; /* Guest Effective IRQ number */ }; /* Select the "right" interrupt (IPI vs. passthrough) */ @@ -268,6 +271,7 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( struct kvmppc_xive *xive, int irq); void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); +int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5f2bd6c137b7..492825a35958 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -242,6 +242,99 @@ unlock: return rc; } +static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive, + struct kvmppc_xive_src_block *sb, + struct kvmppc_xive_irq_state *state, + u32 server, u8 priority, bool masked, + u32 eisn) +{ + struct kvm *kvm = xive->kvm; + u32 hw_num; + int rc = 0; + + arch_spin_lock(&sb->lock); + + if (state->act_server == server && state->act_priority == priority && + state->eisn == eisn) + goto unlock; + + pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n", + priority, server, masked, state->act_server, + state->act_priority); + + kvmppc_xive_select_irq(state, &hw_num, NULL); + + if (priority != MASKED && !masked) { + rc = kvmppc_xive_select_target(kvm, &server, priority); + if (rc) + goto unlock; + + state->act_priority = priority; + state->act_server = server; + state->eisn = eisn; + + rc = xive_native_configure_irq(hw_num, + kvmppc_xive_vp(xive, server), + priority, eisn); + } else { + state->act_priority = MASKED; + state->act_server = 0; + state->eisn = 0; + + rc = xive_native_configure_irq(hw_num, 0, MASKED, 0); + } + +unlock: + arch_spin_unlock(&sb->lock); + return rc; +} + +static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, + long irq, u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + u64 __user *ubufp = (u64 __user *) addr; + u16 src; + u64 kvm_cfg; + u32 server; + u8 priority; + bool masked; + u32 eisn; + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + return -ENOENT; + + state = &sb->irq_state[src]; + + if (!state->valid) + return -EINVAL; + + if (get_user(kvm_cfg, ubufp)) + return -EFAULT; + + pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg); + + priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >> + KVM_XIVE_SOURCE_PRIORITY_SHIFT; + server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >> + KVM_XIVE_SOURCE_SERVER_SHIFT; + masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >> + KVM_XIVE_SOURCE_MASKED_SHIFT; + eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >> + KVM_XIVE_SOURCE_EISN_SHIFT; + + if (priority != xive_prio_from_guest(priority)) { + pr_err("invalid priority for queue %d for VCPU %d\n", + priority, server); + return -EINVAL; + } + + return kvmppc_xive_native_update_source_config(xive, sb, state, server, + priority, masked, eisn); +} + static int kvmppc_xive_native_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -253,6 +346,9 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, case KVM_DEV_XIVE_GRP_SOURCE: return kvmppc_xive_native_set_source(xive, attr->attr, attr->addr); + case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: + return kvmppc_xive_native_set_source_config(xive, attr->attr, + attr->addr); } return -ENXIO; } @@ -270,6 +366,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, case KVM_DEV_XIVE_GRP_CTRL: break; case KVM_DEV_XIVE_GRP_SOURCE: + case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && attr->attr < KVMPPC_XIVE_NR_IRQS) return 0; -- cgit v1.2.3-59-g8ed1b From 13ce3297c5766b9541b6a7a255794c5168a7ae1a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:31 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add controls for the EQ configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These controls will be used by the H_INT_SET_QUEUE_CONFIG and H_INT_GET_QUEUE_CONFIG hcalls from QEMU to configure the underlying Event Queue in the XIVE IC. They will also be used to restore the configuration of the XIVE EQs and to capture the internal run-time state of the EQs. Both 'get' and 'set' rely on an OPAL call to access the EQ toggle bit and EQ index which are updated by the XIVE IC when event notifications are enqueued in the EQ. The value of the guest physical address of the event queue is saved in the XIVE internal xive_q structure for later use. That is when migration needs to mark the EQ pages dirty to capture a consistent memory state of the VM. To be noted that H_INT_SET_QUEUE_CONFIG does not require the extra OPAL call setting the EQ toggle bit and EQ index to configure the EQ, but restoring the EQ state will. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 34 ++++ arch/powerpc/include/asm/xive.h | 2 + arch/powerpc/include/uapi/asm/kvm.h | 19 +++ arch/powerpc/kvm/book3s_xive.c | 15 +- arch/powerpc/kvm/book3s_xive.h | 2 + arch/powerpc/kvm/book3s_xive_native.c | 249 +++++++++++++++++++++++++++++ 6 files changed, 315 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index 33c64b2cdbe8..cc13bfd5cf53 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -53,3 +53,37 @@ the legacy interrupt mode, referred as XICS (POWER7/8). -ENXIO: CPU event queues not configured or configuration of the underlying HW interrupt failed -EBUSY: No CPU available to serve interrupt + + 4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write) + Configures an event queue of a CPU + Attributes: + EQ descriptor identifier (64-bit) + The EQ descriptor identifier is a tuple (server, priority) : + bits: | 63 .... 32 | 31 .. 3 | 2 .. 0 + values: | unused | server | priority + The kvm_device_attr.addr points to : + struct kvm_ppc_xive_eq { + __u32 flags; + __u32 qshift; + __u64 qaddr; + __u32 qtoggle; + __u32 qindex; + __u8 pad[40]; + }; + - flags: queue flags + KVM_XIVE_EQ_ALWAYS_NOTIFY (required) + forces notification without using the coalescing mechanism + provided by the XIVE END ESBs. + - qshift: queue size (power of 2) + - qaddr: real address of queue + - qtoggle: current queue toggle bit + - qindex: current queue index + - pad: reserved for future use + Errors: + -ENOENT: Invalid CPU number + -EINVAL: Invalid priority + -EINVAL: Invalid flags + -EINVAL: Invalid queue size + -EINVAL: Invalid queue address + -EFAULT: Invalid user pointer for attr->addr. + -EIO: Configuration of the underlying HW failed diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index b579a943407b..c4e88abd3b67 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -73,6 +73,8 @@ struct xive_q { u32 esc_irq; atomic_t count; atomic_t pending_count; + u64 guest_qaddr; + u32 guest_qshift; }; /* Global enable flags for the XIVE support */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index e8161e21629b..85005400fd86 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -681,6 +681,7 @@ struct kvm_ppc_cpu_char { #define KVM_DEV_XIVE_GRP_CTRL 1 #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ +#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ /* Layout of 64-bit XIVE source attribute values */ #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) @@ -696,4 +697,22 @@ struct kvm_ppc_cpu_char { #define KVM_XIVE_SOURCE_EISN_SHIFT 33 #define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL +/* Layout of 64-bit EQ identifier */ +#define KVM_XIVE_EQ_PRIORITY_SHIFT 0 +#define KVM_XIVE_EQ_PRIORITY_MASK 0x7 +#define KVM_XIVE_EQ_SERVER_SHIFT 3 +#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL + +/* Layout of EQ configuration values (64 bytes) */ +struct kvm_ppc_xive_eq { + __u32 flags; + __u32 qshift; + __u64 qaddr; + __u32 qtoggle; + __u32 qindex; + __u8 pad[40]; +}; + +#define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001 + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e09f3addffe5..c1b7aa7dbc28 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -166,7 +166,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data) return IRQ_HANDLED; } -static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, + bool single_escalation) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; struct xive_q *q = &xc->queues[prio]; @@ -185,7 +186,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) return -EIO; } - if (xc->xive->single_escalation) + if (single_escalation) name = kasprintf(GFP_KERNEL, "kvm-%d-%d", vcpu->kvm->arch.lpid, xc->server_num); else @@ -217,7 +218,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) * interrupt, thus leaving it effectively masked after * it fires once. */ - if (xc->xive->single_escalation) { + if (single_escalation) { struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -291,7 +292,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) continue; rc = xive_provision_queue(vcpu, prio); if (rc == 0 && !xive->single_escalation) - xive_attach_escalation(vcpu, prio); + kvmppc_xive_attach_escalation(vcpu, prio, + xive->single_escalation); if (rc) return rc; } @@ -1214,7 +1216,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); if (r == 0 && !xive->single_escalation) - xive_attach_escalation(vcpu, i); + kvmppc_xive_attach_escalation( + vcpu, i, xive->single_escalation); if (r) goto bail; } else { @@ -1229,7 +1232,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, } /* If not done above, attach priority 0 escalation */ - r = xive_attach_escalation(vcpu, 0); + r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); if (r) goto bail; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index ae26fe653d98..622f594d93e1 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -272,6 +272,8 @@ struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( struct kvmppc_xive *xive, int irq); void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, + bool single_escalation); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 492825a35958..3e7cdcacc932 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -335,6 +335,243 @@ static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, priority, masked, eisn); } +static int xive_native_validate_queue_size(u32 qshift) +{ + /* + * We only support 64K pages for the moment. This is also + * advertised in the DT property "ibm,xive-eq-sizes" + */ + switch (qshift) { + case 0: /* EQ reset */ + case 16: + return 0; + case 12: + case 21: + case 24: + default: + return -EINVAL; + } +} + +static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, + long eq_idx, u64 addr) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + struct kvmppc_xive_vcpu *xc; + void __user *ubufp = (void __user *) addr; + u32 server; + u8 priority; + struct kvm_ppc_xive_eq kvm_eq; + int rc; + __be32 *qaddr = 0; + struct page *page; + struct xive_q *q; + gfn_t gfn; + unsigned long page_size; + + /* + * Demangle priority/server tuple from the EQ identifier + */ + priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> + KVM_XIVE_EQ_PRIORITY_SHIFT; + server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> + KVM_XIVE_EQ_SERVER_SHIFT; + + if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq))) + return -EFAULT; + + vcpu = kvmppc_xive_find_server(kvm, server); + if (!vcpu) { + pr_err("Can't find server %d\n", server); + return -ENOENT; + } + xc = vcpu->arch.xive_vcpu; + + if (priority != xive_prio_from_guest(priority)) { + pr_err("Trying to restore invalid queue %d for VCPU %d\n", + priority, server); + return -EINVAL; + } + q = &xc->queues[priority]; + + pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", + __func__, server, priority, kvm_eq.flags, + kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); + + /* + * sPAPR specifies a "Unconditional Notify (n) flag" for the + * H_INT_SET_QUEUE_CONFIG hcall which forces notification + * without using the coalescing mechanisms provided by the + * XIVE END ESBs. This is required on KVM as notification + * using the END ESBs is not supported. + */ + if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) { + pr_err("invalid flags %d\n", kvm_eq.flags); + return -EINVAL; + } + + rc = xive_native_validate_queue_size(kvm_eq.qshift); + if (rc) { + pr_err("invalid queue size %d\n", kvm_eq.qshift); + return rc; + } + + /* reset queue and disable queueing */ + if (!kvm_eq.qshift) { + q->guest_qaddr = 0; + q->guest_qshift = 0; + + rc = xive_native_configure_queue(xc->vp_id, q, priority, + NULL, 0, true); + if (rc) { + pr_err("Failed to reset queue %d for VCPU %d: %d\n", + priority, xc->server_num, rc); + return rc; + } + + if (q->qpage) { + put_page(virt_to_page(q->qpage)); + q->qpage = NULL; + } + + return 0; + } + + if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) { + pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr, + 1ull << kvm_eq.qshift); + return -EINVAL; + } + + gfn = gpa_to_gfn(kvm_eq.qaddr); + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { + pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); + return -EINVAL; + } + + page_size = kvm_host_page_size(kvm, gfn); + if (1ull << kvm_eq.qshift > page_size) { + pr_warn("Incompatible host page size %lx!\n", page_size); + return -EINVAL; + } + + qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); + + /* + * Backup the queue page guest address to the mark EQ page + * dirty for migration. + */ + q->guest_qaddr = kvm_eq.qaddr; + q->guest_qshift = kvm_eq.qshift; + + /* + * Unconditional Notification is forced by default at the + * OPAL level because the use of END ESBs is not supported by + * Linux. + */ + rc = xive_native_configure_queue(xc->vp_id, q, priority, + (__be32 *) qaddr, kvm_eq.qshift, true); + if (rc) { + pr_err("Failed to configure queue %d for VCPU %d: %d\n", + priority, xc->server_num, rc); + put_page(page); + return rc; + } + + /* + * Only restore the queue state when needed. When doing the + * H_INT_SET_SOURCE_CONFIG hcall, it should not. + */ + if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) { + rc = xive_native_set_queue_state(xc->vp_id, priority, + kvm_eq.qtoggle, + kvm_eq.qindex); + if (rc) + goto error; + } + + rc = kvmppc_xive_attach_escalation(vcpu, priority, + xive->single_escalation); +error: + if (rc) + kvmppc_xive_native_cleanup_queue(vcpu, priority); + return rc; +} + +static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive, + long eq_idx, u64 addr) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + struct kvmppc_xive_vcpu *xc; + struct xive_q *q; + void __user *ubufp = (u64 __user *) addr; + u32 server; + u8 priority; + struct kvm_ppc_xive_eq kvm_eq; + u64 qaddr; + u64 qshift; + u64 qeoi_page; + u32 escalate_irq; + u64 qflags; + int rc; + + /* + * Demangle priority/server tuple from the EQ identifier + */ + priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> + KVM_XIVE_EQ_PRIORITY_SHIFT; + server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> + KVM_XIVE_EQ_SERVER_SHIFT; + + vcpu = kvmppc_xive_find_server(kvm, server); + if (!vcpu) { + pr_err("Can't find server %d\n", server); + return -ENOENT; + } + xc = vcpu->arch.xive_vcpu; + + if (priority != xive_prio_from_guest(priority)) { + pr_err("invalid priority for queue %d for VCPU %d\n", + priority, server); + return -EINVAL; + } + q = &xc->queues[priority]; + + memset(&kvm_eq, 0, sizeof(kvm_eq)); + + if (!q->qpage) + return 0; + + rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift, + &qeoi_page, &escalate_irq, &qflags); + if (rc) + return rc; + + kvm_eq.flags = 0; + if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY) + kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; + + kvm_eq.qshift = q->guest_qshift; + kvm_eq.qaddr = q->guest_qaddr; + + rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle, + &kvm_eq.qindex); + if (rc) + return rc; + + pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", + __func__, server, priority, kvm_eq.flags, + kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); + + if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq))) + return -EFAULT; + + return 0; +} + static int kvmppc_xive_native_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -349,6 +586,9 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: return kvmppc_xive_native_set_source_config(xive, attr->attr, attr->addr); + case KVM_DEV_XIVE_GRP_EQ_CONFIG: + return kvmppc_xive_native_set_queue_config(xive, attr->attr, + attr->addr); } return -ENXIO; } @@ -356,6 +596,13 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, static int kvmppc_xive_native_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + struct kvmppc_xive *xive = dev->private; + + switch (attr->group) { + case KVM_DEV_XIVE_GRP_EQ_CONFIG: + return kvmppc_xive_native_get_queue_config(xive, attr->attr, + attr->addr); + } return -ENXIO; } @@ -371,6 +618,8 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, attr->attr < KVMPPC_XIVE_NR_IRQS) return 0; break; + case KVM_DEV_XIVE_GRP_EQ_CONFIG: + return 0; } return -ENXIO; } -- cgit v1.2.3-59-g8ed1b From 5ca806474859a0e94584b3a63f9509a25758408e Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:32 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a global reset control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This control is to be used by the H_INT_RESET hcall from QEMU. Its purpose is to clear all configuration of the sources and EQs. This is necessary in case of a kexec (for a kdump kernel for instance) to make sure that no remaining configuration is left from the previous boot setup so that the new kernel can start safely from a clean state. The queue 7 is ignored when the XIVE device is configured to run in single escalation mode. Prio 7 is used by escalations. The XIVE VP is kept enabled as the vCPU is still active and connected to the XIVE device. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 5 ++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/book3s_xive_native.c | 85 ++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index cc13bfd5cf53..429cbc4cf960 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -17,6 +17,11 @@ the legacy interrupt mode, referred as XICS (POWER7/8). 1. KVM_DEV_XIVE_GRP_CTRL Provides global controls on the device + Attributes: + 1.1 KVM_DEV_XIVE_RESET (write only) + Resets the interrupt controller configuration for sources and event + queues. To be used by kexec and kdump. + Errors: none 2. KVM_DEV_XIVE_GRP_SOURCE (write only) Initializes a new source in the XIVE device and mask it. diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 85005400fd86..f045f9dee42e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -679,6 +679,7 @@ struct kvm_ppc_cpu_char { /* POWER9 XIVE Native Interrupt Controller */ #define KVM_DEV_XIVE_GRP_CTRL 1 +#define KVM_DEV_XIVE_RESET 1 #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 3e7cdcacc932..b9597d80c95a 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -572,6 +572,83 @@ static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive, return 0; } +static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb) +{ + int i; + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; + + if (!state->valid) + continue; + + if (state->act_priority == MASKED) + continue; + + state->eisn = 0; + state->act_server = 0; + state->act_priority = MASKED; + xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); + xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); + if (state->pt_number) { + xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01); + xive_native_configure_irq(state->pt_number, + 0, MASKED, 0); + } + } +} + +static int kvmppc_xive_reset(struct kvmppc_xive *xive) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + unsigned int i; + + pr_devel("%s\n", __func__); + + mutex_lock(&kvm->lock); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int prio; + + if (!xc) + continue; + + kvmppc_xive_disable_vcpu_interrupts(vcpu); + + for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + + /* Single escalation, no queue 7 */ + if (prio == 7 && xive->single_escalation) + break; + + if (xc->esc_virq[prio]) { + free_irq(xc->esc_virq[prio], vcpu); + irq_dispose_mapping(xc->esc_virq[prio]); + kfree(xc->esc_virq_names[prio]); + xc->esc_virq[prio] = 0; + } + + kvmppc_xive_native_cleanup_queue(vcpu, prio); + } + } + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_reset_sources(sb); + arch_spin_unlock(&sb->lock); + } + } + + mutex_unlock(&kvm->lock); + + return 0; +} + static int kvmppc_xive_native_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -579,6 +656,10 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_XIVE_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XIVE_RESET: + return kvmppc_xive_reset(xive); + } break; case KVM_DEV_XIVE_GRP_SOURCE: return kvmppc_xive_native_set_source(xive, attr->attr, @@ -611,6 +692,10 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, { switch (attr->group) { case KVM_DEV_XIVE_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XIVE_RESET: + return 0; + } break; case KVM_DEV_XIVE_GRP_SOURCE: case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: -- cgit v1.2.3-59-g8ed1b From 7b46b6169ab80f8f415a0ca2ea4aa7f1afdcc4f3 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:33 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a control to sync the sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This control will be used by the H_INT_SYNC hcall from QEMU to flush event notifications on the XIVE IC owning the source. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 8 +++++++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/book3s_xive_native.c | 36 ++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index 429cbc4cf960..1e7f19d7594b 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -92,3 +92,11 @@ the legacy interrupt mode, referred as XICS (POWER7/8). -EINVAL: Invalid queue address -EFAULT: Invalid user pointer for attr->addr. -EIO: Configuration of the underlying HW failed + + 5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only) + Synchronize the source to flush event notifications + Attributes: + Interrupt source number (64-bit) + Errors: + -ENOENT: Unknown source number + -EINVAL: Not initialized source number diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index f045f9dee42e..e4abe30f6fc6 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -683,6 +683,7 @@ struct kvm_ppc_cpu_char { #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ +#define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */ /* Layout of 64-bit XIVE source attribute values */ #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index b9597d80c95a..65380416d101 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -335,6 +335,38 @@ static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, priority, masked, eisn); } +static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive, + long irq, u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + struct xive_irq_data *xd; + u32 hw_num; + u16 src; + int rc = 0; + + pr_devel("%s irq=0x%lx", __func__, irq); + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + return -ENOENT; + + state = &sb->irq_state[src]; + + rc = -EINVAL; + + arch_spin_lock(&sb->lock); + + if (state->valid) { + kvmppc_xive_select_irq(state, &hw_num, &xd); + xive_native_sync_source(hw_num); + rc = 0; + } + + arch_spin_unlock(&sb->lock); + return rc; +} + static int xive_native_validate_queue_size(u32 qshift) { /* @@ -670,6 +702,9 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, case KVM_DEV_XIVE_GRP_EQ_CONFIG: return kvmppc_xive_native_set_queue_config(xive, attr->attr, attr->addr); + case KVM_DEV_XIVE_GRP_SOURCE_SYNC: + return kvmppc_xive_native_sync_source(xive, attr->attr, + attr->addr); } return -ENXIO; } @@ -699,6 +734,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, break; case KVM_DEV_XIVE_GRP_SOURCE: case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: + case KVM_DEV_XIVE_GRP_SOURCE_SYNC: if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && attr->attr < KVMPPC_XIVE_NR_IRQS) return 0; -- cgit v1.2.3-59-g8ed1b From e6714bd1671da9d8dfb5332075df251b746fd0fd Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:34 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a control to dirty the XIVE EQ pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When migration of a VM is initiated, a first copy of the RAM is transferred to the destination before the VM is stopped, but there is no guarantee that the EQ pages in which the event notifications are queued have not been modified. To make sure migration will capture a consistent memory state, the XIVE device should perform a XIVE quiesce sequence to stop the flow of event notifications and stabilize the EQs. This is the purpose of the KVM_DEV_XIVE_EQ_SYNC control which will also marks the EQ pages dirty to force their transfer. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 29 ++++++++++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/book3s_xive_native.c | 85 ++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index 1e7f19d7594b..7ffd4c7be7b5 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -23,6 +23,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8). queues. To be used by kexec and kdump. Errors: none + 1.2 KVM_DEV_XIVE_EQ_SYNC (write only) + Sync all the sources and queues and mark the EQ pages dirty. This + to make sure that a consistent memory state is captured when + migrating the VM. + Errors: none + 2. KVM_DEV_XIVE_GRP_SOURCE (write only) Initializes a new source in the XIVE device and mask it. Attributes: @@ -100,3 +106,26 @@ the legacy interrupt mode, referred as XICS (POWER7/8). Errors: -ENOENT: Unknown source number -EINVAL: Not initialized source number + +* Migration: + + Saving the state of a VM using the XIVE native exploitation mode + should follow a specific sequence. When the VM is stopped : + + 1. Mask all sources (PQ=01) to stop the flow of events. + + 2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to + flush any in-flight event notification and to stabilize the EQs. At + this stage, the EQ pages are marked dirty to make sure they are + transferred in the migration sequence. + + 3. Capture the state of the source targeting, the EQs configuration + and the state of thread interrupt context registers. + + Restore is similar : + + 1. Restore the EQ configuration. As targeting depends on it. + 2. Restore targeting + 3. Restore the thread interrupt contexts + 4. Restore the source states + 5. Let the vCPU run diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index e4abe30f6fc6..12744608a61c 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -680,6 +680,7 @@ struct kvm_ppc_cpu_char { /* POWER9 XIVE Native Interrupt Controller */ #define KVM_DEV_XIVE_GRP_CTRL 1 #define KVM_DEV_XIVE_RESET 1 +#define KVM_DEV_XIVE_EQ_SYNC 2 #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 65380416d101..16d23ef3bd39 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -681,6 +681,88 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive) return 0; } +static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb) +{ + int j; + + for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) { + struct kvmppc_xive_irq_state *state = &sb->irq_state[j]; + struct xive_irq_data *xd; + u32 hw_num; + + if (!state->valid) + continue; + + /* + * The struct kvmppc_xive_irq_state reflects the state + * of the EAS configuration and not the state of the + * source. The source is masked setting the PQ bits to + * '-Q', which is what is being done before calling + * the KVM_DEV_XIVE_EQ_SYNC control. + * + * If a source EAS is configured, OPAL syncs the XIVE + * IC of the source and the XIVE IC of the previous + * target if any. + * + * So it should be fine ignoring MASKED sources as + * they have been synced already. + */ + if (state->act_priority == MASKED) + continue; + + kvmppc_xive_select_irq(state, &hw_num, &xd); + xive_native_sync_source(hw_num); + xive_native_sync_queue(hw_num); + } +} + +static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int prio; + + if (!xc) + return -ENOENT; + + for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + struct xive_q *q = &xc->queues[prio]; + + if (!q->qpage) + continue; + + /* Mark EQ page dirty for migration */ + mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr)); + } + return 0; +} + +static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive) +{ + struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; + unsigned int i; + + pr_devel("%s\n", __func__); + + mutex_lock(&kvm->lock); + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_native_sync_sources(sb); + arch_spin_unlock(&sb->lock); + } + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + kvmppc_xive_native_vcpu_eq_sync(vcpu); + } + mutex_unlock(&kvm->lock); + + return 0; +} + static int kvmppc_xive_native_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -691,6 +773,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_XIVE_RESET: return kvmppc_xive_reset(xive); + case KVM_DEV_XIVE_EQ_SYNC: + return kvmppc_xive_native_eq_sync(xive); } break; case KVM_DEV_XIVE_GRP_SOURCE: @@ -729,6 +813,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, case KVM_DEV_XIVE_GRP_CTRL: switch (attr->attr) { case KVM_DEV_XIVE_RESET: + case KVM_DEV_XIVE_EQ_SYNC: return 0; } break; -- cgit v1.2.3-59-g8ed1b From e4945b9da52b36052b7c509ca31c5ead1d165b24 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:35 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add get/set accessors for the VP XIVE state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The state of the thread interrupt management registers needs to be collected for migration. These registers are cached under the 'xive_saved_state.w01' field of the VCPU when the VPCU context is pulled from the HW thread. An OPAL call retrieves the backup of the IPB register in the underlying XIVE NVT structure and merges it in the KVM state. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 1 + Documentation/virtual/kvm/devices/xive.txt | 17 +++++++ arch/powerpc/include/asm/kvm_ppc.h | 11 +++++ arch/powerpc/include/uapi/asm/kvm.h | 2 + arch/powerpc/kvm/book3s.c | 24 ++++++++++ arch/powerpc/kvm/book3s_xive_native.c | 76 ++++++++++++++++++++++++++++++ 6 files changed, 131 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e38eb17b7be6..5b505520a616 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1985,6 +1985,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TLB3PS | 32 PPC | KVM_REG_PPC_EPTCFG | 32 PPC | KVM_REG_PPC_ICP_STATE | 64 + PPC | KVM_REG_PPC_VP_STATE | 128 PPC | KVM_REG_PPC_TB_OFFSET | 64 PPC | KVM_REG_PPC_SPMC1 | 32 PPC | KVM_REG_PPC_SPMC2 | 32 diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index 7ffd4c7be7b5..525d1eebcf34 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -107,6 +107,23 @@ the legacy interrupt mode, referred as XICS (POWER7/8). -ENOENT: Unknown source number -EINVAL: Not initialized source number +* VCPU state + + The XIVE IC maintains VP interrupt state in an internal structure + called the NVT. When a VP is not dispatched on a HW processor + thread, this structure can be updated by HW if the VP is the target + of an event notification. + + It is important for migration to capture the cached IPB from the NVT + as it synthesizes the priorities of the pending interrupts. We + capture a bit more to report debug information. + + KVM_REG_PPC_VP_STATE (2 * 64bits) + bits: | 63 .... 32 | 31 .... 0 | + values: | TIMA word0 | TIMA word1 | + bits: | 127 .......... 64 | + values: | unused | + * Migration: Saving the state of a VM using the XIVE native exploitation mode diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9fc2753e516e..bc892380e6cd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -269,6 +269,7 @@ union kvmppc_one_reg { u64 addr; u64 length; } vpaval; + u64 xive_timaval[2]; }; struct kvmppc_ops { @@ -604,6 +605,10 @@ extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); extern void kvmppc_xive_native_init_module(void); extern void kvmppc_xive_native_exit_module(void); +extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, + union kvmppc_one_reg *val); +extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, + union kvmppc_one_reg *val); #else static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, @@ -636,6 +641,12 @@ static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { } static inline void kvmppc_xive_native_init_module(void) { } static inline void kvmppc_xive_native_exit_module(void) { } +static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, + union kvmppc_one_reg *val) +{ return 0; } +static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, + union kvmppc_one_reg *val) +{ return -ENOENT; } #endif /* CONFIG_KVM_XIVE */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 12744608a61c..cd3f16b70a2e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ #define KVM_REG_PPC_ICP_PPRI_MASK 0xff +#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d) + /* Device control API: PPC-specific devices */ #define KVM_DEV_MPIC_GRP_MISC 1 #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 7c3348fa27e1..efd15101eef0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -651,6 +651,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); break; #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_REG_PPC_VP_STATE: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) + r = kvmppc_xive_native_get_vp(vcpu, val); + else + r = -ENXIO; + break; +#endif /* CONFIG_KVM_XIVE */ case KVM_REG_PPC_FSCR: *val = get_reg_val(id, vcpu->arch.fscr); break; @@ -724,6 +736,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); break; #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_REG_PPC_VP_STATE: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) + r = kvmppc_xive_native_set_vp(vcpu, val); + else + r = -ENXIO; + break; +#endif /* CONFIG_KVM_XIVE */ case KVM_REG_PPC_FSCR: vcpu->arch.fscr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 16d23ef3bd39..2f9d5e9439a6 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -896,6 +896,82 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) return ret; } +/* + * Interrupt Pending Buffer (IPB) offset + */ +#define TM_IPB_SHIFT 40 +#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT) + +int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + u64 opal_state; + int rc; + + if (!kvmppc_xive_enabled(vcpu)) + return -EPERM; + + if (!xc) + return -ENOENT; + + /* Thread context registers. We only care about IPB and CPPR */ + val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01; + + /* Get the VP state from OPAL */ + rc = xive_native_get_vp_state(xc->vp_id, &opal_state); + if (rc) + return rc; + + /* + * Capture the backup of IPB register in the NVT structure and + * merge it in our KVM VP state. + */ + val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK); + + pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n", + __func__, + vcpu->arch.xive_saved_state.nsr, + vcpu->arch.xive_saved_state.cppr, + vcpu->arch.xive_saved_state.ipb, + vcpu->arch.xive_saved_state.pipr, + vcpu->arch.xive_saved_state.w01, + (u32) vcpu->arch.xive_cam_word, opal_state); + + return 0; +} + +int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; + + pr_devel("%s w01=%016llx vp=%016llx\n", __func__, + val->xive_timaval[0], val->xive_timaval[1]); + + if (!kvmppc_xive_enabled(vcpu)) + return -EPERM; + + if (!xc || !xive) + return -ENOENT; + + /* We can't update the state of a "pushed" VCPU */ + if (WARN_ON(vcpu->arch.xive_pushed)) + return -EBUSY; + + /* + * Restore the thread context registers. IPB and CPPR should + * be the only ones that matter. + */ + vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0]; + + /* + * There is no need to restore the XIVE internal state (IPB + * stored in the NVT) as the IPB register was merged in KVM VP + * state when captured. + */ + return 0; +} + static int xive_native_debug_show(struct seq_file *m, void *private) { struct kvmppc_xive *xive = m->private; -- cgit v1.2.3-59-g8ed1b From 39e9af3de5ca936098bc80ebe14401426673c208 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:37 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a TIMA mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each thread has an associated Thread Interrupt Management context composed of a set of registers. These registers let the thread handle priority management and interrupt acknowledgment. The most important are : - Interrupt Pending Buffer (IPB) - Current Processor Priority (CPPR) - Notification Source Register (NSR) They are exposed to software in four different pages each proposing a view with a different privilege. The first page is for the physical thread context and the second for the hypervisor. Only the third (operating system) and the fourth (user level) are exposed the guest. A custom VM fault handler will populate the VMA with the appropriate pages, which should only be the OS page for now. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 23 ++++++++++++++++++ arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/include/uapi/asm/kvm.h | 2 ++ arch/powerpc/kvm/book3s_xive_native.c | 39 ++++++++++++++++++++++++++++++ arch/powerpc/sysdev/xive/native.c | 11 +++++++++ 5 files changed, 76 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index 525d1eebcf34..0cd7847ec38a 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -13,6 +13,29 @@ requires a POWER9 host and the guest OS should have support for the XIVE native exploitation interrupt mode. If not, it should run using the legacy interrupt mode, referred as XICS (POWER7/8). +* Device Mappings + + The KVM device exposes different MMIO ranges of the XIVE HW which + are required for interrupt management. These are exposed to the + guest in VMAs populated with a custom VM fault handler. + + 1. Thread Interrupt Management Area (TIMA) + + Each thread has an associated Thread Interrupt Management context + composed of a set of registers. These registers let the thread + handle priority management and interrupt acknowledgment. The most + important are : + + - Interrupt Pending Buffer (IPB) + - Current Processor Priority (CPPR) + - Notification Source Register (NSR) + + They are exposed to software in four different pages each proposing + a view with a different privilege. The first page is for the + physical thread context and the second for the hypervisor. Only the + third (operating system) and the fourth (user level) are exposed the + guest. + * Groups: 1. KVM_DEV_XIVE_GRP_CTRL diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index c4e88abd3b67..eaf76f57023a 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -23,6 +23,7 @@ * same offset regardless of where the code is executing */ extern void __iomem *xive_tima; +extern unsigned long xive_tima_os; /* * Offset in the TM area of our current execution level (provided by diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index cd3f16b70a2e..0998e8edc91a 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -720,4 +720,6 @@ struct kvm_ppc_xive_eq { #define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001 +#define KVM_XIVE_TIMA_PAGE_OFFSET 0 + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 2f9d5e9439a6..f5314da0cb45 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -165,6 +165,44 @@ bail: return rc; } +static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + + switch (vmf->pgoff - vma->vm_pgoff) { + case 0: /* HW - forbid access */ + case 1: /* HV - forbid access */ + return VM_FAULT_SIGBUS; + case 2: /* OS */ + vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT); + return VM_FAULT_NOPAGE; + case 3: /* USER - TODO */ + default: + return VM_FAULT_SIGBUS; + } +} + +static const struct vm_operations_struct xive_native_tima_vmops = { + .fault = xive_native_tima_fault, +}; + +static int kvmppc_xive_native_mmap(struct kvm_device *dev, + struct vm_area_struct *vma) +{ + /* We only allow mappings at fixed offset for now */ + if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) { + if (vma_pages(vma) > 4) + return -EINVAL; + vma->vm_ops = &xive_native_tima_vmops; + } else { + return -EINVAL; + } + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + return 0; +} + static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, u64 addr) { @@ -1050,6 +1088,7 @@ struct kvm_device_ops kvm_xive_native_ops = { .set_attr = kvmppc_xive_native_set_attr, .get_attr = kvmppc_xive_native_get_attr, .has_attr = kvmppc_xive_native_has_attr, + .mmap = kvmppc_xive_native_mmap, }; void kvmppc_xive_native_init_module(void) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 0c037e933e55..7782201e5fe8 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -521,6 +521,9 @@ u32 xive_native_default_eq_shift(void) } EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); +unsigned long xive_tima_os; +EXPORT_SYMBOL_GPL(xive_tima_os); + bool __init xive_native_init(void) { struct device_node *np; @@ -573,6 +576,14 @@ bool __init xive_native_init(void) for_each_possible_cpu(cpu) kvmppc_set_xive_tima(cpu, r.start, tima); + /* Resource 2 is OS window */ + if (of_address_to_resource(np, 2, &r)) { + pr_err("Failed to get thread mgmnt area resource\n"); + return false; + } + + xive_tima_os = r.start; + /* Grab size of provisionning pages */ xive_parse_provisioning(np); -- cgit v1.2.3-59-g8ed1b From 6520ca64cde71b75dae54f3fcb33517a93d82486 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:38 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a mapping for the source ESB pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each source is associated with an Event State Buffer (ESB) with a even/odd pair of pages which provides commands to manage the source: to trigger, to EOI, to turn off the source for instance. The custom VM fault handler will deduce the guest IRQ number from the offset of the fault, and the ESB page of the associated XIVE interrupt will be inserted into the VMA using the internal structure caching information on the interrupts. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 7 ++++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/book3s_xive_native.c | 57 ++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index 0cd7847ec38a..69ee62d3d4dc 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -36,6 +36,13 @@ the legacy interrupt mode, referred as XICS (POWER7/8). third (operating system) and the fourth (user level) are exposed the guest. + 2. Event State Buffer (ESB) + + Each source is associated with an Event State Buffer (ESB) with + either a pair of even/odd pair of pages which provides commands to + manage the source: to trigger, to EOI, to turn off the source for + instance. + * Groups: 1. KVM_DEV_XIVE_GRP_CTRL diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0998e8edc91a..b0f72dea8b11 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -721,5 +721,6 @@ struct kvm_ppc_xive_eq { #define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001 #define KVM_XIVE_TIMA_PAGE_OFFSET 0 +#define KVM_XIVE_ESB_PAGE_OFFSET 4 #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index f5314da0cb45..465eb90ff23e 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -165,6 +165,59 @@ bail: return rc; } +static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct kvm_device *dev = vma->vm_file->private_data; + struct kvmppc_xive *xive = dev->private; + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + struct xive_irq_data *xd; + u32 hw_num; + u16 src; + u64 page; + unsigned long irq; + u64 page_offset; + + /* + * Linux/KVM uses a two pages ESB setting, one for trigger and + * one for EOI + */ + page_offset = vmf->pgoff - vma->vm_pgoff; + irq = page_offset / 2; + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) { + pr_devel("%s: source %lx not found !\n", __func__, irq); + return VM_FAULT_SIGBUS; + } + + state = &sb->irq_state[src]; + kvmppc_xive_select_irq(state, &hw_num, &xd); + + arch_spin_lock(&sb->lock); + + /* + * first/even page is for trigger + * second/odd page is for EOI and management. + */ + page = page_offset % 2 ? xd->eoi_page : xd->trig_page; + arch_spin_unlock(&sb->lock); + + if (WARN_ON(!page)) { + pr_err("%s: acessing invalid ESB page for source %lx !\n", + __func__, irq); + return VM_FAULT_SIGBUS; + } + + vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT); + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct xive_native_esb_vmops = { + .fault = xive_native_esb_fault, +}; + static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -194,6 +247,10 @@ static int kvmppc_xive_native_mmap(struct kvm_device *dev, if (vma_pages(vma) > 4) return -EINVAL; vma->vm_ops = &xive_native_tima_vmops; + } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) { + if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2) + return -EINVAL; + vma->vm_ops = &xive_native_esb_vmops; } else { return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From 232b984b7d55e68971962f07f1dd1d1eb1be52e0 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:39 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add passthrough support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The KVM XICS-over-XIVE device and the proposed KVM XIVE native device implement an IRQ space for the guest using the generic IPI interrupts of the XIVE IC controller. These interrupts are allocated at the OPAL level and "mapped" into the guest IRQ number space in the range 0-0x1FFF. Interrupt management is performed in the XIVE way: using loads and stores on the addresses of the XIVE IPI interrupt ESB pages. Both KVM devices share the same internal structure caching information on the interrupts, among which the xive_irq_data struct containing the addresses of the IPI ESB pages and an extra one in case of pass-through. The later contains the addresses of the ESB pages of the underlying HW controller interrupts, PHB4 in all cases for now. A guest, when running in the XICS legacy interrupt mode, lets the KVM XICS-over-XIVE device "handle" interrupt management, that is to perform the loads and stores on the addresses of the ESB pages of the guest interrupts. However, when running in XIVE native exploitation mode, the KVM XIVE native device exposes the interrupt ESB pages to the guest and lets the guest perform directly the loads and stores. The VMA exposing the ESB pages make use of a custom VM fault handler which role is to populate the VMA with appropriate pages. When a fault occurs, the guest IRQ number is deduced from the offset, and the ESB pages of associated XIVE IPI interrupt are inserted in the VMA (using the internal structure caching information on the interrupts). Supporting device passthrough in the guest running in XIVE native exploitation mode adds some extra refinements because the ESB pages of a different HW controller (PHB4) need to be exposed to the guest along with the initial IPI ESB pages of the XIVE IC controller. But the overall mechanic is the same. When the device HW irqs are mapped into or unmapped from the guest IRQ number space, the passthru_irq helpers, kvmppc_xive_set_mapped() and kvmppc_xive_clr_mapped(), are called to record or clear the passthrough interrupt information and to perform the switch. The approach taken by this patch is to clear the ESB pages of the guest IRQ number being mapped and let the VM fault handler repopulate. The handler will insert the ESB page corresponding to the HW interrupt of the device being passed-through or the initial IPI ESB page if the device is being removed. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/devices/xive.txt | 19 ++++++++++++++ arch/powerpc/kvm/book3s_xive.c | 15 +++++++++++ arch/powerpc/kvm/book3s_xive.h | 9 +++++++ arch/powerpc/kvm/book3s_xive_native.c | 41 ++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt index 69ee62d3d4dc..9a24a4525253 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virtual/kvm/devices/xive.txt @@ -43,6 +43,25 @@ the legacy interrupt mode, referred as XICS (POWER7/8). manage the source: to trigger, to EOI, to turn off the source for instance. + 3. Device pass-through + + When a device is passed-through into the guest, the source + interrupts are from a different HW controller (PHB4) and the ESB + pages exposed to the guest should accommadate this change. + + The passthru_irq helpers, kvmppc_xive_set_mapped() and + kvmppc_xive_clr_mapped() are called when the device HW irqs are + mapped into or unmapped from the guest IRQ number space. The KVM + device extends these helpers to clear the ESB pages of the guest IRQ + number being mapped and then lets the VM fault handler repopulate. + The handler will insert the ESB page corresponding to the HW + interrupt of the device being passed-through or the initial IPI ESB + page if the device has being removed. + + The ESB remapping is fully transparent to the guest and the OS + device driver. All handling is done within VFIO and the above + helpers in KVM-PPC. + * Groups: 1. KVM_DEV_XIVE_GRP_CTRL diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index c1b7aa7dbc28..480a3fc6b9fd 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -937,6 +937,13 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, /* Turn the IPI hard off */ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); + /* + * Reset ESB guest mapping. Needed when ESB pages are exposed + * to the guest in XIVE native mode + */ + if (xive->ops && xive->ops->reset_mapped) + xive->ops->reset_mapped(kvm, guest_irq); + /* Grab info about irq */ state->pt_number = hw_irq; state->pt_data = irq_data_get_irq_handler_data(host_data); @@ -1022,6 +1029,14 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, state->pt_number = 0; state->pt_data = NULL; + /* + * Reset ESB guest mapping. Needed when ESB pages are exposed + * to the guest in XIVE native mode + */ + if (xive->ops && xive->ops->reset_mapped) { + xive->ops->reset_mapped(kvm, guest_irq); + } + /* Reconfigure the IPI */ xive_native_configure_irq(state->ipi_number, kvmppc_xive_vp(xive, state->act_server), diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 622f594d93e1..e011622dc038 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -94,6 +94,11 @@ struct kvmppc_xive_src_block { struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; }; +struct kvmppc_xive; + +struct kvmppc_xive_ops { + int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); +}; struct kvmppc_xive { struct kvm *kvm; @@ -132,6 +137,10 @@ struct kvmppc_xive { /* Flags */ u8 single_escalation; + + struct kvmppc_xive_ops *ops; + struct address_space *mapping; + struct mutex mapping_lock; }; #define KVMPPC_XIVE_Q_COUNT 8 diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 465eb90ff23e..62648f833adf 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -165,6 +166,35 @@ bail: return rc; } +/* + * Device passthrough support + */ +static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq) +{ + struct kvmppc_xive *xive = kvm->arch.xive; + + if (irq >= KVMPPC_XIVE_NR_IRQS) + return -EINVAL; + + /* + * Clear the ESB pages of the IRQ number being mapped (or + * unmapped) into the guest and let the the VM fault handler + * repopulate with the appropriate ESB pages (device or IC) + */ + pr_debug("clearing esb pages for girq 0x%lx\n", irq); + mutex_lock(&xive->mapping_lock); + if (xive->mapping) + unmap_mapping_range(xive->mapping, + irq * (2ull << PAGE_SHIFT), + 2ull << PAGE_SHIFT, 1); + mutex_unlock(&xive->mapping_lock); + return 0; +} + +static struct kvmppc_xive_ops kvmppc_xive_native_ops = { + .reset_mapped = kvmppc_xive_native_reset_mapped, +}; + static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -242,6 +272,8 @@ static const struct vm_operations_struct xive_native_tima_vmops = { static int kvmppc_xive_native_mmap(struct kvm_device *dev, struct vm_area_struct *vma) { + struct kvmppc_xive *xive = dev->private; + /* We only allow mappings at fixed offset for now */ if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) { if (vma_pages(vma) > 4) @@ -257,6 +289,13 @@ static int kvmppc_xive_native_mmap(struct kvm_device *dev, vma->vm_flags |= VM_IO | VM_PFNMAP; vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + + /* + * Grab the KVM device file address_space to be able to clear + * the ESB pages mapping when a device is passed-through into + * the guest. + */ + xive->mapping = vma->vm_file->f_mapping; return 0; } @@ -971,6 +1010,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) xive->dev = dev; xive->kvm = kvm; kvm->arch.xive = xive; + mutex_init(&xive->mapping_lock); /* * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for @@ -984,6 +1024,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) ret = -ENXIO; xive->single_escalation = xive_native_has_single_escalation(); + xive->ops = &kvmppc_xive_native_ops; if (ret) kfree(xive); -- cgit v1.2.3-59-g8ed1b From 3a1e5e4a2c7a1ec63e6d70a7e921f62bcbb57b85 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Mon, 29 Apr 2019 15:25:35 +0200 Subject: Revert "KVM: doc: Document the life cycle of a VM and its resources" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 919f6cd8bb2fe7151f8aecebc3b3d1ca2567396e. The patch was applied twice. The first commit is eca6be566d47029f945a5f8e1c94d374e31df2ca. Reported-by: Cornelia Huck Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index b62ad0d94234..26dc1280b49b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -69,23 +69,6 @@ by and on behalf of the VM's process may not be freed/unaccounted when the VM is shut down. -It is important to note that althought VM ioctls may only be issued from -the process that created the VM, a VM's lifecycle is associated with its -file descriptor, not its creator (process). In other words, the VM and -its resources, *including the associated address space*, are not freed -until the last reference to the VM's file descriptor has been released. -For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will -not be freed until both the parent (original) process and its child have -put their references to the VM's file descriptor. - -Because a VM's resources are not freed until the last reference to its -file descriptor is released, creating additional references to a VM via -via fork(), dup(), etc... without careful consideration is strongly -discouraged and may have unwanted side effects, e.g. memory allocated -by and on behalf of the VM's process may not be freed/unaccounted when -the VM is shut down. - - 3. Extensions ------------- -- cgit v1.2.3-59-g8ed1b From 65c4189de8c1d995f6bc2cc96b22206405466b53 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Apr 2019 15:28:44 +0200 Subject: KVM: fix KVM_CLEAR_DIRTY_LOG for memory slots of unaligned size If a memory slot's size is not a multiple of 64 pages (256K), then the KVM_CLEAR_DIRTY_LOG API is unusable: clearing the final 64 pages either requires the requested page range to go beyond memslot->npages, or requires log->num_pages to be unaligned, and kvm_clear_dirty_log_protect requires log->num_pages to be both in range and aligned. To allow this case, allow log->num_pages not to be a multiple of 64 if it ends exactly on the last page of the slot. Reported-by: Peter Xu Fixes: 98938aa8edd6 ("KVM: validate userspace input in kvm_clear_dirty_log_protect()", 2019-01-02) Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 5 +++-- tools/testing/selftests/kvm/dirty_log_test.c | 4 ++-- virt/kvm/kvm_main.c | 7 ++++--- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 26dc1280b49b..675cb0bea903 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3812,8 +3812,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap field. Bit 0 of the bitmap corresponds to page "first_page" in the memory slot, and num_pages is the size in bits of the input bitmap. -Both first_page and num_pages must be a multiple of 64. For each bit -that is set in the input bitmap, the corresponding page is marked "clean" +first_page must be a multiple of 64; num_pages must also be a multiple of +64 unless first_page + num_pages is the size of the memory slot. For each +bit that is set in the input bitmap, the corresponding page is marked "clean" in KVM's dirty bitmap, and dirty tracking is re-enabled for that page (for example via write-protection, or by clearing the dirty bit in a page table entry). diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 4715cfba20dc..052fb5856df4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -289,7 +289,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; guest_page_size = (1ul << guest_page_shift); /* 1G of guest page sized pages */ - guest_num_pages = (1ul << (30 - guest_page_shift)); + guest_num_pages = (1ul << (30 - guest_page_shift)) + 3; host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -359,7 +359,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); #ifdef USE_CLEAR_DIRTY_LOG kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - DIV_ROUND_UP(host_num_pages, 64) * 64); + host_num_pages); #endif vm_dirty_log_verify(bmap); iteration++; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 71ac0de892dc..e9ca417b9ae9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1240,7 +1240,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) return -EINVAL; - if ((log->first_page & 63) || (log->num_pages & 63)) + if (log->first_page & 63) return -EINVAL; slots = __kvm_memslots(kvm, as_id); @@ -1253,8 +1253,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, n = kvm_dirty_bitmap_bytes(memslot); if (log->first_page > memslot->npages || - log->num_pages > memslot->npages - log->first_page) - return -EINVAL; + log->num_pages > memslot->npages - log->first_page || + (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) + return -EINVAL; *flush = false; dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); -- cgit v1.2.3-59-g8ed1b From d7547c55cbe7471255ca51f14bcd4699f5eaabe5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:47 +0800 Subject: KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 The previous KVM_CAP_MANUAL_DIRTY_LOG_PROTECT has some problem which blocks the correct usage from userspace. Obsolete the old one and introduce a new capability bit for it. Suggested-by: Paolo Bonzini Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 15 ++++++++++----- include/uapi/linux/kvm.h | 5 +++-- tools/testing/selftests/kvm/dirty_log_test.c | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 4 files changed, 17 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 675cb0bea903..47a5eb00bc53 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -330,7 +330,7 @@ They must be less than the value that KVM_CHECK_EXTENSION returns for the KVM_CAP_MULTI_ADDRESS_SPACE capability. The bits in the dirty bitmap are cleared before the ioctl returns, unless -KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information, +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information, see the description of the capability. 4.9 KVM_SET_MEMORY_ALIAS @@ -3791,7 +3791,7 @@ to I/O ports. 4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl) -Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT +Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 Architectures: x86 Type: vm ioctl Parameters: struct kvm_dirty_log (in) @@ -3824,10 +3824,10 @@ the address space for which you want to return the dirty bitmap. They must be less than the value that KVM_CHECK_EXTENSION returns for the KVM_CAP_MULTI_ADDRESS_SPACE capability. -This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT +This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled; for more information, see the description of the capability. However, it can always be used as long as KVM_CHECK_EXTENSION confirms -that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present. +that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present. 4.118 KVM_GET_SUPPORTED_HV_CPUID @@ -4780,7 +4780,7 @@ and injected exceptions. * For the new DR6 bits, note that bit 16 is set iff the #DB exception will clear DR6.RTM. -7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT +7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 Architectures: all Parameters: args[0] whether feature should be enabled or not @@ -4803,6 +4803,11 @@ while userspace can see false reports of dirty pages. Manual reprotection helps reducing this time, improving guest performance and reducing the number of dirty log false positives. +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make +it hard or impossible to use it correctly. The availability of +KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed. +Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT. 8. Other capabilities. ---------------------- diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6d4ea4b6c922..d673734c46cb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -986,8 +986,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #define KVM_CAP_EXCEPTION_PAYLOAD 164 #define KVM_CAP_ARM_VM_IPA_SIZE 165 -#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 #ifdef KVM_CAP_IRQ_ROUTING @@ -1434,7 +1435,7 @@ struct kvm_enc_region { #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) -/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ +/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) /* Available with KVM_CAP_HYPERV_CPUID */ diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 052fb5856df4..a29d1119ccb3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -311,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT; + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; cap.args[0] = 1; vm_enable_cap(vm, &cap); #endif @@ -427,7 +427,7 @@ int main(int argc, char *argv[]) int opt, i; #ifdef USE_CLEAR_DIRTY_LOG - if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) { + if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) { fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n"); exit(KSFT_SKIP); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7883e0ad07fe..f4e02cd8fa43 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3110,7 +3110,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_CHECK_EXTENSION_VM: case KVM_CAP_ENABLE_CAP_VM: #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: #endif return 1; #ifdef CONFIG_KVM_MMIO @@ -3148,7 +3148,7 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, { switch (cap->cap) { #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: if (cap->flags || (cap->args[0] & ~1)) return -EINVAL; kvm->manual_dirty_log_protect = cap->args[0]; -- cgit v1.2.3-59-g8ed1b