From b41fb528dd8784a87414e3af5579674badefa76b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 4 May 2019 06:51:45 +0000 Subject: KVM: s390: fix typo in parameter description Fix typo in parameter description. Fixes: 8b905d28ee17 ("KVM: s390: provide kvm_arch_no_poll function") Signed-off-by: Wei Yongjun Message-Id: <20190504065145.53665-1-weiyongjun1@huawei.com> Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8d6d75db8de6..ac6163c334d6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -181,7 +181,7 @@ MODULE_PARM_DESC(hpage, "1m huge page backing support"); /* maximum percentage of steal time for polling. >100 is treated like 100 */ static u8 halt_poll_max_steal = 10; module_param(halt_poll_max_steal, byte, 0644); -MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling"); +MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling"); /* * For now we handle at most 16 double words as this is what the s390 base -- cgit v1.2.3-59-g8ed1b From 6e9b622d1c3628fae65217465a148b157a361c2a Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 15 May 2019 09:59:08 +0200 Subject: KVM: s390: change default halt poll time to 50us Recent measurements indicate that using 50us results in a reduced CPU consumption, while still providing the benefit of halt polling. Let's use 50us instead. Acked-by: David Hildenbrand Acked-by: Janosch Frank Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index bdbc81b5bc91..2b00a3ebee08 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -36,7 +36,7 @@ */ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 4096 -#define KVM_HALT_POLL_NS_DEFAULT 80000 +#define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ #define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0) -- cgit v1.2.3-59-g8ed1b From b7c50fab66ab66e2d3e00f809a09578d78232836 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 May 2019 18:47:04 +0100 Subject: KVM: arm64: Move pmu hyp code under hyp's Makefile to avoid instrumentation KVM's pmu.c contains the __hyp_text needed to switch the pmu registers between host and guest. Because this isn't covered by the 'hyp' Makefile, it can be built with kasan and friends when these are enabled in Kconfig. When starting a guest, this results in: | Kernel panic - not syncing: HYP panic: | PS:a00003c9 PC:000083000028ada0 ESR:86000007 | FAR:000083000028ada0 HPFAR:0000000029df5300 PAR:0000000000000000 | VCPU:000000004e10b7d6 | CPU: 0 PID: 3088 Comm: qemu-system-aar Not tainted 5.2.0-rc1 #11026 | Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Plat | Call trace: | dump_backtrace+0x0/0x200 | show_stack+0x20/0x30 | dump_stack+0xec/0x158 | panic+0x1ec/0x420 | panic+0x0/0x420 | SMP: stopping secondary CPUs | Kernel Offset: disabled | CPU features: 0x002,25006082 | Memory Limit: none | ---[ end Kernel panic - not syncing: HYP panic: This is caused by functions in pmu.c calling the instrumented code, which isn't mapped to hyp. From objdump -r: | RELOCATION RECORDS FOR [.hyp.text]: | OFFSET TYPE VALUE | 0000000000000010 R_AARCH64_CALL26 __sanitizer_cov_trace_pc | 0000000000000018 R_AARCH64_CALL26 __asan_load4_noabort | 0000000000000024 R_AARCH64_CALL26 __asan_load4_noabort Move the affected code to a new file under 'hyp's Makefile. Fixes: 3d91befbb3a0 ("arm64: KVM: Enable !VHE support for :G/:H perf event modifiers") Cc: Andrew Murray Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 --- arch/arm64/kvm/hyp/switch.c | 39 +++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/pmu.c | 38 -------------------------------------- 3 files changed, 39 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2a8d3f8ca22c..4bcd9c1291d5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -592,9 +592,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); -void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt); -bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt); - void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); #else diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 22b4c335e0b2..8799e0c267d4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -505,6 +506,44 @@ static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) #endif } +/** + * Disable host events, enable guest events + */ +static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + /* Switch to the guest for VHE systems running in EL2 */ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 3da94a5bb6b7..e71d00bb5271 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -53,44 +53,6 @@ void kvm_clr_pmu_events(u32 clr) ctx->pmu_events.events_guest &= ~clr; } -/** - * Disable host events, enable guest events - */ -bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - #define PMEVTYPER_READ_CASE(idx) \ case idx: \ return read_sysreg(pmevtyper##idx##_el0) -- cgit v1.2.3-59-g8ed1b From 623e1528d4090bd1abaf93ec46f047dee9a6fb32 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 May 2019 18:47:05 +0100 Subject: KVM: arm/arm64: Move cc/it checks under hyp's Makefile to avoid instrumentation KVM has helpers to handle the condition codes of trapped aarch32 instructions. These are marked __hyp_text and used from HYP, but they aren't built by the 'hyp' Makefile, which has all the runes to avoid ASAN and KCOV instrumentation. Move this code to a new hyp/aarch32.c to avoid a hyp-panic when starting an aarch32 guest on a host built with the ASAN/KCOV debug options. Fixes: 021234ef3752f ("KVM: arm64: Make kvm_condition_valid32() accessible from EL2") Fixes: 8cebe750c4d9a ("arm64: KVM: Make kvm_skip_instr32 available to HYP") Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm/kvm/hyp/Makefile | 1 + arch/arm64/kvm/hyp/Makefile | 1 + virt/kvm/arm/aarch32.c | 121 --------------------------------------- virt/kvm/arm/hyp/aarch32.c | 136 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 121 deletions(-) create mode 100644 virt/kvm/arm/hyp/aarch32.c (limited to 'arch') diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index d2b5ec9c4b92..ba88b1eca93c 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -11,6 +11,7 @@ CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 82d1904328ad..ea710f674cb6 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,6 +10,7 @@ KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 5abbe9b3c652..6880236974b8 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -25,127 +25,6 @@ #include #include -/* - * stolen from arch/arm/kernel/opcodes.c - * - * condition code lookup table - * index into the table is test code: EQ, NE, ... LT, GT, AL, NV - * - * bit position in short is condition code: NZCV - */ -static const unsigned short cc_map[16] = { - 0xF0F0, /* EQ == Z set */ - 0x0F0F, /* NE */ - 0xCCCC, /* CS == C set */ - 0x3333, /* CC */ - 0xFF00, /* MI == N set */ - 0x00FF, /* PL */ - 0xAAAA, /* VS == V set */ - 0x5555, /* VC */ - 0x0C0C, /* HI == C set && Z clear */ - 0xF3F3, /* LS == C clear || Z set */ - 0xAA55, /* GE == (N==V) */ - 0x55AA, /* LT == (N!=V) */ - 0x0A05, /* GT == (!Z && (N==V)) */ - 0xF5FA, /* LE == (Z || (N!=V)) */ - 0xFFFF, /* AL always */ - 0 /* NV */ -}; - -/* - * Check if a trapped instruction should have been executed or not. - */ -bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) -{ - unsigned long cpsr; - u32 cpsr_cond; - int cond; - - /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) >> 30) - return true; - - /* Is condition field valid? */ - cond = kvm_vcpu_get_condition(vcpu); - if (cond == 0xE) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - if (cond < 0) { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); - - /* it == 0 => unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. */ - cond = (it >> 4); - } - - cpsr_cond = cpsr >> 28; - - if (!((cc_map[cond] >> cpsr_cond) & 1)) - return false; - - return true; -} - -/** - * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block - * @vcpu: The VCPU pointer - * - * When exceptions occur while instructions are executed in Thumb IF-THEN - * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have - * to do this little bit of work manually. The fields map like this: - * - * IT[7:0] -> CPSR[26:25],CPSR[15:10] - */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) -{ - unsigned long itbits, cond; - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_arm = !(cpsr & PSR_AA32_T_BIT); - - if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) - return; - - cond = (cpsr & 0xe000) >> 13; - itbits = (cpsr & 0x1c00) >> (10 - 2); - itbits |= (cpsr & (0x3 << 25)) >> 25; - - /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ - if ((itbits & 0x7) == 0) - itbits = cond = 0; - else - itbits = (itbits << 1) & 0x1f; - - cpsr &= ~PSR_AA32_IT_MASK; - cpsr |= cond << 13; - cpsr |= (itbits & 0x1c) << (10 - 2); - cpsr |= (itbits & 0x3) << 25; - *vcpu_cpsr(vcpu) = cpsr; -} - -/** - * kvm_skip_instr - skip a trapped instruction and proceed to the next - * @vcpu: The vcpu pointer - */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - bool is_thumb; - - is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); - if (is_thumb && !is_wide_instr) - *vcpu_pc(vcpu) += 2; - else - *vcpu_pc(vcpu) += 4; - kvm_adjust_itstate(vcpu); -} - /* * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. */ diff --git a/virt/kvm/arm/hyp/aarch32.c b/virt/kvm/arm/hyp/aarch32.c new file mode 100644 index 000000000000..d31f267961e7 --- /dev/null +++ b/virt/kvm/arm/hyp/aarch32.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyp portion of the (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + */ + +#include +#include +#include + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & PSR_AA32_T_BIT); + + if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~PSR_AA32_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); + if (is_thumb && !is_wide_instr) + *vcpu_pc(vcpu) += 2; + else + *vcpu_pc(vcpu) += 4; + kvm_adjust_itstate(vcpu); +} -- cgit v1.2.3-59-g8ed1b From db80927ea1977a845230a161df643b48fd1e1ea4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 11:55:36 +0200 Subject: KVM: nVMX: really fix the size checks on KVM_SET_NESTED_STATE The offset for reading the shadow VMCS is sizeof(*kvm_state)+VMCS12_SIZE, so the correct size must be that plus sizeof(*vmcs12). This could lead to KVM reading garbage data from userspace and not reporting an error, but is otherwise not sensitive. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f1a69117ac0f..2fd251ded754 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5427,7 +5427,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) return -EINVAL; if (copy_from_user(shadow_vmcs12, -- cgit v1.2.3-59-g8ed1b From 21be4ca1ea685ab3ccc2fd18babb89bc477a9e5c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 8 May 2019 11:04:32 -0700 Subject: KVM: nVMX: Clear nested_run_pending if setting nested state fails VMX's nested_run_pending flag is subtly consumed when stuffing state to enter guest mode, i.e. needs to be set according before KVM knows if setting guest state is successful. If setting guest state fails, clear the flag as a nested run is obviously not pending. Reported-by: Aaron Lewis Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2fd251ded754..6b450839c766 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5423,39 +5423,44 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) return 0; + vmx->nested.nested_run_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + + ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) - return -EINVAL; + goto error_guest_mode; if (copy_from_user(shadow_vmcs12, user_kvm_nested_state->data + VMCS12_SIZE, - sizeof(*vmcs12))) - return -EFAULT; + sizeof(*vmcs12))) { + ret = -EFAULT; + goto error_guest_mode; + } if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || !shadow_vmcs12->hdr.shadow_vmcs) - return -EINVAL; + goto error_guest_mode; } if (nested_vmx_check_controls(vcpu, vmcs12) || nested_vmx_check_host_state(vcpu, vmcs12) || nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) - return -EINVAL; + goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; - vmx->nested.nested_run_pending = - !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); - ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) { - vmx->nested.nested_run_pending = 0; - return -EINVAL; - } + if (ret) + goto error_guest_mode; return 0; + +error_guest_mode: + vmx->nested.nested_run_pending = 0; + return ret; } void nested_vmx_vcpu_setup(void) -- cgit v1.2.3-59-g8ed1b From 32a243df82c8dc04ccba7fc6c6564ae9261cb738 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 27 Mar 2019 13:15:36 -0700 Subject: kvm: x86: Include multiple indices with CPUID leaf 0x8000001d Per the APM, "CPUID Fn8000_001D_E[D,C,B,A]X reports cache topology information for the cache enumerated by the value passed to the instruction in ECX, referred to as Cache n in the following description. To gather information for all cache levels, software must repeatedly execute CPUID with 8000_001Dh in EAX and ECX set to increasing values beginning with 0 until a value of 00h is returned in the field CacheType (EAX[4:0]) indicating no more cache descriptions are available for this processor." The termination condition is the same as leaf 4, so we can reuse that code block for leaf 0x8000001d. Fixes: 8765d75329a38 ("KVM: X86: Extend CPUID range to include new leaf") Cc: Brijesh Singh Cc: Borislav Petkov Signed-off-by: Jim Mattson Reviewed-by: Marc Orr Reviewed-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 80a642a0143d..3c96ce8fbb96 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -456,8 +456,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } break; } - /* function 4 has additional index. */ - case 4: { + /* functions 4 and 0x8000001d have additional index. */ + case 4: + case 0x8000001d: { int i, cache_type; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; @@ -702,8 +703,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; case 0x8000001a: break; - case 0x8000001d: - break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/ -- cgit v1.2.3-59-g8ed1b From 382409b4c43e5b44ae4a869ff793d3cf01d12004 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 27 Mar 2019 13:15:37 -0700 Subject: kvm: x86: Include CPUID leaf 0x8000001e in kvm's supported CPUID Kvm now supports extended CPUID functions through 0x8000001f. CPUID leaf 0x8000001e is AMD's Processor Topology Information leaf. This contains similar information to CPUID leaf 0xb (Intel's Extended Topology Enumeration leaf), and should be included in the output of KVM_GET_SUPPORTED_CPUID, even though userspace is likely to override some of this information based upon the configuration of the particular VM. Cc: Brijesh Singh Cc: Borislav Petkov Fixes: 8765d75329a38 ("KVM: X86: Extend CPUID range to include new leaf") Signed-off-by: Jim Mattson Reviewed-by: Marc Orr Reviewed-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 3c96ce8fbb96..e18a9f9f65b5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -702,6 +702,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx = entry->edx = 0; break; case 0x8000001a: + case 0x8000001e: break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: -- cgit v1.2.3-59-g8ed1b From 541e886f7972cc647804dbb4909189e67987a945 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 17 May 2019 16:49:50 +0800 Subject: KVM: nVMX: Fix using __this_cpu_read() in preemptible context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG: using __this_cpu_read() in preemptible [00000000] code: qemu-system-x86/4590 caller is nested_vmx_enter_non_root_mode+0xebd/0x1790 [kvm_intel] CPU: 4 PID: 4590 Comm: qemu-system-x86 Tainted: G OE 5.1.0-rc4+ #1 Call Trace: dump_stack+0x67/0x95 __this_cpu_preempt_check+0xd2/0xe0 nested_vmx_enter_non_root_mode+0xebd/0x1790 [kvm_intel] nested_vmx_run+0xda/0x2b0 [kvm_intel] handle_vmlaunch+0x13/0x20 [kvm_intel] vmx_handle_exit+0xbd/0x660 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xa2c/0x1e50 [kvm] kvm_vcpu_ioctl+0x3ad/0x6d0 [kvm] do_vfs_ioctl+0xa5/0x6e0 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x6f/0x6c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Accessing per-cpu variable should disable preemption, this patch extends the preemption disable region for __this_cpu_read(). Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Fixes: 52017608da33 ("KVM: nVMX: add option to perform early consistency checks via H/W") Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6b450839c766..1032f068f0b9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2784,14 +2784,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) : "cc", "memory" ); - preempt_enable(); - if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); if (vmx->msr_autoload.guest.nr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); if (vm_fail) { + preempt_enable(); WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -2803,6 +2802,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) local_irq_enable(); if (hw_breakpoint_active()) set_debugreg(__this_cpu_read(cpu_dr7), 7); + preempt_enable(); /* * A non-failing VMEntry means we somehow entered guest mode with -- cgit v1.2.3-59-g8ed1b From 4d259965655c92053f3255aca14d81aab1e21219 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 20 May 2019 12:27:47 +0800 Subject: kvm: vmx: Fix -Wmissing-prototypes warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get a warning when build kernel W=1: arch/x86/kvm/vmx/vmx.c:6365:6: warning: no previous prototype for ‘vmx_update_host_rsp’ [-Wmissing-prototypes] void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) Add the missing declaration to fix this. Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 63d37ccce3dc..61128b48c503 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -319,6 +319,7 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); +void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 -- cgit v1.2.3-59-g8ed1b From 0e6edceb8f18a4e31526d83e6099fef1f29c3af5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 May 2019 16:18:06 +0800 Subject: KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit c3941d9e0 (KVM: lapic: Allow user to disable adaptive tuning of timer advancement), '-1' enables adaptive tuning starting from default advancment of 1000ns. However, we should expose an int instead of an overflow uint module parameter. Before patch: /sys/module/kvm/parameters/lapic_timer_advance_ns:4294967295 After patch: /sys/module/kvm/parameters/lapic_timer_advance_ns:-1 Fixes: c3941d9e0 (KVM: lapic: Allow user to disable adaptive tuning of timer advancement) Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Liran Alon Reviewed-by: Sean Christopherson Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 536b78c4af6e..e7e57de50a3c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -143,7 +143,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); * tuning, i.e. allows priveleged userspace to set an exact advancement time. */ static int __read_mostly lapic_timer_advance_ns = -1; -module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); +module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); -- cgit v1.2.3-59-g8ed1b From 16ba3ab4e15ca031270deb3cbfd38416c719e291 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 May 2019 16:18:07 +0800 Subject: KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose per-vCPU timer_advance_ns to userspace, so it is able to query the auto-adjusted value. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Liran Alon Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/debugfs.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index c19c7ede9bd6..a2f3432ce090 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -9,12 +9,22 @@ */ #include #include +#include "lapic.h" bool kvm_arch_has_vcpu_debugfs(void) { return true; } +static int vcpu_get_timer_advance_ns(void *data, u64 *val) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; + *val = vcpu->arch.apic->lapic_timer.timer_advance_ns; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n"); + static int vcpu_get_tsc_offset(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; @@ -51,6 +61,14 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) if (!ret) return -ENOMEM; + if (lapic_in_kernel(vcpu)) { + ret = debugfs_create_file("lapic_timer_advance_ns", 0444, + vcpu->debugfs_dentry, + vcpu, &vcpu_timer_advance_ns_fops); + if (!ret) + return -ENOMEM; + } + if (kvm_has_tsc_control) { ret = debugfs_create_file("tsc-scaling-ratio", 0444, vcpu->debugfs_dentry, -- cgit v1.2.3-59-g8ed1b From c9bcd3e3335d0a29d89fabd2c385e1b989e6f1b0 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 14 May 2019 15:49:52 +0000 Subject: kvm: svm/avic: fix off-by-one in checking host APIC ID Current logic does not allow VCPU to be loaded onto CPU with APIC ID 255. This should be allowed since the host physical APIC ID field in the AVIC Physical APIC table entry is an 8-bit value, and APIC ID 255 is valid in system with x2APIC enabled. Instead, do not allow VCPU load if the host APIC ID cannot be represented by an 8-bit value. Also, use the more appropriate AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK instead of AVIC_MAX_PHYSICAL_ID_COUNT. Signed-off-by: Suravee Suthikulpanit Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a849dcb7fbc5..a9e553a1317f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2024,7 +2024,11 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT)) + /* + * Since the host physical APIC id is 8 bits, + * we can support host APIC ID upto 255. + */ + if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; entry = READ_ONCE(*(svm->avic_physical_id_cache)); -- cgit v1.2.3-59-g8ed1b From 654f1f13ea56b92bacade8ce2725aea0457f91c0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 5 May 2019 16:56:42 +0800 Subject: kvm: Check irqchip mode before assign irqfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When assigning kvm irqfd we didn't check the irqchip mode but we allow KVM_IRQFD to succeed with all the irqchip modes. However it does not make much sense to create irqfd even without the kernel chips. Let's provide a arch-dependent helper to check whether a specific irqfd is allowed by the arch. At least for x86, it should make sense to check: - when irqchip mode is NONE, all irqfds should be disallowed, and, - when irqchip mode is SPLIT, irqfds that are with resamplefd should be disallowed. For either of the case, previously we'll silently ignore the irq or the irq ack event if the irqchip mode is incorrect. However that can cause misterious guest behaviors and it can be hard to triage. Let's fail KVM_IRQFD even earlier to detect these incorrect configurations. CC: Paolo Bonzini CC: Radim Krčmář CC: Alex Williamson CC: Eduardo Habkost Signed-off-by: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq.c | 7 +++++++ arch/x86/kvm/irq.h | 1 + virt/kvm/eventfd.c | 9 +++++++++ 3 files changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index faa264822cee..007bc654f928 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -172,3 +172,10 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu) __kvm_migrate_apic_timer(vcpu); __kvm_migrate_pit_timer(vcpu); } + +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) +{ + bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE; + + return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm); +} diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index d5005cc26521..fd210cdd4983 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -114,6 +114,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return mode != KVM_IRQCHIP_NONE; } +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 001aeda4c154..3972a9564c76 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -44,6 +44,12 @@ static struct workqueue_struct *irqfd_cleanup_wq; +bool __attribute__((weak)) +kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) +{ + return true; +} + static void irqfd_inject(struct work_struct *work) { @@ -297,6 +303,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (!kvm_arch_intc_initialized(kvm)) return -EAGAIN; + if (!kvm_arch_irqfd_allowed(kvm, args)) + return -EINVAL; + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT); if (!irqfd) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 6f2f84532c153d32a5e53b6083b06a3e24368d30 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 15:34:35 +0200 Subject: KVM: x86: do not spam dmesg with VMCS/VMCB dumps Userspace can easily set up invalid processor state in such a way that dmesg will be filled with VMCS or VMCB dumps. Disable this by default using a module parameter. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 9 ++++++++- arch/x86/kvm/vmx/vmx.c | 26 +++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a9e553a1317f..735b8c01895e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -379,6 +379,9 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static bool __read_mostly dump_invalid_vmcb = 0; +module_param(dump_invalid_vmcb, bool, 0644); + static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); @@ -4828,6 +4831,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; + if (!dump_invalid_vmcb) { + pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); + return; + } + pr_err("VMCB Control Area:\n"); pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); @@ -4986,7 +4994,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; - pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); dump_vmcb(vcpu); return 0; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1ac167614032..b93e36ddee5e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -114,6 +114,9 @@ static u64 __read_mostly host_xss; bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +static bool __read_mostly dump_invalid_vmcs = 0; +module_param(dump_invalid_vmcs, bool, 0644); + #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 @@ -5607,15 +5610,24 @@ static void vmx_dump_dtsel(char *name, uint32_t limit) void dump_vmcs(void) { - u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); - u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); - u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); - u32 secondary_exec_control = 0; - unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_read64(GUEST_IA32_EFER); + u32 vmentry_ctl, vmexit_ctl; + u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; + unsigned long cr4; + u64 efer; int i, n; + if (!dump_invalid_vmcs) { + pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); + return; + } + + vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); + vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); + cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); + cr4 = vmcs_readl(GUEST_CR4); + efer = vmcs_read64(GUEST_IA32_EFER); + secondary_exec_control = 0; if (cpu_has_secondary_exec_ctrls()) secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); -- cgit v1.2.3-59-g8ed1b From a80c4ec10ed9632c44c829452dc40a0443ff4e85 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 8 May 2019 19:02:48 +0200 Subject: x86/kvm/pmu: Set AMD's virt PMU version to 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit: 672ff6cff80c ("KVM: x86: Raise #GP when guest vCPU do not support PMU") my AMD guests started #GPing like this: general protection fault: 0000 [#1] PREEMPT SMP CPU: 1 PID: 4355 Comm: bash Not tainted 5.1.0-rc6+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:x86_perf_event_update+0x3b/0xa0 with Code: pointing to RDPMC. It is RDPMC because the guest has the hardware watchdog CONFIG_HARDLOCKUP_DETECTOR_PERF enabled which uses perf. Instrumenting kvm_pmu_rdpmc() some, showed that it fails due to: if (!pmu->version) return 1; which the above commit added. Since AMD's PMU leaves the version at 0, that causes the #GP injection into the guest. Set pmu->version arbitrarily to 1 and move it above the non-applicable struct kvm_pmu members. Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: Liran Alon Cc: Mihai Carabas Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: "Radim Krčmář" Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86@kernel.org Cc: stable@vger.kernel.org Fixes: 672ff6cff80c ("KVM: x86: Raise #GP when guest vCPU do not support PMU") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 1495a735b38e..50fa9450fcf1 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -269,10 +269,10 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xffffffff00200000ull; + pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; - pmu->version = 0; pmu->global_status = 0; } -- cgit v1.2.3-59-g8ed1b From 0e6f467ee28ec97f68c7b74e35ec1601bb1368a7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 17:20:40 +0200 Subject: KVM: x86/pmu: mask the result of rdpmc according to the width of the counters This patch will simplify the changes in the next, by enforcing the masking of the counters to RDPMC and RDMSR. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 10 +++------- arch/x86/kvm/pmu.h | 3 ++- arch/x86/kvm/pmu_amd.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 13 +++++++++---- 4 files changed, 15 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index e39741997893..dd745b58ffd8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -283,7 +283,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) bool fast_mode = idx & (1u << 31); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; - u64 ctr_val; + u64 mask = fast_mode ? ~0u : ~0ull; if (!pmu->version) return 1; @@ -291,15 +291,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); - pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); + pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask); if (!pmc) return 1; - ctr_val = pmc_read_counter(pmc); - if (fast_mode) - ctr_val = (u32)ctr_val; - - *data = ctr_val; + *data = pmc_read_counter(pmc) & mask; return 0; } diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index ba8898e1a854..22dff661145a 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -25,7 +25,8 @@ struct kvm_pmu_ops { unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); - struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx); + struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx, + u64 *mask); int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 50fa9450fcf1..d3118088f1cd 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -186,7 +186,7 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } /* idx is the ECX register of RDPMC instruction */ -static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx) +static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *counters; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index f8502c376b37..b6f5157445fe 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -126,7 +126,7 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, - unsigned idx) + unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -138,6 +138,7 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, if (fixed && idx >= pmu->nr_arch_fixed_counters) return NULL; counters = fixed ? pmu->fixed_counters : pmu->gp_counters; + *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; return &counters[idx]; } @@ -183,9 +184,13 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) *data = pmu->global_ovf_ctrl; return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - *data = pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_GP]; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { *data = pmc->eventsel; -- cgit v1.2.3-59-g8ed1b From 2924b52117b2812e9633d5ea337333299166d373 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 17:34:30 +0200 Subject: KVM: x86/pmu: do not mask the value that is written to fixed PMUs According to the SDM, for MSR_IA32_PERFCTR0/1 "the lower-order 32 bits of each MSR may be written with any value, and the high-order 8 bits are sign-extended according to the value of bit 31", but the fixed counters in real hardware are limited to the width of the fixed counters ("bits beyond the width of the fixed-function counter are reserved and must be written as zeros"). Fix KVM to do the same. Reported-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index b6f5157445fe..a99613a060dd 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -240,11 +240,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - if (!msr_info->host_initiated) - data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + if (msr_info->host_initiated) + pmc->counter = data; + else + pmc->counter = (s32)data; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + pmc->counter = data; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) -- cgit v1.2.3-59-g8ed1b From 19ec166c3f39fe1d3789888a74cc95544ac266d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 24 May 2019 16:06:23 +0200 Subject: KVM: s390: fix memory slot handling for KVM_SET_USER_MEMORY_REGION kselftests exposed a problem in the s390 handling for memory slots. Right now we only do proper memory slot handling for creation of new memory slots. Neither MOVE, nor DELETION are handled properly. Let us implement those. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ac6163c334d6..e5e8eb29e68e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4524,21 +4524,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - int rc; - - /* If the basics of the memslot do not change, we do not want - * to update the gmap. Every update causes several unnecessary - * segment translation exceptions. This is usually handled just - * fine by the normal fault handler + gmap, but it will also - * cause faults on the prefix page of running guest CPUs. - */ - if (old->userspace_addr == mem->userspace_addr && - old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && - old->npages * PAGE_SIZE == mem->memory_size) - return; + int rc = 0; - rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, - mem->guest_phys_addr, mem->memory_size); + switch (change) { + case KVM_MR_DELETE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + break; + case KVM_MR_MOVE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + if (rc) + break; + /* FALLTHROUGH */ + case KVM_MR_CREATE: + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + break; + case KVM_MR_FLAGS_ONLY: + break; + default: + WARN(1, "Unknown KVM MR CHANGE: %d\n", change); + } if (rc) pr_warn("failed to commit memory region\n"); return; -- cgit v1.2.3-59-g8ed1b From 66f61c92889ff3ca365161fb29dd36d6354682ba Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2019 21:52:46 +0200 Subject: KVM: x86: fix return value for reserved EFER Commit 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes", 2019-04-02) introduced a "return false" in a function returning int, and anyway set_efer has a "nonzero on error" conventon so it should be returning 1. Reported-by: Pavel Machek Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes") Cc: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e7e57de50a3c..acb179f78fdc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1298,7 +1298,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 efer = msr_info->data; if (efer & efer_reserved_bits) - return false; + return 1; if (!msr_info->host_initiated) { if (!__kvm_valid_efer(vcpu, efer)) -- cgit v1.2.3-59-g8ed1b