aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-26 13:45:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-26 13:45:15 -0700
commit862f0a3227b337cea11d0488b0345dc2670fc297 (patch)
treea3376ede3d214679cbde295f7319f20537accfbc /arch
parentMerge tag 'random_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/random (diff)
parentKVM: x86: fix return value for reserved EFER (diff)
downloadlinux-dev-862f0a3227b337cea11d0488b0345dc2670fc297.tar.xz
linux-dev-862f0a3227b337cea11d0488b0345dc2670fc297.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini: "The usual smattering of fixes and tunings that came in too late for the merge window, but should not wait four months before they appear in a release. I also travelled a bit more than usual in the first part of May, which didn't help with picking up patches and reports promptly" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (33 commits) KVM: x86: fix return value for reserved EFER tools/kvm_stat: fix fields filter for child events KVM: selftests: Wrap vcpu_nested_state_get/set functions with x86 guard kvm: selftests: aarch64: compile with warnings on kvm: selftests: aarch64: fix default vm mode kvm: selftests: aarch64: dirty_log_test: fix unaligned memslot size KVM: s390: fix memory slot handling for KVM_SET_USER_MEMORY_REGION KVM: x86/pmu: do not mask the value that is written to fixed PMUs KVM: x86/pmu: mask the result of rdpmc according to the width of the counters x86/kvm/pmu: Set AMD's virt PMU version to 1 KVM: x86: do not spam dmesg with VMCS/VMCB dumps kvm: Check irqchip mode before assign irqfd kvm: svm/avic: fix off-by-one in checking host APIC ID KVM: selftests: do not blindly clobber registers in guest asm KVM: selftests: Remove duplicated TEST_ASSERT in hyperv_cpuid.c KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow kvm: vmx: Fix -Wmissing-prototypes warnings KVM: nVMX: Fix using __this_cpu_read() in preemptible context kvm: fix compilation on s390 ...
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/kvm/hyp/Makefile1
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/kvm/hyp/Makefile1
-rw-r--r--arch/arm64/kvm/hyp/switch.c39
-rw-r--r--arch/arm64/kvm/pmu.c38
-rw-r--r--arch/s390/include/asm/kvm_host.h2
-rw-r--r--arch/s390/kvm/kvm-s390.c37
-rw-r--r--arch/x86/kvm/cpuid.c8
-rw-r--r--arch/x86/kvm/debugfs.c18
-rw-r--r--arch/x86/kvm/irq.c7
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/pmu.c10
-rw-r--r--arch/x86/kvm/pmu.h3
-rw-r--r--arch/x86/kvm/pmu_amd.c4
-rw-r--r--arch/x86/kvm/svm.c15
-rw-r--r--arch/x86/kvm/vmx/nested.c35
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c26
-rw-r--r--arch/x86/kvm/vmx/vmx.c26
-rw-r--r--arch/x86/kvm/vmx/vmx.h1
-rw-r--r--arch/x86/kvm/x86.c4
20 files changed, 173 insertions, 106 deletions
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index d2b5ec9c4b92..ba88b1eca93c 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -11,6 +11,7 @@ CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2a8d3f8ca22c..4bcd9c1291d5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -592,9 +592,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
-void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt);
-bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt);
-
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
#else
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 82d1904328ad..ea710f674cb6 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -10,6 +10,7 @@ KVM=../../../../virt/kvm
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 22b4c335e0b2..8799e0c267d4 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -16,6 +16,7 @@
*/
#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>
@@ -505,6 +506,44 @@ static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
#endif
}
+/**
+ * Disable host events, enable guest events
+ */
+static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenclr_el0);
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenset_el0);
+
+ return (pmu->events_host || pmu->events_guest);
+}
+
+/**
+ * Disable guest events, enable host events
+ */
+static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenclr_el0);
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenset_el0);
+}
+
/* Switch to the guest for VHE systems running in EL2 */
int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 3da94a5bb6b7..e71d00bb5271 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -53,44 +53,6 @@ void kvm_clr_pmu_events(u32 clr)
ctx->pmu_events.events_guest &= ~clr;
}
-/**
- * Disable host events, enable guest events
- */
-bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenclr_el0);
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenset_el0);
-
- return (pmu->events_host || pmu->events_guest);
-}
-
-/**
- * Disable guest events, enable host events
- */
-void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenclr_el0);
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenset_el0);
-}
-
#define PMEVTYPER_READ_CASE(idx) \
case idx: \
return read_sysreg(pmevtyper##idx##_el0)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index bdbc81b5bc91..2b00a3ebee08 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -36,7 +36,7 @@
*/
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 4096
-#define KVM_HALT_POLL_NS_DEFAULT 80000
+#define KVM_HALT_POLL_NS_DEFAULT 50000
/* s390-specific vcpu->requests bit members */
#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8d6d75db8de6..e5e8eb29e68e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -181,7 +181,7 @@ MODULE_PARM_DESC(hpage, "1m huge page backing support");
/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
-MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling");
+MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
/*
* For now we handle at most 16 double words as this is what the s390 base
@@ -4524,21 +4524,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- int rc;
-
- /* If the basics of the memslot do not change, we do not want
- * to update the gmap. Every update causes several unnecessary
- * segment translation exceptions. This is usually handled just
- * fine by the normal fault handler + gmap, but it will also
- * cause faults on the prefix page of running guest CPUs.
- */
- if (old->userspace_addr == mem->userspace_addr &&
- old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
- old->npages * PAGE_SIZE == mem->memory_size)
- return;
+ int rc = 0;
- rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
- mem->guest_phys_addr, mem->memory_size);
+ switch (change) {
+ case KVM_MR_DELETE:
+ rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
+ old->npages * PAGE_SIZE);
+ break;
+ case KVM_MR_MOVE:
+ rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
+ old->npages * PAGE_SIZE);
+ if (rc)
+ break;
+ /* FALLTHROUGH */
+ case KVM_MR_CREATE:
+ rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
+ mem->guest_phys_addr, mem->memory_size);
+ break;
+ case KVM_MR_FLAGS_ONLY:
+ break;
+ default:
+ WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
+ }
if (rc)
pr_warn("failed to commit memory region\n");
return;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 80a642a0143d..e18a9f9f65b5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -456,8 +456,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
break;
}
- /* function 4 has additional index. */
- case 4: {
+ /* functions 4 and 0x8000001d have additional index. */
+ case 4:
+ case 0x8000001d: {
int i, cache_type;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -701,8 +702,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx = entry->edx = 0;
break;
case 0x8000001a:
- break;
- case 0x8000001d:
+ case 0x8000001e:
break;
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index c19c7ede9bd6..a2f3432ce090 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -9,12 +9,22 @@
*/
#include <linux/kvm_host.h>
#include <linux/debugfs.h>
+#include "lapic.h"
bool kvm_arch_has_vcpu_debugfs(void)
{
return true;
}
+static int vcpu_get_timer_advance_ns(void *data, u64 *val)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
+ *val = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n");
+
static int vcpu_get_tsc_offset(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
@@ -51,6 +61,14 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
if (!ret)
return -ENOMEM;
+ if (lapic_in_kernel(vcpu)) {
+ ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
+ vcpu->debugfs_dentry,
+ vcpu, &vcpu_timer_advance_ns_fops);
+ if (!ret)
+ return -ENOMEM;
+ }
+
if (kvm_has_tsc_control) {
ret = debugfs_create_file("tsc-scaling-ratio", 0444,
vcpu->debugfs_dentry,
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index faa264822cee..007bc654f928 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -172,3 +172,10 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
}
+
+bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
+{
+ bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE;
+
+ return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm);
+}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index d5005cc26521..fd210cdd4983 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -114,6 +114,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return mode != KVM_IRQCHIP_NONE;
}
+bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index e39741997893..dd745b58ffd8 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -283,7 +283,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
bool fast_mode = idx & (1u << 31);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
- u64 ctr_val;
+ u64 mask = fast_mode ? ~0u : ~0ull;
if (!pmu->version)
return 1;
@@ -291,15 +291,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
- pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx);
+ pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask);
if (!pmc)
return 1;
- ctr_val = pmc_read_counter(pmc);
- if (fast_mode)
- ctr_val = (u32)ctr_val;
-
- *data = ctr_val;
+ *data = pmc_read_counter(pmc) & mask;
return 0;
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ba8898e1a854..22dff661145a 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -25,7 +25,8 @@ struct kvm_pmu_ops {
unsigned (*find_fixed_event)(int idx);
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
- struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx);
+ struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx,
+ u64 *mask);
int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 1495a735b38e..d3118088f1cd 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -186,7 +186,7 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
}
/* idx is the ECX register of RDPMC instruction */
-static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx)
+static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *counters;
@@ -269,10 +269,10 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
pmu->reserved_bits = 0xffffffff00200000ull;
+ pmu->version = 1;
/* not applicable to AMD; but clean them to prevent any fall out */
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->nr_arch_fixed_counters = 0;
- pmu->version = 0;
pmu->global_status = 0;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a849dcb7fbc5..735b8c01895e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -379,6 +379,9 @@ module_param(vgif, int, 0444);
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
+static bool __read_mostly dump_invalid_vmcb = 0;
+module_param(dump_invalid_vmcb, bool, 0644);
+
static u8 rsm_ins_bytes[] = "\x0f\xaa";
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
@@ -2024,7 +2027,11 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (!kvm_vcpu_apicv_active(vcpu))
return;
- if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
+ /*
+ * Since the host physical APIC id is 8 bits,
+ * we can support host APIC ID upto 255.
+ */
+ if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
entry = READ_ONCE(*(svm->avic_physical_id_cache));
@@ -4824,6 +4831,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
+ if (!dump_invalid_vmcb) {
+ pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
+ return;
+ }
+
pr_err("VMCB Control Area:\n");
pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
@@ -4982,7 +4994,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
= svm->vmcb->control.exit_code;
- pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
dump_vmcb(vcpu);
return 0;
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f1a69117ac0f..1032f068f0b9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2784,14 +2784,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
: "cc", "memory"
);
- preempt_enable();
-
if (vmx->msr_autoload.host.nr)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
if (vmx->msr_autoload.guest.nr)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
if (vm_fail) {
+ preempt_enable();
WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
@@ -2803,6 +2802,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
local_irq_enable();
if (hw_breakpoint_active())
set_debugreg(__this_cpu_read(cpu_dr7), 7);
+ preempt_enable();
/*
* A non-failing VMEntry means we somehow entered guest mode with
@@ -5423,39 +5423,44 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return 0;
+ vmx->nested.nested_run_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
+ ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
- if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12))
- return -EINVAL;
+ if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12))
+ goto error_guest_mode;
if (copy_from_user(shadow_vmcs12,
user_kvm_nested_state->data + VMCS12_SIZE,
- sizeof(*vmcs12)))
- return -EFAULT;
+ sizeof(*vmcs12))) {
+ ret = -EFAULT;
+ goto error_guest_mode;
+ }
if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
!shadow_vmcs12->hdr.shadow_vmcs)
- return -EINVAL;
+ goto error_guest_mode;
}
if (nested_vmx_check_controls(vcpu, vmcs12) ||
nested_vmx_check_host_state(vcpu, vmcs12) ||
nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
- return -EINVAL;
+ goto error_guest_mode;
vmx->nested.dirty_vmcs12 = true;
- vmx->nested.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
-
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- if (ret) {
- vmx->nested.nested_run_pending = 0;
- return -EINVAL;
- }
+ if (ret)
+ goto error_guest_mode;
return 0;
+
+error_guest_mode:
+ vmx->nested.nested_run_pending = 0;
+ return ret;
}
void nested_vmx_vcpu_setup(void)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f8502c376b37..a99613a060dd 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -126,7 +126,7 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
}
static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
- unsigned idx)
+ unsigned idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
@@ -138,6 +138,7 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
if (fixed && idx >= pmu->nr_arch_fixed_counters)
return NULL;
counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
+ *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
return &counters[idx];
}
@@ -183,9 +184,13 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
*data = pmu->global_ovf_ctrl;
return 0;
default:
- if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
- (pmc = get_fixed_pmc(pmu, msr))) {
- *data = pmc_read_counter(pmc);
+ if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
+ u64 val = pmc_read_counter(pmc);
+ *data = val & pmu->counter_bitmask[KVM_PMC_GP];
+ return 0;
+ } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ u64 val = pmc_read_counter(pmc);
+ *data = val & pmu->counter_bitmask[KVM_PMC_FIXED];
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
*data = pmc->eventsel;
@@ -235,11 +240,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
default:
- if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
- (pmc = get_fixed_pmc(pmu, msr))) {
- if (!msr_info->host_initiated)
- data = (s64)(s32)data;
- pmc->counter += data - pmc_read_counter(pmc);
+ if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
+ if (msr_info->host_initiated)
+ pmc->counter = data;
+ else
+ pmc->counter = (s32)data;
+ return 0;
+ } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ pmc->counter = data;
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1ac167614032..b93e36ddee5e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -114,6 +114,9 @@ static u64 __read_mostly host_xss;
bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
+static bool __read_mostly dump_invalid_vmcs = 0;
+module_param(dump_invalid_vmcs, bool, 0644);
+
#define MSR_BITMAP_MODE_X2APIC 1
#define MSR_BITMAP_MODE_X2APIC_APICV 2
@@ -5607,15 +5610,24 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
void dump_vmcs(void)
{
- u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
- u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
- u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
- u32 secondary_exec_control = 0;
- unsigned long cr4 = vmcs_readl(GUEST_CR4);
- u64 efer = vmcs_read64(GUEST_IA32_EFER);
+ u32 vmentry_ctl, vmexit_ctl;
+ u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
+ unsigned long cr4;
+ u64 efer;
int i, n;
+ if (!dump_invalid_vmcs) {
+ pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
+ return;
+ }
+
+ vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
+ vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
+ cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+ cr4 = vmcs_readl(GUEST_CR4);
+ efer = vmcs_read64(GUEST_IA32_EFER);
+ secondary_exec_control = 0;
if (cpu_has_secondary_exec_ctrls())
secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 63d37ccce3dc..61128b48c503 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -319,6 +319,7 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
+void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
#define POSTED_INTR_ON 0
#define POSTED_INTR_SN 1
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 536b78c4af6e..acb179f78fdc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -143,7 +143,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
* tuning, i.e. allows priveleged userspace to set an exact advancement time.
*/
static int __read_mostly lapic_timer_advance_ns = -1;
-module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
@@ -1298,7 +1298,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u64 efer = msr_info->data;
if (efer & efer_reserved_bits)
- return false;
+ return 1;
if (!msr_info->host_initiated) {
if (!__kvm_valid_efer(vcpu, efer))