path: root/virt
author     Linus Torvalds <torvalds@linux-foundation.org>  2019-07-12 15:35:14 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-07-12 15:35:14 -0700
commit     39d7530d7494b4e47ba1856e741f513dafd17e3d (patch)
tree       6b16a744047cff9ff77f26bc5811fe9d953a9b91 /virt
parent     Merge tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux (diff)
parent     Merge tag 'kvm-arm-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD (diff)
download   linux-dev-39d7530d7494b4e47ba1856e741f513dafd17e3d.tar.xz
           linux-dev-39d7530d7494b4e47ba1856e741f513dafd17e3d.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "ARM:
   - support for chained PMU counters in guests
   - improved SError handling
   - handle Neoverse N1 erratum #1349291
   - allow side-channel mitigation status to be migrated
   - standardise most AArch64 system register accesses to msr_s/mrs_s
   - fix host MPIDR corruption on 32bit
   - selftests cleanups

  x86:
   - PMU event {white,black}listing
   - ability for the guest to disable host-side interrupt polling
   - fixes for enlightened VMCS (Hyper-V pv nested virtualization)
   - new hypercall to yield to IPI target
   - support for passing cstate MSRs through to the guest
   - lots of cleanups and optimizations

  Generic:
   - Some txt->rST conversions for the documentation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (128 commits)
  Documentation: virtual: Add toctree hooks
  Documentation: kvm: Convert cpuid.txt to .rst
  Documentation: virtual: Convert paravirt_ops.txt to .rst
  KVM: x86: Unconditionally enable irqs in guest context
  KVM: x86: PMU Event Filter
  kvm: x86: Fix -Wmissing-prototypes warnings
  KVM: Properly check if "page" is valid in kvm_vcpu_unmap
  KVM: arm/arm64: Initialise host's MPIDRs by reading the actual register
  KVM: LAPIC: Retry tune per-vCPU timer_advance_ns if adaptive tuning goes insane
  kvm: LAPIC: write down valid APIC registers
  KVM: arm64: Migrate _elx sysreg accessors to msr_s/mrs_s
  KVM: doc: Add API documentation on the KVM_REG_ARM_WORKAROUNDS register
  KVM: arm/arm64: Add save/restore support for firmware workaround state
  arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests
  KVM: arm/arm64: Support chained PMU counters
  KVM: arm/arm64: Remove pmc->bitmask
  KVM: arm/arm64: Re-create event when setting counter value
  KVM: arm/arm64: Extract duplicated code to own function
  KVM: arm/arm64: Rename kvm_pmu_{enable/disable}_counter functions
  KVM: LAPIC: ARBPRI is a reserved register for x2APIC
  ...
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/arch_timer.c |  24
-rw-r--r--  virt/kvm/arm/arm.c        |   7
-rw-r--r--  virt/kvm/arm/pmu.c        | 350
-rw-r--r--  virt/kvm/arm/psci.c       | 149
-rw-r--r--  virt/kvm/irqchip.c        |   4
-rw-r--r--  virt/kvm/kvm_main.c       |  41
6 files changed, 455 insertions(+), 120 deletions(-)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 1be486d5d7cb..e2bb5bd60227 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -237,10 +237,10 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
switch (index) {
case TIMER_VTIMER:
- cnt_ctl = read_sysreg_el0(cntv_ctl);
+ cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
- cnt_ctl = read_sysreg_el0(cntp_ctl);
+ cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
/* GCC is braindead */
@@ -350,20 +350,20 @@ static void timer_save_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
- ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
- ctx->cnt_cval = read_sysreg_el0(cntv_cval);
+ ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
+ ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL);
/* Disable the timer */
- write_sysreg_el0(0, cntv_ctl);
+ write_sysreg_el0(0, SYS_CNTV_CTL);
isb();
break;
case TIMER_PTIMER:
- ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
- ctx->cnt_cval = read_sysreg_el0(cntp_cval);
+ ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
+ ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL);
/* Disable the timer */
- write_sysreg_el0(0, cntp_ctl);
+ write_sysreg_el0(0, SYS_CNTP_CTL);
isb();
break;
@@ -429,14 +429,14 @@ static void timer_restore_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
- write_sysreg_el0(ctx->cnt_cval, cntv_cval);
+ write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL);
isb();
- write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
+ write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
- write_sysreg_el0(ctx->cnt_cval, cntp_cval);
+ write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL);
isb();
- write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
+ write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
BUG();
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index bd5c55916d0d..f645c0fbf7ec 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -93,9 +93,9 @@ int kvm_arch_hardware_setup(void)
return 0;
}
-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
{
- *(int *)rtn = 0;
+ return 0;
}
@@ -1332,6 +1332,8 @@ static void cpu_hyp_reset(void)
static void cpu_hyp_reinit(void)
{
+ kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);
+
cpu_hyp_reset();
if (is_kernel_in_hyp_mode())
@@ -1569,7 +1571,6 @@ static int init_hyp_mode(void)
kvm_host_data_t *cpu_data;
cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
- kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu);
err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
if (err) {
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index da740764a7ee..3dd8238ed246 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -13,29 +13,144 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
+static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
+
+#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
+
/**
- * kvm_pmu_get_counter_value - get PMU counter value
+ * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
* @vcpu: The vcpu pointer
* @select_idx: The counter index
*/
-u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
+static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
- u64 counter, reg, enabled, running;
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+ return (select_idx == ARMV8_PMU_CYCLE_IDX &&
+ __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
+}
- reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
- counter = __vcpu_sys_reg(vcpu, reg);
+static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu;
+ struct kvm_vcpu_arch *vcpu_arch;
+
+ pmc -= pmc->idx;
+ pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
+ vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
+ return container_of(vcpu_arch, struct kvm_vcpu, arch);
+}
+
+/**
+ * kvm_pmu_pmc_is_chained - determine if the pmc is chained
+ * @pmc: The PMU counter pointer
+ */
+static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
+{
+ struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
- /* The real counter value is equal to the value of counter register plus
+ return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
+}
+
+/**
+ * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
+ * @select_idx: The counter index
+ */
+static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
+{
+ return select_idx & 0x1;
+}
+
+/**
+ * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
+ * @pmc: The PMU counter pointer
+ *
+ * When a pair of PMCs are chained together we use the low counter (canonical)
+ * to hold the underlying perf event.
+ */
+static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
+{
+ if (kvm_pmu_pmc_is_chained(pmc) &&
+ kvm_pmu_idx_is_high_counter(pmc->idx))
+ return pmc - 1;
+
+ return pmc;
+}
+
+/**
+ * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
+ * @vcpu: The vcpu pointer
+ * @select_idx: The counter index
+ */
+static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
+{
+ u64 eventsel, reg;
+
+ select_idx |= 0x1;
+
+ if (select_idx == ARMV8_PMU_CYCLE_IDX)
+ return false;
+
+ reg = PMEVTYPER0_EL0 + select_idx;
+ eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
+
+ return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
+}
+
+/**
+ * kvm_pmu_get_pair_counter_value - get PMU counter value
+ * @vcpu: The vcpu pointer
+ * @pmc: The PMU counter pointer
+ */
+static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
+ struct kvm_pmc *pmc)
+{
+ u64 counter, counter_high, reg, enabled, running;
+
+ if (kvm_pmu_pmc_is_chained(pmc)) {
+ pmc = kvm_pmu_get_canonical_pmc(pmc);
+ reg = PMEVCNTR0_EL0 + pmc->idx;
+
+ counter = __vcpu_sys_reg(vcpu, reg);
+ counter_high = __vcpu_sys_reg(vcpu, reg + 1);
+
+ counter = lower_32_bits(counter) | (counter_high << 32);
+ } else {
+ reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
+ ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
+ counter = __vcpu_sys_reg(vcpu, reg);
+ }
+
+ /*
+ * The real counter value is equal to the value of counter register plus
* the value perf event counts.
*/
if (pmc->perf_event)
counter += perf_event_read_value(pmc->perf_event, &enabled,
&running);
- return counter & pmc->bitmask;
+ return counter;
+}
+
+/**
+ * kvm_pmu_get_counter_value - get PMU counter value
+ * @vcpu: The vcpu pointer
+ * @select_idx: The counter index
+ */
+u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
+{
+ u64 counter;
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+
+ counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
+
+ if (kvm_pmu_pmc_is_chained(pmc) &&
+ kvm_pmu_idx_is_high_counter(select_idx))
+ counter = upper_32_bits(counter);
+
+ else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
+ counter = lower_32_bits(counter);
+
+ return counter;
}
/**
@@ -51,6 +166,23 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
+
+ /* Recreate the perf event to reflect the updated sample_period */
+ kvm_pmu_create_perf_event(vcpu, select_idx);
+}
+
+/**
+ * kvm_pmu_release_perf_event - remove the perf event
+ * @pmc: The PMU counter pointer
+ */
+static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
+{
+ pmc = kvm_pmu_get_canonical_pmc(pmc);
+ if (pmc->perf_event) {
+ perf_event_disable(pmc->perf_event);
+ perf_event_release_kernel(pmc->perf_event);
+ pmc->perf_event = NULL;
+ }
}
/**
@@ -63,15 +195,23 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
u64 counter, reg;
- if (pmc->perf_event) {
- counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
+ pmc = kvm_pmu_get_canonical_pmc(pmc);
+ if (!pmc->perf_event)
+ return;
+
+ counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
+
+ if (kvm_pmu_pmc_is_chained(pmc)) {
+ reg = PMEVCNTR0_EL0 + pmc->idx;
+ __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
+ __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+ } else {
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = counter;
- perf_event_disable(pmc->perf_event);
- perf_event_release_kernel(pmc->perf_event);
- pmc->perf_event = NULL;
+ __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
}
+
+ kvm_pmu_release_perf_event(pmc);
}
/**
@@ -87,8 +227,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
pmu->pmc[i].idx = i;
- pmu->pmc[i].bitmask = 0xffffffffUL;
}
+
+ bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}
/**
@@ -101,15 +242,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
- struct kvm_pmc *pmc = &pmu->pmc[i];
-
- if (pmc->perf_event) {
- perf_event_disable(pmc->perf_event);
- perf_event_release_kernel(pmc->perf_event);
- pmc->perf_event = NULL;
- }
- }
+ for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ kvm_pmu_release_perf_event(&pmu->pmc[i]);
}
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
@@ -124,13 +258,13 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
}
/**
- * kvm_pmu_enable_counter - enable selected PMU counter
+ * kvm_pmu_enable_counter_mask - enable selected PMU counters
* @vcpu: The vcpu pointer
* @val: the value guest writes to PMCNTENSET register
*
* Call perf_event_enable to start counting the perf event
*/
-void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
+void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
@@ -144,6 +278,18 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
continue;
pmc = &pmu->pmc[i];
+
+ /*
+ * For high counters of chained events we must recreate the
+ * perf event with the long (64bit) attribute set.
+ */
+ if (kvm_pmu_pmc_is_chained(pmc) &&
+ kvm_pmu_idx_is_high_counter(i)) {
+ kvm_pmu_create_perf_event(vcpu, i);
+ continue;
+ }
+
+ /* At this point, pmc must be the canonical */
if (pmc->perf_event) {
perf_event_enable(pmc->perf_event);
if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
@@ -153,13 +299,13 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
}
/**
- * kvm_pmu_disable_counter - disable selected PMU counter
+ * kvm_pmu_disable_counter_mask - disable selected PMU counters
* @vcpu: The vcpu pointer
* @val: the value guest writes to PMCNTENCLR register
*
* Call perf_event_disable to stop counting the perf event
*/
-void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val)
+void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
@@ -173,6 +319,18 @@ void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val)
continue;
pmc = &pmu->pmc[i];
+
+ /*
+ * For high counters of chained events we must recreate the
+ * perf event with the long (64bit) attribute unset.
+ */
+ if (kvm_pmu_pmc_is_chained(pmc) &&
+ kvm_pmu_idx_is_high_counter(i)) {
+ kvm_pmu_create_perf_event(vcpu, i);
+ continue;
+ }
+
+ /* At this point, pmc must be the canonical */
if (pmc->perf_event)
perf_event_disable(pmc->perf_event);
}
@@ -262,17 +420,6 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
kvm_pmu_update_state(vcpu);
}
-static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu;
- struct kvm_vcpu_arch *vcpu_arch;
-
- pmc -= pmc->idx;
- pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
- vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
- return container_of(vcpu_arch, struct kvm_vcpu, arch);
-}
-
/**
* When the perf event overflows, set the overflow status and inform the vcpu.
*/
@@ -329,17 +476,15 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
*/
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc;
u64 mask;
int i;
mask = kvm_pmu_valid_counter_mask(vcpu);
if (val & ARMV8_PMU_PMCR_E) {
- kvm_pmu_enable_counter(vcpu,
+ kvm_pmu_enable_counter_mask(vcpu,
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
} else {
- kvm_pmu_disable_counter(vcpu, mask);
+ kvm_pmu_disable_counter_mask(vcpu, mask);
}
if (val & ARMV8_PMU_PMCR_C)
@@ -349,11 +494,6 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++)
kvm_pmu_set_counter_value(vcpu, i, 0);
}
-
- if (val & ARMV8_PMU_PMCR_LC) {
- pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX];
- pmc->bitmask = 0xffffffffffffffffUL;
- }
}
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
@@ -363,50 +503,75 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
}
/**
- * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
+ * kvm_pmu_create_perf_event - create a perf event for a counter
* @vcpu: The vcpu pointer
- * @data: The data guest writes to PMXEVTYPER_EL0
* @select_idx: The number of selected counter
- *
- * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
- * event with given hardware event number. Here we call perf_event API to
- * emulate this action and create a kernel perf event for it.
*/
-void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
- u64 select_idx)
+static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+ struct kvm_pmc *pmc;
struct perf_event *event;
struct perf_event_attr attr;
- u64 eventsel, counter;
+ u64 eventsel, counter, reg, data;
+
+ /*
+ * For chained counters the event type and filtering attributes are
+ * obtained from the low/even counter. We also use this counter to
+ * determine if the event is enabled/disabled.
+ */
+ pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
+
+ reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
+ ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
+ data = __vcpu_sys_reg(vcpu, reg);
kvm_pmu_stop_counter(vcpu, pmc);
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
	/* Software increment event doesn't need to be backed by a perf event */
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
- select_idx != ARMV8_PMU_CYCLE_IDX)
+ pmc->idx != ARMV8_PMU_CYCLE_IDX)
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(attr);
attr.pinned = 1;
- attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx);
+ attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
- attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
+ attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
- counter = kvm_pmu_get_counter_value(vcpu, select_idx);
- /* The initial sample period (overflow count) of an event. */
- attr.sample_period = (-counter) & pmc->bitmask;
+ counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
- event = perf_event_create_kernel_counter(&attr, -1, current,
+ if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+ /**
+ * The initial sample period (overflow count) of an event. For
+ * chained counters we only support overflow interrupts on the
+ * high counter.
+ */
+ attr.sample_period = (-counter) & GENMASK(63, 0);
+ event = perf_event_create_kernel_counter(&attr, -1, current,
+ kvm_pmu_perf_overflow,
+ pmc + 1);
+
+ if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
+ attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
+ } else {
+ /* The initial sample period (overflow count) of an event. */
+ if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
+ attr.sample_period = (-counter) & GENMASK(63, 0);
+ else
+ attr.sample_period = (-counter) & GENMASK(31, 0);
+
+ event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow, pmc);
+ }
+
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
@@ -416,6 +581,57 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
pmc->perf_event = event;
}
+/**
+ * kvm_pmu_update_pmc_chained - update chained bitmap
+ * @vcpu: The vcpu pointer
+ * @select_idx: The number of selected counter
+ *
+ * Update the chained bitmap based on the event type written in the
+ * typer register.
+ */
+static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+
+ if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+ /*
+ * During promotion from !chained to chained we must ensure
+ * the adjacent counter is stopped and its event destroyed
+ */
+ if (!kvm_pmu_pmc_is_chained(pmc))
+ kvm_pmu_stop_counter(vcpu, pmc);
+
+ set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
+ } else {
+ clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
+ }
+}
+
+/**
+ * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
+ * @vcpu: The vcpu pointer
+ * @data: The data guest writes to PMXEVTYPER_EL0
+ * @select_idx: The number of selected counter
+ *
+ * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
+ * event with given hardware event number. Here we call perf_event API to
+ * emulate this action and create a kernel perf event for it.
+ */
+void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
+ u64 select_idx)
+{
+ u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;
+
+ reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
+ ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
+
+ __vcpu_sys_reg(vcpu, reg) = event_type;
+
+ kvm_pmu_update_pmc_chained(vcpu, select_idx);
+ kvm_pmu_create_perf_event(vcpu, select_idx);
+}
+
bool kvm_arm_support_pmu_v3(void)
{
/*
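
The pmu.c changes above implement chained counters: an even/odd pair of 32-bit architectural counters backed by a single 64-bit perf event held on the low (canonical) counter. kvm_pmu_get_pair_counter_value() glues the two halves back together and kvm_pmu_get_counter_value() then returns whichever half the guest actually read. A minimal standalone C sketch of that reassembly; the function names and the local lower_32_bits()/upper_32_bits() macros are illustrative stand-ins, not the kernel's own helpers:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel helpers used in the diff. */
#define lower_32_bits(v) ((uint32_t)(v))
#define upper_32_bits(v) ((uint32_t)((uint64_t)(v) >> 32))

/*
 * Reassemble a chained pair: the even (canonical) counter holds
 * bits [31:0], the adjacent odd counter holds bits [63:32].
 */
static uint64_t pair_counter_value(uint32_t even_cnt, uint32_t odd_cnt)
{
	return (uint64_t)lower_32_bits(even_cnt) | ((uint64_t)odd_cnt << 32);
}

/*
 * What a guest read of one index of the pair should observe: the high
 * (odd) index sees the upper half, the low index the lower half.
 */
static uint32_t counter_read(uint64_t pair, int idx_is_high)
{
	return idx_is_high ? upper_32_bits(pair) : lower_32_bits(pair);
}

int main(void)
{
	uint64_t pair = pair_counter_value(0xdeadbeef, 0x12);

	printf("pair=0x%llx high=0x%x low=0x%x\n",
	       (unsigned long long)pair,
	       counter_read(pair, 1), counter_read(pair, 0));
	return 0;
}

Only the odd (high) half of a chained pair takes the overflow interrupt, which is why kvm_pmu_create_perf_event() above registers the perf overflow callback against pmc + 1 in the chained case.
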
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index be3c9cdca9f3..87927f7e1ee7 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -401,8 +401,16 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
feature = smccc_get_arg1(vcpu);
switch(feature) {
case ARM_SMCCC_ARCH_WORKAROUND_1:
- if (kvm_arm_harden_branch_predictor())
+ switch (kvm_arm_harden_branch_predictor()) {
+ case KVM_BP_HARDEN_UNKNOWN:
+ break;
+ case KVM_BP_HARDEN_WA_NEEDED:
val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_BP_HARDEN_NOT_REQUIRED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
break;
case ARM_SMCCC_ARCH_WORKAROUND_2:
switch (kvm_arm_have_ssbd()) {
@@ -430,42 +438,103 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
- return 1; /* PSCI version */
+ return 3; /* PSCI version and two workaround registers */
}
int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
- if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
+ if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
+ return -EFAULT;
+
+ if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
+ return -EFAULT;
+
+ if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
return -EFAULT;
return 0;
}
+#define KVM_REG_FEATURE_LEVEL_WIDTH 4
+#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
+
+/*
+ * Convert the workaround level into an easy-to-compare number, where higher
+ * values mean better protection.
+ */
+static int get_kernel_wa_level(u64 regid)
+{
+ switch (regid) {
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ switch (kvm_arm_harden_branch_predictor()) {
+ case KVM_BP_HARDEN_UNKNOWN:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+ case KVM_BP_HARDEN_WA_NEEDED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
+ case KVM_BP_HARDEN_NOT_REQUIRED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
+ }
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ switch (kvm_arm_have_ssbd()) {
+ case KVM_SSBD_FORCE_DISABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ case KVM_SSBD_KERNEL:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
+ case KVM_SSBD_FORCE_ENABLE:
+ case KVM_SSBD_MITIGATED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
+ case KVM_SSBD_UNKNOWN:
+ default:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
+ }
+ }
+
+ return -EINVAL;
+}
+
int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
- if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ switch (reg->id) {
+ case KVM_REG_ARM_PSCI_VERSION:
val = kvm_psci_version(vcpu, vcpu->kvm);
- if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
- return 0;
+ if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
+ kvm_arm_get_vcpu_workaround_2_flag(vcpu))
+ val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
+ break;
+ default:
+ return -ENOENT;
}
- return -EINVAL;
+ if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
}
int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
- if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
- void __user *uaddr = (void __user *)(long)reg->addr;
- bool wants_02;
- u64 val;
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int wa_level;
- if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
+ if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg->id) {
+ case KVM_REG_ARM_PSCI_VERSION:
+ {
+ bool wants_02;
wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
@@ -482,6 +551,54 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
vcpu->kvm->arch.psci_version = val;
return 0;
}
+ break;
+ }
+
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
+ return -EINVAL;
+
+ if (get_kernel_wa_level(reg->id) < val)
+ return -EINVAL;
+
+ return 0;
+
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
+ return -EINVAL;
+
+ wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
+
+ if (get_kernel_wa_level(reg->id) < wa_level)
+ return -EINVAL;
+
+ /* The enabled bit must not be set unless the level is AVAIL. */
+ if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
+ wa_level != val)
+ return -EINVAL;
+
+ /* Are we finished or do we need to check the enable bit ? */
+ if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
+ return 0;
+
+ /*
+ * If this kernel supports the workaround to be switched on
+ * or off, make sure it matches the requested setting.
+ */
+ switch (wa_level) {
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
+ kvm_arm_set_vcpu_workaround_2_flag(vcpu,
+ val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
+ kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);
+ break;
+ }
+
+ return 0;
+ default:
+ return -ENOENT;
}
return -EINVAL;
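
The psci.c hunk adds two firmware pseudo-registers so that the Spectre-v2 and SSBD mitigation state can be migrated alongside the rest of the vCPU state. get_kernel_wa_level() flattens the host's state onto an ordered scale (higher value, better protection), and kvm_arm_set_fw_reg() rejects any saved level the destination host cannot honour. A rough sketch of that compatibility check; the enum values below are hypothetical stand-ins for the KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_* constants:

#include <errno.h>
#include <stdio.h>

/*
 * Hypothetical ordered workaround levels; higher value means better
 * protection, mirroring the comment above get_kernel_wa_level().
 */
enum wa_level {
	WA_NOT_AVAIL	= 0,
	WA_UNKNOWN	= 1,
	WA_AVAIL	= 2,
	WA_NOT_REQUIRED	= 3,
};

/* Accept a saved level only if this host offers at least as much. */
static int set_fw_wa_level(enum wa_level host_level, enum wa_level saved_level)
{
	if (host_level < saved_level)
		return -EINVAL;	/* destination cannot honour the guest's view */
	return 0;
}

int main(void)
{
	/* Migrating an "available" guest onto an "unknown" host is refused. */
	printf("%d\n", set_fw_wa_level(WA_UNKNOWN, WA_AVAIL));		/* -EINVAL */
	printf("%d\n", set_fw_wa_level(WA_NOT_REQUIRED, WA_AVAIL));	/* 0 */
	return 0;
}

Levels equal to or below the host's own are accepted, so migrating towards a better-protected host always succeeds.
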
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 2e6fc7c66a11..58e4f88b2b9f 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -184,9 +184,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
nr_rt_entries += 1;
- new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)),
- GFP_KERNEL_ACCOUNT);
-
+ new = kzalloc(struct_size(new, map, nr_rt_entries), GFP_KERNEL_ACCOUNT);
if (!new)
return -ENOMEM;
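
The irqchip.c change replaces open-coded flexible-array sizing with struct_size(), which evaluates to the size of the struct plus n trailing array elements and also guards the arithmetic against overflow. A small user-space sketch of the allocation shape it covers; the struct layout and the STRUCT_SIZE macro are simplified, illustrative stand-ins for kvm_irq_routing_table and the real helper from <linux/overflow.h>:

#include <stdlib.h>

/* Hypothetical stand-ins shaped like the routing-table allocation above. */
struct hlist_head { void *first; };

struct irq_table {
	int nr_rt_entries;
	struct hlist_head map[];	/* flexible array member */
};

/*
 * Naive stand-in for the kernel's struct_size(): the size of the struct
 * plus n trailing array elements (the real helper additionally protects
 * the multiplication and addition against overflow).
 */
#define STRUCT_SIZE(ptr, member, n) \
	(sizeof(*(ptr)) + (n) * sizeof((ptr)->member[0]))

int main(void)
{
	struct irq_table *new;
	int nr_rt_entries = 64;

	new = calloc(1, STRUCT_SIZE(new, map, nr_rt_entries));
	if (!new)
		return 1;
	new->nr_rt_entries = nr_rt_entries;
	free(new);
	return 0;
}
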
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2f2d24a4dd5c..b4ab59dd6846 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,7 +95,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_MUTEX(kvm_lock);
static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
@@ -680,9 +680,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
preempt_notifier_inc();
@@ -728,9 +728,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -1790,7 +1790,7 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
if (!map->hva)
return;
- if (map->page)
+ if (map->page != KVM_UNMAPPED_PAGE)
kunmap(map->page);
#ifdef CONFIG_HAS_IOMEM
else
@@ -4031,13 +4031,13 @@ static int vm_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -4050,12 +4050,12 @@ static int vm_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_clear_per_vm((void *)&stat_tmp, 0);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -4070,13 +4070,13 @@ static int vcpu_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -4089,12 +4089,12 @@ static int vcpu_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -4115,7 +4115,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
if (!kvm_dev.this_device || !kvm)
return;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
if (type == KVM_EVENT_CREATE_VM) {
kvm_createvm_count++;
kvm_active_vms++;
@@ -4124,7 +4124,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
}
created = kvm_createvm_count;
active = kvm_active_vms;
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
if (!env)
@@ -4221,6 +4221,11 @@ static void kvm_sched_out(struct preempt_notifier *pn,
kvm_arch_vcpu_put(vcpu);
}
+static void check_processor_compat(void *rtn)
+{
+ *(int *)rtn = kvm_arch_check_processor_compat();
+}
+
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module)
{
@@ -4252,9 +4257,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_free_0a;
for_each_online_cpu(cpu) {
- smp_call_function_single(cpu,
- kvm_arch_check_processor_compat,
- &r, 1);
+ smp_call_function_single(cpu, check_processor_compat, &r, 1);
if (r < 0)
goto out_free_1;
}