aboutsummaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/arm/aarch32.c131
-rw-r--r--virt/kvm/arm/arch_timer.c5
-rw-r--r--virt/kvm/arm/arm.c117
-rw-r--r--virt/kvm/arm/mmio.c68
-rw-r--r--virt/kvm/arm/mmu.c62
-rw-r--r--virt/kvm/arm/perf.c6
-rw-r--r--virt/kvm/arm/pmu.c114
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c20
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c6
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c5
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c15
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h5
-rw-r--r--virt/kvm/async_pf.c31
-rw-r--r--virt/kvm/kvm_main.c435
14 files changed, 565 insertions, 455 deletions
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index c4c57ba99e90..0a356aa91aa1 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -10,10 +10,15 @@
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*/
+#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#define DFSR_FSC_EXTABT_LPAE 0x10
+#define DFSR_FSC_EXTABT_nLPAE 0x08
+#define DFSR_LPAE BIT(9)
+
/*
* Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
*/
@@ -28,25 +33,115 @@ static const u8 return_offsets[8][2] = {
[7] = { 4, 4 }, /* FIQ, unused */
};
+/*
+ * When an exception is taken, most CPSR fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]).
+ *
+ * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with
+ * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was
+ * obsoleted by the ARMv7 virtualization extensions and is RES0.
+ *
+ * For the SPSR layout seen from AArch32, see:
+ * - ARM DDI 0406C.d, page B1-1148
+ * - ARM DDI 0487E.a, page G8-6264
+ *
+ * For the SPSR_ELx layout for AArch32 seen from AArch64, see:
+ * - ARM DDI 0487E.a, page C5-426
+ *
+ * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode)
+{
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_AA32_N_BIT);
+ new |= (old & PSR_AA32_Z_BIT);
+ new |= (old & PSR_AA32_C_BIT);
+ new |= (old & PSR_AA32_V_BIT);
+ new |= (old & PSR_AA32_Q_BIT);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See ARM DDI 0487E.a, section G1.12.3
+ // See ARM DDI 0406C.d, section B1.8.3
+
+ new |= (old & PSR_AA32_DIT_BIT);
+
+ // CPSR.SSBS is set to SCTLR.DSSBS upon any exception
+ // See ARM DDI 0487E.a, page G8-6244
+ if (sctlr & BIT(31))
+ new |= PSR_AA32_SSBS_BIT;
+
+ // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0
+ // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page G8-6246
+ new |= (old & PSR_AA32_PAN_BIT);
+ if (!(sctlr & BIT(23)))
+ new |= PSR_AA32_PAN_BIT;
+
+ // SS does not exist in AArch32, so ignore
+
+ // CPSR.IL is set to zero upon any exception
+ // See ARM DDI 0487E.a, page G1-5527
+
+ new |= (old & PSR_AA32_GE_MASK);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See prior comment above
+
+ // CPSR.E is set to SCTLR.EE upon any exception
+ // See ARM DDI 0487E.a, page G8-6245
+ // See ARM DDI 0406C.d, page B4-1701
+ if (sctlr & BIT(25))
+ new |= PSR_AA32_E_BIT;
+
+ // CPSR.A is unchanged upon an exception to Undefined, Supervisor
+ // CPSR.A is set upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_A_BIT);
+ if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC)
+ new |= PSR_AA32_A_BIT;
+
+ // CPSR.I is set upon any exception
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= PSR_AA32_I_BIT;
+
+ // CPSR.F is set upon an exception to FIQ
+ // CPSR.F is unchanged upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_F_BIT);
+ if (mode == PSR_AA32_MODE_FIQ)
+ new |= PSR_AA32_F_BIT;
+
+ // CPSR.T is set to SCTLR.TE upon any exception
+ // See ARM DDI 0487E.a, page G8-5514
+ // See ARM DDI 0406C.d, page B1-1181
+ if (sctlr & BIT(30))
+ new |= PSR_AA32_T_BIT;
+
+ new |= mode;
+
+ return new;
+}
+
static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
{
- unsigned long cpsr;
- unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
- bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT);
+ unsigned long spsr = *vcpu_cpsr(vcpu);
+ bool is_thumb = (spsr & PSR_AA32_T_BIT);
u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
- cpsr = mode | PSR_AA32_I_BIT;
-
- if (sctlr & (1 << 30))
- cpsr |= PSR_AA32_T_BIT;
- if (sctlr & (1 << 25))
- cpsr |= PSR_AA32_E_BIT;
-
- *vcpu_cpsr(vcpu) = cpsr;
+ *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode);
/* Note: These now point to the banked copies */
- vcpu_write_spsr(vcpu, new_spsr_value);
+ vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr));
*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
/* Branch to exception vector */
@@ -84,16 +179,18 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
fsr = &vcpu_cp15(vcpu, c5_DFSR);
}
- prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset);
+ prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset);
*far = addr;
/* Give the guest an IMPLEMENTATION DEFINED exception */
is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
- if (is_lpae)
- *fsr = 1 << 9 | 0x34;
- else
- *fsr = 0x14;
+ if (is_lpae) {
+ *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
+ } else {
+ /* no need to shuffle FS[4] into DFSR[10] as its 0 */
+ *fsr = DFSR_FSC_EXTABT_nLPAE;
+ }
}
void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f182b2380345..0d9438e9de2a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -805,6 +805,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
switch (treg) {
case TIMER_REG_TVAL:
val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
+ val &= lower_32_bits(val);
break;
case TIMER_REG_CTL:
@@ -850,7 +851,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
{
switch (treg) {
case TIMER_REG_TVAL:
- timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val;
+ timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val;
break;
case TIMER_REG_CTL:
@@ -1022,7 +1023,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
bool kvm_arch_timer_get_input_level(int vintid)
{
- struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 12e0280291ce..d65a0faa46d8 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -20,8 +20,6 @@
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
-#include <kvm/arm_pmu.h>
-#include <kvm/arm_psci.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -51,9 +49,6 @@ __asm__(".arch_extension virt");
DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-/* Per-CPU variable containing the currently running vcpu. */
-static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
-
/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
@@ -62,31 +57,8 @@ static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
-
-static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
-{
- __this_cpu_write(kvm_arm_running_vcpu, vcpu);
-}
-
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
-/**
- * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
- * Must be called from non-preemptible context
- */
-struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
-{
- return __this_cpu_read(kvm_arm_running_vcpu);
-}
-
-/**
- * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
- */
-struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
-{
- return &kvm_arm_running_vcpu;
-}
-
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -194,7 +166,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
- kvm_arch_vcpu_free(kvm->vcpus[i]);
+ kvm_vcpu_destroy(kvm->vcpus[i]);
kvm->vcpus[i] = NULL;
}
}
@@ -279,49 +251,46 @@ void kvm_arch_free_vm(struct kvm *kvm)
vfree(kvm);
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+ return -EBUSY;
+
+ if (id >= kvm->arch.max_vcpus)
+ return -EINVAL;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
int err;
- struct kvm_vcpu *vcpu;
- if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
- err = -EBUSY;
- goto out;
- }
+ /* Force users to call KVM_ARM_VCPU_INIT */
+ vcpu->arch.target = -1;
+ bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
- if (id >= kvm->arch.max_vcpus) {
- err = -EINVAL;
- goto out;
- }
+ /* Set up the timer */
+ kvm_timer_vcpu_init(vcpu);
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu) {
- err = -ENOMEM;
- goto out;
- }
+ kvm_pmu_vcpu_init(vcpu);
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
+ kvm_arm_reset_debug_ptr(vcpu);
+
+ kvm_arm_pvtime_vcpu_init(&vcpu->arch);
- err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+ err = kvm_vgic_vcpu_init(vcpu);
if (err)
- goto vcpu_uninit;
+ return err;
- return vcpu;
-vcpu_uninit:
- kvm_vcpu_uninit(vcpu);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
+ return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
@@ -329,13 +298,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- kvm_arch_vcpu_free(vcpu);
+ kvm_arm_vcpu_destroy(vcpu);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -368,24 +332,6 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
preempt_enable();
}
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- /* Force users to call KVM_ARM_VCPU_INIT */
- vcpu->arch.target = -1;
- bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
- /* Set up the timer */
- kvm_timer_vcpu_init(vcpu);
-
- kvm_pmu_vcpu_init(vcpu);
-
- kvm_arm_reset_debug_ptr(vcpu);
-
- kvm_arm_pvtime_vcpu_init(&vcpu->arch);
-
- return kvm_vgic_vcpu_init(vcpu);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
int *last_ran;
@@ -406,7 +352,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->cpu = cpu;
vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;
- kvm_arm_set_running_vcpu(vcpu);
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
kvm_vcpu_load_sysregs(vcpu);
@@ -432,8 +377,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_vcpu_pmu_restore_host(vcpu);
vcpu->cpu = -1;
-
- kvm_arm_set_running_vcpu(NULL);
}
static void vcpu_power_off(struct kvm_vcpu *vcpu)
@@ -1352,7 +1295,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
-static void cpu_init_hyp_mode(void *dummy)
+static void cpu_init_hyp_mode(void)
{
phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
@@ -1386,7 +1329,7 @@ static void cpu_hyp_reinit(void)
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
else
- cpu_init_hyp_mode(NULL);
+ cpu_init_hyp_mode();
kvm_arm_init_debug();
@@ -1537,7 +1480,6 @@ static void teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
- hyp_cpu_pm_exit();
}
/**
@@ -1751,6 +1693,7 @@ int kvm_arch_init(void *opaque)
return 0;
out_hyp:
+ hyp_cpu_pm_exit();
if (!in_hyp_mode)
teardown_hyp_mode();
out_err:
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index 70d3b449692c..aedfcff99ac5 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -5,7 +5,6 @@
*/
#include <linux/kvm_host.h>
-#include <asm/kvm_mmio.h>
#include <asm/kvm_emulate.h>
#include <trace/events/kvm.h>
@@ -92,23 +91,23 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->mmio_needed = 0;
- if (!run->mmio.is_write) {
- len = run->mmio.len;
- if (len > sizeof(unsigned long))
- return -EINVAL;
-
+ if (!kvm_vcpu_dabt_iswrite(vcpu)) {
+ len = kvm_vcpu_dabt_get_as(vcpu);
data = kvm_mmio_read_buf(run->mmio.data, len);
- if (vcpu->arch.mmio_decode.sign_extend &&
+ if (kvm_vcpu_dabt_issext(vcpu) &&
len < sizeof(unsigned long)) {
mask = 1U << ((len * 8) - 1);
data = (data ^ mask) - mask;
}
+ if (!kvm_vcpu_dabt_issf(vcpu))
+ data = data & 0xffffffff;
+
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
&data);
data = vcpu_data_host_to_guest(vcpu, data, len);
- vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
+ vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data);
}
/*
@@ -120,33 +119,6 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
-{
- unsigned long rt;
- int access_size;
- bool sign_extend;
-
- if (kvm_vcpu_dabt_iss1tw(vcpu)) {
- /* page table accesses IO mem: tell guest to fix its TTBR */
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
-
- access_size = kvm_vcpu_dabt_get_as(vcpu);
- if (unlikely(access_size < 0))
- return access_size;
-
- *is_write = kvm_vcpu_dabt_iswrite(vcpu);
- sign_extend = kvm_vcpu_dabt_issext(vcpu);
- rt = kvm_vcpu_dabt_get_rd(vcpu);
-
- *len = access_size;
- vcpu->arch.mmio_decode.sign_extend = sign_extend;
- vcpu->arch.mmio_decode.rt = rt;
-
- return 0;
-}
-
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
{
@@ -158,15 +130,10 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
u8 data_buf[8];
/*
- * Prepare MMIO operation. First decode the syndrome data we get
- * from the CPU. Then try if some in-kernel emulation feels
- * responsible, otherwise let user space do its magic.
+ * No valid syndrome? Ask userspace for help if it has
+ * voluntered to do so, and bail out otherwise.
*/
- if (kvm_vcpu_dabt_isvalid(vcpu)) {
- ret = decode_hsr(vcpu, &is_write, &len);
- if (ret)
- return ret;
- } else {
+ if (!kvm_vcpu_dabt_isvalid(vcpu)) {
if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
run->exit_reason = KVM_EXIT_ARM_NISV;
run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
@@ -178,7 +145,20 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
return -ENOSYS;
}
- rt = vcpu->arch.mmio_decode.rt;
+ /* Page table accesses IO mem: tell guest to fix its TTBR */
+ if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ return 1;
+ }
+
+ /*
+ * Prepare MMIO operation. First decode the syndrome data we get
+ * from the CPU. Then try if some in-kernel emulation feels
+ * responsible, otherwise let user space do its magic.
+ */
+ is_write = kvm_vcpu_dabt_iswrite(vcpu);
+ len = kvm_vcpu_dabt_get_as(vcpu);
+ rt = kvm_vcpu_dabt_get_rd(vcpu);
if (is_write) {
data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 38b4c910b6c3..19c961ac4e3c 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -14,7 +14,6 @@
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
-#include <asm/kvm_mmio.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
@@ -38,6 +37,11 @@ static unsigned long io_map_base;
#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
+static bool is_iomap(unsigned long flags)
+{
+ return flags & KVM_S2PTE_FLAG_IS_IOMAP;
+}
+
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -1372,14 +1376,8 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *ipap >> PAGE_SHIFT;
- struct page *page = pfn_to_page(pfn);
- /*
- * PageTransCompoundMap() returns true for THP and
- * hugetlbfs. Make sure the adjustment is done only for THP
- * pages.
- */
- if (!PageHuge(page) && PageTransCompoundMap(page)) {
+ if (kvm_is_transparent_hugepage(pfn)) {
unsigned long mask;
/*
* The address we faulted on is backed by a transparent huge
@@ -1591,16 +1589,8 @@ static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
__invalidate_icache_guest_page(pfn, size);
}
-static void kvm_send_hwpoison_signal(unsigned long address,
- struct vm_area_struct *vma)
+static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
- short lsb;
-
- if (is_vm_hugetlb_page(vma))
- lsb = huge_page_shift(hstate_vma(vma));
- else
- lsb = PAGE_SHIFT;
-
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
}
@@ -1673,6 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
+ short vma_shift;
kvm_pfn_t pfn;
pgprot_t mem_type = PAGE_S2;
bool logging_active = memslot_is_logging(memslot);
@@ -1696,8 +1687,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- vma_pagesize = vma_kernel_pagesize(vma);
+ if (is_vm_hugetlb_page(vma))
+ vma_shift = huge_page_shift(hstate_vma(vma));
+ else
+ vma_shift = PAGE_SHIFT;
+
+ vma_pagesize = 1ULL << vma_shift;
if (logging_active ||
+ (vma->vm_flags & VM_PFNMAP) ||
!fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
force_pte = true;
vma_pagesize = PAGE_SIZE;
@@ -1735,7 +1732,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
if (pfn == KVM_PFN_ERR_HWPOISON) {
- kvm_send_hwpoison_signal(hva, vma);
+ kvm_send_hwpoison_signal(hva, vma_shift);
return 0;
}
if (is_error_noslot_pfn(pfn))
@@ -1760,6 +1757,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
writable = false;
}
+ if (exec_fault && is_iomap(flags))
+ return -ENOEXEC;
+
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
@@ -1781,7 +1781,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
kvm_set_pfn_dirty(pfn);
- if (fault_status != FSC_PERM)
+ if (fault_status != FSC_PERM && !is_iomap(flags))
clean_dcache_guest_page(pfn, vma_pagesize);
if (exec_fault)
@@ -1948,9 +1948,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
- goto out_unlock;
+ ret = -ENOEXEC;
+ goto out;
}
/*
@@ -1992,6 +1991,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
+out:
+ if (ret == -ENOEXEC) {
+ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ ret = 1;
+ }
out_unlock:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
@@ -2134,7 +2138,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
if (!kvm->arch.pgd)
return 0;
trace_kvm_test_age_hva(hva);
- return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+ return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
+ kvm_test_age_hva_handler, NULL);
}
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
@@ -2302,15 +2307,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
break;
/*
- * Mapping a read-only VMA is only allowed if the
- * memory region is configured as read-only.
- */
- if (writable && !(vma->vm_flags & VM_WRITE)) {
- ret = -EPERM;
- break;
- }
-
- /*
* Take the intersection of this VMA with the memory region
*/
vm_start = max(hva, vma->vm_start);
diff --git a/virt/kvm/arm/perf.c b/virt/kvm/arm/perf.c
index 918cdc3839ea..d45b8b9a4415 100644
--- a/virt/kvm/arm/perf.c
+++ b/virt/kvm/arm/perf.c
@@ -13,14 +13,14 @@
static int kvm_is_in_guest(void)
{
- return kvm_arm_get_running_vcpu() != NULL;
+ return kvm_get_running_vcpu() != NULL;
}
static int kvm_is_user_mode(void)
{
struct kvm_vcpu *vcpu;
- vcpu = kvm_arm_get_running_vcpu();
+ vcpu = kvm_get_running_vcpu();
if (vcpu)
return !vcpu_mode_priv(vcpu);
@@ -32,7 +32,7 @@ static unsigned long kvm_get_guest_ip(void)
{
struct kvm_vcpu *vcpu;
- vcpu = kvm_arm_get_running_vcpu();
+ vcpu = kvm_get_running_vcpu();
if (vcpu)
return *vcpu_pc(vcpu);
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 8731dfeced8b..f0d0312c0a55 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -15,6 +15,8 @@
#include <kvm/arm_vgic.h>
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
+static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
+static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
@@ -75,6 +77,13 @@ static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
return pmc;
}
+static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
+{
+ if (kvm_pmu_idx_is_high_counter(pmc->idx))
+ return pmc - 1;
+ else
+ return pmc + 1;
+}
/**
* kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
@@ -238,10 +247,11 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
*/
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
- int i;
+ unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ int i;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ for_each_set_bit(i, &mask, 32)
kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
@@ -294,15 +304,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
pmc = &pmu->pmc[i];
- /*
- * For high counters of chained events we must recreate the
- * perf event with the long (64bit) attribute set.
- */
- if (kvm_pmu_pmc_is_chained(pmc) &&
- kvm_pmu_idx_is_high_counter(i)) {
- kvm_pmu_create_perf_event(vcpu, i);
- continue;
- }
+ /* A change in the enable state may affect the chain state */
+ kvm_pmu_update_pmc_chained(vcpu, i);
+ kvm_pmu_create_perf_event(vcpu, i);
/* At this point, pmc must be the canonical */
if (pmc->perf_event) {
@@ -335,15 +339,9 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
pmc = &pmu->pmc[i];
- /*
- * For high counters of chained events we must recreate the
- * perf event with the long (64bit) attribute unset.
- */
- if (kvm_pmu_pmc_is_chained(pmc) &&
- kvm_pmu_idx_is_high_counter(i)) {
- kvm_pmu_create_perf_event(vcpu, i);
- continue;
- }
+ /* A change in the enable state may affect the chain state */
+ kvm_pmu_update_pmc_chained(vcpu, i);
+ kvm_pmu_create_perf_event(vcpu, i);
/* At this point, pmc must be the canonical */
if (pmc->perf_event)
@@ -480,25 +478,45 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
*/
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
int i;
- u64 type, enable, reg;
- if (val == 0)
+ if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
return;
- enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+ /* Weed out disabled counters */
+ val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
+ u64 type, reg;
+
if (!(val & BIT(i)))
continue;
- type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
- & ARMV8_PMU_EVTYPE_EVENT;
- if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
- && (enable & BIT(i))) {
- reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+
+ /* PMSWINC only applies to ... SW_INC! */
+ type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
+ type &= ARMV8_PMU_EVTYPE_EVENT;
+ if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
+ continue;
+
+ /* increment this even SW_INC counter */
+ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+ reg = lower_32_bits(reg);
+ __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
+
+ if (reg) /* no overflow on the low part */
+ continue;
+
+ if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
+ /* increment the high counter */
+ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
reg = lower_32_bits(reg);
- __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
- if (!reg)
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+ __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
+ if (!reg) /* mark overflow on the high counter */
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
+ } else {
+ /* mark overflow on low counter */
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
}
}
}
@@ -510,10 +528,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
*/
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
- u64 mask;
+ unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
int i;
- mask = kvm_pmu_valid_counter_mask(vcpu);
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
@@ -525,7 +542,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
- for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++)
+ for_each_set_bit(i, &mask, 32)
kvm_pmu_set_counter_value(vcpu, i, 0);
}
}
@@ -582,15 +599,14 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
- if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+ if (kvm_pmu_pmc_is_chained(pmc)) {
/**
* The initial sample period (overflow count) of an event. For
* chained counters we only support overflow interrupts on the
* high counter.
*/
attr.sample_period = (-counter) & GENMASK(63, 0);
- if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
- attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
+ attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow,
@@ -621,25 +637,33 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
* @select_idx: The number of selected counter
*
* Update the chained bitmap based on the event type written in the
- * typer register.
+ * typer register and the enable state of the odd register.
*/
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+ struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
+ bool new_state, old_state;
+
+ old_state = kvm_pmu_pmc_is_chained(pmc);
+ new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
+ kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
+
+ if (old_state == new_state)
+ return;
- if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
+ canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
+ kvm_pmu_stop_counter(vcpu, canonical_pmc);
+ if (new_state) {
/*
* During promotion from !chained to chained we must ensure
* the adjacent counter is stopped and its event destroyed
*/
- if (!kvm_pmu_pmc_is_chained(pmc))
- kvm_pmu_stop_counter(vcpu, pmc);
-
+ kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
- } else {
- clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
+ return;
}
+ clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}
/**
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index b3c5de48064c..a963b9d766b7 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -70,7 +70,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
- int i, vcpu_lock_idx = -1, ret;
+ int i, ret;
struct kvm_vcpu *vcpu;
if (irqchip_in_kernel(kvm))
@@ -86,17 +86,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run while we create the vgic.
- */
ret = -EBUSY;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!mutex_trylock(&vcpu->mutex))
- goto out_unlock;
- vcpu_lock_idx = i;
- }
+ if (!lock_all_vcpus(kvm))
+ return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.has_run_once)
@@ -125,10 +117,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&vcpu->mutex);
- }
+ unlock_all_vcpus(kvm);
return ret;
}
@@ -177,6 +166,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
break;
default:
kfree(dist->spis);
+ dist->spis = NULL;
return -EINVAL;
}
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 98c7360d9fb7..d53d34a33e35 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -839,9 +839,8 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
u32 event_id = its_cmd_get_id(its_cmd);
struct its_ite *ite;
-
ite = find_ite(its, device_id, event_id);
- if (ite && ite->collection) {
+ if (ite && its_is_collection_mapped(ite->collection)) {
/*
* Though the spec talks about removing the pending state, we
* don't bother here since we clear the ITTE anyway and the
@@ -2475,7 +2474,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT);
coll_id = val & KVM_ITS_CTE_ICID_MASK;
- if (target_addr >= atomic_read(&kvm->online_vcpus))
+ if (target_addr != COLLECTION_NOT_MAPPED &&
+ target_addr >= atomic_read(&kvm->online_vcpus))
return -EINVAL;
collection = find_collection(its, coll_id);
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 7dfd15dbb308..ebc218840fc2 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -414,8 +414,11 @@ static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ u64 value = vgic_cpu->pendbaser;
- return extract_bytes(vgic_cpu->pendbaser, addr & 7, len);
+ value &= ~GICR_PENDBASER_PTZ;
+
+ return extract_bytes(value, addr & 7, len);
}
static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 0d090482720d..d656ebd5f9d4 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -190,15 +190,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
* value later will give us the same value as we update the per-CPU variable
* in the preempt notifier handlers.
*/
-static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
-{
- struct kvm_vcpu *vcpu;
-
- preempt_disable();
- vcpu = kvm_arm_get_running_vcpu();
- preempt_enable();
- return vcpu;
-}
/* Must be called with irq->irq_lock held */
static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
@@ -221,7 +212,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- bool is_uaccess = !vgic_get_mmio_requester_vcpu();
+ bool is_uaccess = !kvm_get_running_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -274,7 +265,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- bool is_uaccess = !vgic_get_mmio_requester_vcpu();
+ bool is_uaccess = !kvm_get_running_vcpu();
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
unsigned long flags;
@@ -335,7 +326,7 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool active)
{
unsigned long flags;
- struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
+ struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu();
raw_spin_lock_irqsave(&irq->irq_lock, flags);
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 836f418f1ee8..5af2aefad435 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -98,11 +98,6 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
.uaccess_write = uwr, \
}
-int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
- struct vgic_register_region *reg_desc,
- struct vgic_io_device *region,
- int nr_irqs, bool offset_private);
-
unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 35305d6e68cc..15e5b037f92d 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -17,21 +17,6 @@
#include "async_pf.h"
#include <trace/events/kvm.h>
-static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
- struct kvm_async_pf *work)
-{
-#ifdef CONFIG_KVM_ASYNC_PF_SYNC
- kvm_arch_async_page_present(vcpu, work);
-#endif
-}
-static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
- struct kvm_async_pf *work)
-{
-#ifndef CONFIG_KVM_ASYNC_PF_SYNC
- kvm_arch_async_page_present(vcpu, work);
-#endif
-}
-
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
@@ -64,7 +49,7 @@ static void async_pf_execute(struct work_struct *work)
struct mm_struct *mm = apf->mm;
struct kvm_vcpu *vcpu = apf->vcpu;
unsigned long addr = apf->addr;
- gva_t gva = apf->gva;
+ gpa_t cr2_or_gpa = apf->cr2_or_gpa;
int locked = 1;
might_sleep();
@@ -80,7 +65,8 @@ static void async_pf_execute(struct work_struct *work)
if (locked)
up_read(&mm->mmap_sem);
- kvm_async_page_present_sync(vcpu, apf);
+ if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
+ kvm_arch_async_page_present(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
@@ -92,7 +78,7 @@ static void async_pf_execute(struct work_struct *work)
* this point
*/
- trace_kvm_async_pf_completed(addr, gva);
+ trace_kvm_async_pf_completed(addr, cr2_or_gpa);
if (swq_has_sleeper(&vcpu->wq))
swake_up_one(&vcpu->wq);
@@ -157,7 +143,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->async_pf.lock);
kvm_arch_async_page_ready(vcpu, work);
- kvm_async_page_present_async(vcpu, work);
+ if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
+ kvm_arch_async_page_present(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
@@ -165,8 +152,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
}
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
- struct kvm_arch_async_pf *arch)
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ unsigned long hva, struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
@@ -185,7 +172,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
work->wakeup_all = false;
work->vcpu = vcpu;
- work->gva = gva;
+ work->cr2_or_gpa = cr2_or_gpa;
work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 00268290dcbd..7e63a3236364 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -104,16 +104,16 @@ static cpumask_var_t cpus_hardware_enabled;
static int kvm_usage_count;
static atomic_t hardware_enable_failed;
-struct kmem_cache *kvm_vcpu_cache;
-EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
+static struct kmem_cache *kvm_vcpu_cache;
static __read_mostly struct preempt_ops kvm_preempt_ops;
+static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu);
struct dentry *kvm_debugfs_dir;
EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
static int kvm_debugfs_num_entries;
-static const struct file_operations *stat_fops_per_vm[];
+static const struct file_operations stat_fops_per_vm;
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
@@ -191,12 +191,24 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
return true;
}
+bool kvm_is_transparent_hugepage(kvm_pfn_t pfn)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ if (!PageTransCompoundMap(page))
+ return false;
+
+ return is_transparent_hugepage(compound_head(page));
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
void vcpu_load(struct kvm_vcpu *vcpu)
{
int cpu = get_cpu();
+
+ __this_cpu_write(kvm_running_vcpu, vcpu);
preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
@@ -208,6 +220,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_arch_vcpu_put(vcpu);
preempt_notifier_unregister(&vcpu->preempt_notifier);
+ __this_cpu_write(kvm_running_vcpu, NULL);
preempt_enable();
}
EXPORT_SYMBOL_GPL(vcpu_put);
@@ -322,11 +335,8 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}
-int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
+static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
- struct page *page;
- int r;
-
mutex_init(&vcpu->mutex);
vcpu->cpu = -1;
vcpu->kvm = kvm;
@@ -338,42 +348,28 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->pre_pcpu = -1;
INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!page) {
- r = -ENOMEM;
- goto fail;
- }
- vcpu->run = page_address(page);
-
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->preempted = false;
vcpu->ready = false;
-
- r = kvm_arch_vcpu_init(vcpu);
- if (r < 0)
- goto fail_free_run;
- return 0;
-
-fail_free_run:
- free_page((unsigned long)vcpu->run);
-fail:
- return r;
+ preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_init);
-void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
+void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
+ kvm_arch_vcpu_destroy(vcpu);
+
/*
- * no need for rcu_read_lock as VCPU_RUN is the only place that
- * will change the vcpu->pid pointer and on uninit all file
- * descriptors are already gone.
+ * No need for rcu_read_lock as VCPU_RUN is the only place that changes
+ * the vcpu->pid pointer, and at destruction time all file descriptors
+ * are already gone.
*/
put_pid(rcu_dereference_protected(vcpu->pid, 1));
- kvm_arch_vcpu_uninit(vcpu);
+
free_page((unsigned long)vcpu->run);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
+EXPORT_SYMBOL_GPL(kvm_vcpu_destroy);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
@@ -650,11 +646,11 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return -ENOMEM;
stat_data->kvm = kvm;
- stat_data->offset = p->offset;
- stat_data->mode = p->mode ? p->mode : 0644;
+ stat_data->dbgfs_item = p;
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry,
- stat_data, stat_fops_per_vm[p->kind]);
+ debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
+ kvm->debugfs_dentry, stat_data,
+ &stat_fops_per_vm);
}
return 0;
}
@@ -964,7 +960,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
/*
* Increment the new memslot generation a second time, dropping the
- * update in-progress flag and incrementing then generation based on
+ * update in-progress flag and incrementing the generation based on
* the number of address spaces. This provides a unique and easily
* identifiable generation number while the memslots are in flux.
*/
@@ -1117,7 +1113,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
*
* validation of sp->gfn happens in:
* - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
- * - kvm_is_visible_gfn (mmu_check_roots)
+ * - kvm_is_visible_gfn (mmu_check_root)
*/
kvm_arch_flush_shadow_memslot(kvm, slot);
@@ -1406,14 +1402,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct vm_area_struct *vma;
unsigned long addr, size;
size = PAGE_SIZE;
- addr = gfn_to_hva(kvm, gfn);
+ addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL);
if (kvm_is_error_hva(addr))
return PAGE_SIZE;
@@ -1519,7 +1515,7 @@ static inline int check_user_page_hwpoison(unsigned long addr)
/*
* The fast path to get the writable pfn which will be stored in @pfn,
* true indicates success, otherwise false is returned. It's also the
- * only part that runs if we can are in atomic context.
+ * only part that runs if we can in atomic context.
*/
static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
bool *writable, kvm_pfn_t *pfn)
@@ -1821,26 +1817,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_page);
-static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
- struct kvm_host_map *map)
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+{
+ if (pfn == 0)
+ return;
+
+ if (cache)
+ cache->pfn = cache->gfn = 0;
+
+ if (dirty)
+ kvm_release_pfn_dirty(pfn);
+ else
+ kvm_release_pfn_clean(pfn);
+}
+
+static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct gfn_to_pfn_cache *cache, u64 gen)
+{
+ kvm_release_pfn(cache->pfn, cache->dirty, cache);
+
+ cache->pfn = gfn_to_pfn_memslot(slot, gfn);
+ cache->gfn = gfn;
+ cache->dirty = false;
+ cache->generation = gen;
+}
+
+static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
+ struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache,
+ bool atomic)
{
kvm_pfn_t pfn;
void *hva = NULL;
struct page *page = KVM_UNMAPPED_PAGE;
+ struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
+ u64 gen = slots->generation;
if (!map)
return -EINVAL;
- pfn = gfn_to_pfn_memslot(slot, gfn);
+ if (cache) {
+ if (!cache->pfn || cache->gfn != gfn ||
+ cache->generation != gen) {
+ if (atomic)
+ return -EAGAIN;
+ kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
+ }
+ pfn = cache->pfn;
+ } else {
+ if (atomic)
+ return -EAGAIN;
+ pfn = gfn_to_pfn_memslot(slot, gfn);
+ }
if (is_error_noslot_pfn(pfn))
return -EINVAL;
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
- hva = kmap(page);
+ if (atomic)
+ hva = kmap_atomic(page);
+ else
+ hva = kmap(page);
#ifdef CONFIG_HAS_IOMEM
- } else {
+ } else if (!atomic) {
hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
+ } else {
+ return -EINVAL;
#endif
}
@@ -1855,14 +1897,25 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
return 0;
}
+int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool atomic)
+{
+ return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
+ cache, atomic);
+}
+EXPORT_SYMBOL_GPL(kvm_map_gfn);
+
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
{
- return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map);
+ return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
+ NULL, false);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_map);
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
- bool dirty)
+static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+ struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache,
+ bool dirty, bool atomic)
{
if (!map)
return;
@@ -1870,23 +1923,45 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
if (!map->hva)
return;
- if (map->page != KVM_UNMAPPED_PAGE)
- kunmap(map->page);
+ if (map->page != KVM_UNMAPPED_PAGE) {
+ if (atomic)
+ kunmap_atomic(map->hva);
+ else
+ kunmap(map->page);
+ }
#ifdef CONFIG_HAS_IOMEM
- else
+ else if (!atomic)
memunmap(map->hva);
+ else
+ WARN_ONCE(1, "Unexpected unmapping in atomic context");
#endif
- if (dirty) {
- kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
- kvm_release_pfn_dirty(map->pfn);
- } else {
- kvm_release_pfn_clean(map->pfn);
- }
+ if (dirty)
+ mark_page_dirty_in_slot(memslot, map->gfn);
+
+ if (cache)
+ cache->dirty |= dirty;
+ else
+ kvm_release_pfn(map->pfn, dirty, NULL);
map->hva = NULL;
map->page = NULL;
}
+
+int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
+{
+ __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
+ cache, dirty, atomic);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
+
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
+{
+ __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
+ dirty, false);
+}
EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -1931,11 +2006,8 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
- struct page *page = pfn_to_page(pfn);
-
- SetPageDirty(page);
- }
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
+ SetPageDirty(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
@@ -2051,17 +2123,6 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
return 0;
}
-int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
- unsigned long len)
-{
- gfn_t gfn = gpa >> PAGE_SHIFT;
- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- int offset = offset_in_page(gpa);
-
- return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
-}
-EXPORT_SYMBOL_GPL(kvm_read_guest_atomic);
-
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
void *data, unsigned long len)
{
@@ -2158,33 +2219,36 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
gfn_t nr_pages_needed = end_gfn - start_gfn + 1;
gfn_t nr_pages_avail;
- int r = start_gfn <= end_gfn ? 0 : -EINVAL;
- ghc->gpa = gpa;
+ /* Update ghc->generation before performing any error checks. */
ghc->generation = slots->generation;
- ghc->len = len;
- ghc->hva = KVM_HVA_ERR_BAD;
+
+ if (start_gfn > end_gfn) {
+ ghc->hva = KVM_HVA_ERR_BAD;
+ return -EINVAL;
+ }
/*
* If the requested region crosses two memslots, we still
* verify that the entire region is valid here.
*/
- while (!r && start_gfn <= end_gfn) {
+ for ( ; start_gfn <= end_gfn; start_gfn += nr_pages_avail) {
ghc->memslot = __gfn_to_memslot(slots, start_gfn);
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
&nr_pages_avail);
if (kvm_is_error_hva(ghc->hva))
- r = -EFAULT;
- start_gfn += nr_pages_avail;
+ return -EFAULT;
}
/* Use the slow path for cross page reads and writes. */
- if (!r && nr_pages_needed == 1)
+ if (nr_pages_needed == 1)
ghc->hva += offset;
else
ghc->memslot = NULL;
- return r;
+ ghc->gpa = gpa;
+ ghc->len = len;
+ return 0;
}
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
@@ -2205,15 +2269,17 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
BUG_ON(len + offset > ghc->len);
- if (slots->generation != ghc->generation)
- __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
-
- if (unlikely(!ghc->memslot))
- return kvm_write_guest(kvm, gpa, data, len);
+ if (slots->generation != ghc->generation) {
+ if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
+ return -EFAULT;
+ }
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
+ if (unlikely(!ghc->memslot))
+ return kvm_write_guest(kvm, gpa, data, len);
+
r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
if (r)
return -EFAULT;
@@ -2238,15 +2304,17 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
BUG_ON(len > ghc->len);
- if (slots->generation != ghc->generation)
- __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
-
- if (unlikely(!ghc->memslot))
- return kvm_read_guest(kvm, ghc->gpa, data, len);
+ if (slots->generation != ghc->generation) {
+ if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
+ return -EFAULT;
+ }
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
+ if (unlikely(!ghc->memslot))
+ return kvm_read_guest(kvm, ghc->gpa, data, len);
+
r = __copy_from_user(data, (void __user *)ghc->hva, len);
if (r)
return -EFAULT;
@@ -2718,6 +2786,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
{
int r;
struct kvm_vcpu *vcpu;
+ struct page *page;
if (id >= KVM_MAX_VCPU_ID)
return -EINVAL;
@@ -2731,17 +2800,29 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
kvm->created_vcpus++;
mutex_unlock(&kvm->lock);
- vcpu = kvm_arch_vcpu_create(kvm, id);
- if (IS_ERR(vcpu)) {
- r = PTR_ERR(vcpu);
+ r = kvm_arch_vcpu_precreate(kvm, id);
+ if (r)
+ goto vcpu_decrement;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu) {
+ r = -ENOMEM;
goto vcpu_decrement;
}
- preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
+ BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page) {
+ r = -ENOMEM;
+ goto vcpu_free;
+ }
+ vcpu->run = page_address(page);
+
+ kvm_vcpu_init(vcpu, kvm, id);
- r = kvm_arch_vcpu_setup(vcpu);
+ r = kvm_arch_vcpu_create(vcpu);
if (r)
- goto vcpu_destroy;
+ goto vcpu_free_run_page;
kvm_create_vcpu_debugfs(vcpu);
@@ -2778,8 +2859,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
debugfs_remove_recursive(vcpu->debugfs_dentry);
-vcpu_destroy:
kvm_arch_vcpu_destroy(vcpu);
+vcpu_free_run_page:
+ free_page((unsigned long)vcpu->run);
+vcpu_free:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
vcpu_decrement:
mutex_lock(&kvm->lock);
kvm->created_vcpus--;
@@ -4013,8 +4097,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
return -ENOENT;
if (simple_attr_open(inode, file, get,
- stat_data->mode & S_IWUGO ? set : NULL,
- fmt)) {
+ KVM_DBGFS_GET_MODE(stat_data->dbgfs_item) & 0222
+ ? set : NULL,
+ fmt)) {
kvm_put_kvm(stat_data->kvm);
return -ENOMEM;
}
@@ -4033,105 +4118,111 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file)
return 0;
}
-static int vm_stat_get_per_vm(void *data, u64 *val)
+static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val)
{
- struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ *val = *(ulong *)((void *)kvm + offset);
+
+ return 0;
+}
- *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
+static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset)
+{
+ *(ulong *)((void *)kvm + offset) = 0;
return 0;
}
-static int vm_stat_clear_per_vm(void *data, u64 val)
+static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val)
{
- struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ int i;
+ struct kvm_vcpu *vcpu;
- if (val)
- return -EINVAL;
+ *val = 0;
- *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0;
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ *val += *(u64 *)((void *)vcpu + offset);
return 0;
}
-static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
+static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset)
{
- __simple_attr_check_format("%llu\n", 0ull);
- return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
- vm_stat_clear_per_vm, "%llu\n");
-}
+ int i;
+ struct kvm_vcpu *vcpu;
-static const struct file_operations vm_stat_get_per_vm_fops = {
- .owner = THIS_MODULE,
- .open = vm_stat_get_per_vm_open,
- .release = kvm_debugfs_release,
- .read = simple_attr_read,
- .write = simple_attr_write,
- .llseek = no_llseek,
-};
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ *(u64 *)((void *)vcpu + offset) = 0;
+
+ return 0;
+}
-static int vcpu_stat_get_per_vm(void *data, u64 *val)
+static int kvm_stat_data_get(void *data, u64 *val)
{
- int i;
+ int r = -EFAULT;
struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
- struct kvm_vcpu *vcpu;
-
- *val = 0;
- kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
- *val += *(u64 *)((void *)vcpu + stat_data->offset);
+ switch (stat_data->dbgfs_item->kind) {
+ case KVM_STAT_VM:
+ r = kvm_get_stat_per_vm(stat_data->kvm,
+ stat_data->dbgfs_item->offset, val);
+ break;
+ case KVM_STAT_VCPU:
+ r = kvm_get_stat_per_vcpu(stat_data->kvm,
+ stat_data->dbgfs_item->offset, val);
+ break;
+ }
- return 0;
+ return r;
}
-static int vcpu_stat_clear_per_vm(void *data, u64 val)
+static int kvm_stat_data_clear(void *data, u64 val)
{
- int i;
+ int r = -EFAULT;
struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
- struct kvm_vcpu *vcpu;
if (val)
return -EINVAL;
- kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
- *(u64 *)((void *)vcpu + stat_data->offset) = 0;
+ switch (stat_data->dbgfs_item->kind) {
+ case KVM_STAT_VM:
+ r = kvm_clear_stat_per_vm(stat_data->kvm,
+ stat_data->dbgfs_item->offset);
+ break;
+ case KVM_STAT_VCPU:
+ r = kvm_clear_stat_per_vcpu(stat_data->kvm,
+ stat_data->dbgfs_item->offset);
+ break;
+ }
- return 0;
+ return r;
}
-static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
+static int kvm_stat_data_open(struct inode *inode, struct file *file)
{
__simple_attr_check_format("%llu\n", 0ull);
- return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
- vcpu_stat_clear_per_vm, "%llu\n");
+ return kvm_debugfs_open(inode, file, kvm_stat_data_get,
+ kvm_stat_data_clear, "%llu\n");
}
-static const struct file_operations vcpu_stat_get_per_vm_fops = {
- .owner = THIS_MODULE,
- .open = vcpu_stat_get_per_vm_open,
+static const struct file_operations stat_fops_per_vm = {
+ .owner = THIS_MODULE,
+ .open = kvm_stat_data_open,
.release = kvm_debugfs_release,
- .read = simple_attr_read,
- .write = simple_attr_write,
- .llseek = no_llseek,
-};
-
-static const struct file_operations *stat_fops_per_vm[] = {
- [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
- [KVM_STAT_VM] = &vm_stat_get_per_vm_fops,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = no_llseek,
};
static int vm_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
- struct kvm_stat_data stat_tmp = {.offset = offset};
u64 tmp_val;
*val = 0;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
- stat_tmp.kvm = kvm;
- vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ kvm_get_stat_per_vm(kvm, offset, &tmp_val);
*val += tmp_val;
}
mutex_unlock(&kvm_lock);
@@ -4142,15 +4233,13 @@ static int vm_stat_clear(void *_offset, u64 val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
- struct kvm_stat_data stat_tmp = {.offset = offset};
if (val)
return -EINVAL;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
- stat_tmp.kvm = kvm;
- vm_stat_clear_per_vm((void *)&stat_tmp, 0);
+ kvm_clear_stat_per_vm(kvm, offset);
}
mutex_unlock(&kvm_lock);
@@ -4163,14 +4252,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
- struct kvm_stat_data stat_tmp = {.offset = offset};
u64 tmp_val;
*val = 0;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
- stat_tmp.kvm = kvm;
- vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ kvm_get_stat_per_vcpu(kvm, offset, &tmp_val);
*val += tmp_val;
}
mutex_unlock(&kvm_lock);
@@ -4181,15 +4268,13 @@ static int vcpu_stat_clear(void *_offset, u64 val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
- struct kvm_stat_data stat_tmp = {.offset = offset};
if (val)
return -EINVAL;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
- stat_tmp.kvm = kvm;
- vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
+ kvm_clear_stat_per_vcpu(kvm, offset);
}
mutex_unlock(&kvm_lock);
@@ -4262,9 +4347,8 @@ static void kvm_init_debug(void)
kvm_debugfs_num_entries = 0;
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- int mode = p->mode ? p->mode : 0644;
- debugfs_create_file(p->name, mode, kvm_debugfs_dir,
- (void *)(long)p->offset,
+ debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
+ kvm_debugfs_dir, (void *)(long)p->offset,
stat_fops[p->kind]);
}
}
@@ -4304,8 +4388,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
WRITE_ONCE(vcpu->preempted, false);
WRITE_ONCE(vcpu->ready, false);
+ __this_cpu_write(kvm_running_vcpu, vcpu);
kvm_arch_sched_in(vcpu, cpu);
-
kvm_arch_vcpu_load(vcpu, cpu);
}
@@ -4319,6 +4403,25 @@ static void kvm_sched_out(struct preempt_notifier *pn,
WRITE_ONCE(vcpu->ready, true);
}
kvm_arch_vcpu_put(vcpu);
+ __this_cpu_write(kvm_running_vcpu, NULL);
+}
+
+/**
+ * kvm_get_running_vcpu - get the vcpu running on the current CPU.
+ * Thanks to preempt notifiers, this can also be called from
+ * preemptible context.
+ */
+struct kvm_vcpu *kvm_get_running_vcpu(void)
+{
+ return __this_cpu_read(kvm_running_vcpu);
+}
+
+/**
+ * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
+ */
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
+{
+ return &kvm_running_vcpu;
}
static void check_processor_compat(void *rtn)