aboutsummaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/arch_timer.c2
-rw-r--r--virt/kvm/arm/arm.c60
-rw-r--r--virt/kvm/arm/mmu.c20
-rw-r--r--virt/kvm/arm/psci.c1
-rw-r--r--virt/kvm/arm/vgic/vgic-debug.c14
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c81
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c88
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c6
-rw-r--r--virt/kvm/arm/vgic/vgic-v4.c141
-rw-r--r--virt/kvm/arm/vgic/vgic.h1
-rw-r--r--virt/kvm/kvm_main.c663
11 files changed, 764 insertions, 313 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 0d9438e9de2a..93bd59b46848 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -788,7 +788,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
vcpu_ptimer(vcpu), TIMER_REG_CTL);
case KVM_REG_ARM_PTIMER_CNT:
return kvm_arm_timer_read(vcpu,
- vcpu_vtimer(vcpu), TIMER_REG_CNT);
+ vcpu_ptimer(vcpu), TIMER_REG_CNT);
case KVM_REG_ARM_PTIMER_CVAL:
return kvm_arm_timer_read(vcpu,
vcpu_ptimer(vcpu), TIMER_REG_CVAL);
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index eda7b624eab8..48d0ec44ad77 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -64,12 +64,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return 0;
}
@@ -625,6 +625,14 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
kvm_update_stolen_time(vcpu);
+
+ if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
+ /* The distributor enable bits were changed */
+ preempt_disable();
+ vgic_v4_put(vcpu, false);
+ vgic_v4_load(vcpu);
+ preempt_enable();
+ }
}
}
@@ -1181,55 +1189,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
return r;
}
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * Steps 1-4 below provide general overview of dirty page logging. See
- * kvm_get_dirty_log_protect() function description for additional details.
- *
- * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
- * always flush the TLB (step 4) even if previous step failed and the dirty
- * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
- * does not preclude user space subsequent dirty log read. Flushing TLB ensures
- * writes will be marked dirty for next log read.
- *
- * 1. Take a snapshot of the bit and clear it if needed.
- * 2. Write protect the corresponding page.
- * 3. Copy the snapshot to the userspace.
- * 4. Flush TLB's if needed.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
- bool flush = false;
- int r;
-
- mutex_lock(&kvm->slots_lock);
- r = kvm_get_dirty_log_protect(kvm, log, &flush);
-
- if (flush)
- kvm_flush_remote_tlbs(kvm);
-
- mutex_unlock(&kvm->slots_lock);
- return r;
}
-int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
{
- bool flush = false;
- int r;
-
- mutex_lock(&kvm->slots_lock);
-
- r = kvm_clear_dirty_log_protect(kvm, log, &flush);
-
- if (flush)
- kvm_flush_remote_tlbs(kvm);
-
- mutex_unlock(&kvm->slots_lock);
- return r;
+ kvm_flush_remote_tlbs(kvm);
}
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 19c961ac4e3c..e3b9ee268823 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1534,8 +1534,13 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
- phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
- phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+ phys_addr_t start, end;
+
+ if (WARN_ON_ONCE(!memslot))
+ return;
+
+ start = memslot->base_gfn << PAGE_SHIFT;
+ end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
stage2_wp_range(kvm, start, end);
@@ -2251,7 +2256,7 @@ out:
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -2349,17 +2354,10 @@ out:
return ret;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
}
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index 17e2bdd4b76f..14a162e295a9 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -12,7 +12,6 @@
#include <asm/cputype.h>
#include <asm/kvm_emulate.h>
-#include <asm/kvm_host.h>
#include <kvm/arm_psci.h>
#include <kvm/arm_hypercalls.h>
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
index cc12fe9b2df3..b13a9e3f99dd 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -178,6 +178,8 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
struct kvm_vcpu *vcpu)
{
char *type;
+ bool pending;
+
if (irq->intid < VGIC_NR_SGIS)
type = "SGI";
else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
@@ -190,6 +192,16 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
print_header(s, irq, vcpu);
+ pending = irq->pending_latch;
+ if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ int err;
+
+ err = irq_get_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ &pending);
+ WARN_ON_ONCE(err);
+ }
+
seq_printf(s, " %s %4d "
" %2d "
"%d%d%d%d%d%d%d "
@@ -201,7 +213,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
"\n",
type, irq->intid,
(irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1,
- irq->pending_latch,
+ pending,
irq->line_level,
irq->active,
irq->enabled,
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index ebc218840fc2..e72dcc454247 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -3,9 +3,11 @@
* VGICv3 MMIO handling functions
*/
+#include <linux/bitfield.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
#include <kvm/iodev.h>
#include <kvm/arm_vgic.h>
@@ -69,6 +71,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
if (vgic->enabled)
value |= GICD_CTLR_ENABLE_SS_G1;
value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
+ if (vgic->nassgireq)
+ value |= GICD_CTLR_nASSGIreq;
break;
case GICD_TYPER:
value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
@@ -80,6 +84,10 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
}
break;
+ case GICD_TYPER2:
+ if (kvm_vgic_global_state.has_gicv4_1)
+ value = GICD_TYPER2_nASSGIcap;
+ break;
case GICD_IIDR:
value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
@@ -97,17 +105,46 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
unsigned long val)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- bool was_enabled = dist->enabled;
switch (addr & 0x0c) {
- case GICD_CTLR:
+ case GICD_CTLR: {
+ bool was_enabled, is_hwsgi;
+
+ mutex_lock(&vcpu->kvm->lock);
+
+ was_enabled = dist->enabled;
+ is_hwsgi = dist->nassgireq;
+
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
- if (!was_enabled && dist->enabled)
+ /* Not a GICv4.1? No HW SGIs */
+ if (!kvm_vgic_global_state.has_gicv4_1)
+ val &= ~GICD_CTLR_nASSGIreq;
+
+ /* Dist stays enabled? nASSGIreq is RO */
+ if (was_enabled && dist->enabled) {
+ val &= ~GICD_CTLR_nASSGIreq;
+ val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi);
+ }
+
+ /* Switching HW SGIs? */
+ dist->nassgireq = val & GICD_CTLR_nASSGIreq;
+ if (is_hwsgi != dist->nassgireq)
+ vgic_v4_configure_vsgis(vcpu->kvm);
+
+ if (kvm_vgic_global_state.has_gicv4_1 &&
+ was_enabled != dist->enabled)
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4);
+ else if (!was_enabled && dist->enabled)
vgic_kick_vcpus(vcpu->kvm);
+
+ mutex_unlock(&vcpu->kvm->lock);
break;
+ }
case GICD_TYPER:
+ case GICD_TYPER2:
case GICD_IIDR:
+ /* This is at best for documentation purposes... */
return;
}
}
@@ -116,10 +153,22 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
switch (addr & 0x0c) {
+ case GICD_TYPER2:
case GICD_IIDR:
if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
return -EINVAL;
+ return 0;
+ case GICD_CTLR:
+ /* Not a GICv4.1? No HW SGIs */
+ if (!kvm_vgic_global_state.has_gicv4_1)
+ val &= ~GICD_CTLR_nASSGIreq;
+
+ dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
+ dist->nassgireq = val & GICD_CTLR_nASSGIreq;
+ return 0;
}
vgic_mmio_write_v3_misc(vcpu, addr, len, val);
@@ -257,8 +306,18 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
*/
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ bool state = irq->pending_latch;
- if (irq->pending_latch)
+ if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ int err;
+
+ err = irq_get_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ &state);
+ WARN_ON(err);
+ }
+
+ if (state)
value |= (1U << i);
vgic_put_irq(vcpu->kvm, irq);
@@ -942,8 +1001,18 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
* generate interrupts of either group.
*/
if (!irq->group || allow_group1) {
- irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ if (!irq->hw) {
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ } else {
+ /* HW SGI? Ask the GIC to inject it */
+ int err;
+ err = irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ true);
+ WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ }
} else {
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 97fb2a40e6ba..2199302597fa 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -5,6 +5,8 @@
#include <linux/bitops.h>
#include <linux/bsearch.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <kvm/iodev.h>
@@ -59,6 +61,11 @@ unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu,
return value;
}
+static void vgic_update_vsgi(struct vgic_irq *irq)
+{
+ WARN_ON(its_prop_update_vsgi(irq->host_irq, irq->priority, irq->group));
+}
+
void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
unsigned int len, unsigned long val)
{
@@ -71,7 +78,12 @@ void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->group = !!(val & BIT(i));
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ vgic_update_vsgi(irq);
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ } else {
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ }
vgic_put_irq(vcpu->kvm, irq);
}
@@ -113,7 +125,21 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- if (vgic_irq_is_mapped_level(irq)) {
+ if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ if (!irq->enabled) {
+ struct irq_data *data;
+
+ irq->enabled = true;
+ data = &irq_to_desc(irq->host_irq)->irq_data;
+ while (irqd_irq_disabled(data))
+ enable_irq(irq->host_irq);
+ }
+
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(vcpu->kvm, irq);
+
+ continue;
+ } else if (vgic_irq_is_mapped_level(irq)) {
bool was_high = irq->line_level;
/*
@@ -148,6 +174,8 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled)
+ disable_irq_nosync(irq->host_irq);
irq->enabled = false;
@@ -167,10 +195,22 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
unsigned long flags;
+ bool val;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- if (irq_is_pending(irq))
- value |= (1U << i);
+ if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ int err;
+
+ val = false;
+ err = irq_get_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ &val);
+ WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+ } else {
+ val = irq_is_pending(irq);
+ }
+
+ value |= ((u32)val << i);
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
@@ -215,6 +255,21 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
}
raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+ if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ /* HW SGI? Ask the GIC to inject it */
+ int err;
+ err = irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ true);
+ WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(vcpu->kvm, irq);
+
+ continue;
+ }
+
if (irq->hw)
vgic_hw_irq_spending(vcpu, irq, is_uaccess);
else
@@ -269,6 +324,20 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ /* HW SGI? Ask the GIC to clear its pending bit */
+ int err;
+ err = irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ false);
+ WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(vcpu->kvm, irq);
+
+ continue;
+ }
+
if (irq->hw)
vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
else
@@ -318,8 +387,15 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- if (irq->hw) {
+ if (irq->hw && !vgic_irq_is_sgi(irq->intid)) {
vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
+ } else if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+ /*
+ * GICv4.1 VSGI feature doesn't track an active state,
+ * so let's not kid ourselves, there is nothing we can
+ * do here.
+ */
+ irq->active = false;
} else {
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u8 active_source;
@@ -493,6 +569,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
raw_spin_lock_irqsave(&irq->irq_lock, flags);
/* Narrow the priority range to what we actually support */
irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
+ if (irq->hw && vgic_irq_is_sgi(irq->intid))
+ vgic_update_vsgi(irq);
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index f45635a6f0ec..2c9fc13e2c59 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -540,6 +540,8 @@ int vgic_v3_map_resources(struct kvm *kvm)
goto out;
}
+ if (kvm_vgic_global_state.has_gicv4_1)
+ vgic_v4_configure_vsgis(kvm);
dist->ready = true;
out:
@@ -595,7 +597,9 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
/* GICv4 support? */
if (info->has_v4) {
kvm_vgic_global_state.has_gicv4 = gicv4_enable;
- kvm_info("GICv4 support %sabled\n",
+ kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable;
+ kvm_info("GICv4%s support %sabled\n",
+ kvm_vgic_global_state.has_gicv4_1 ? ".1" : "",
gicv4_enable ? "en" : "dis");
}
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 46f875589c47..27ac833e5ec7 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -67,10 +67,10 @@
* it. And if we've migrated our vcpu from one CPU to another, we must
* tell the ITS (so that the messages reach the right redistributor).
* This is done in two steps: first issue a irq_set_affinity() on the
- * irq corresponding to the vcpu, then call its_schedule_vpe(). You
- * must be in a non-preemptible context. On exit, another call to
- * its_schedule_vpe() tells the redistributor that we're done with the
- * vcpu.
+ * irq corresponding to the vcpu, then call its_make_vpe_resident().
+ * You must be in a non-preemptible context. On exit, a call to
+ * its_make_vpe_non_resident() tells the redistributor that we're done
+ * with the vcpu.
*
* Finally, the doorbell handling: Each vcpu is allocated an interrupt
* which will fire each time a VLPI is made pending whilst the vcpu is
@@ -86,7 +86,8 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
struct kvm_vcpu *vcpu = info;
/* We got the message, no need to fire again */
- if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
+ if (!kvm_vgic_global_state.has_gicv4_1 &&
+ !irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
disable_irq_nosync(irq);
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
@@ -96,6 +97,104 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
return IRQ_HANDLED;
}
+static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq)
+{
+ vpe->sgi_config[irq->intid].enabled = irq->enabled;
+ vpe->sgi_config[irq->intid].group = irq->group;
+ vpe->sgi_config[irq->intid].priority = irq->priority;
+}
+
+static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu)
+{
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ int i;
+
+ /*
+ * With GICv4.1, every virtual SGI can be directly injected. So
+ * let's pretend that they are HW interrupts, tied to a host
+ * IRQ. The SGI code will do its magic.
+ */
+ for (i = 0; i < VGIC_NR_SGIS; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i);
+ struct irq_desc *desc;
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+ if (irq->hw)
+ goto unlock;
+
+ irq->hw = true;
+ irq->host_irq = irq_find_mapping(vpe->sgi_domain, i);
+
+ /* Transfer the full irq state to the vPE */
+ vgic_v4_sync_sgi_config(vpe, irq);
+ desc = irq_to_desc(irq->host_irq);
+ ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc),
+ false);
+ if (!WARN_ON(ret)) {
+ /* Transfer pending state */
+ ret = irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ irq->pending_latch);
+ WARN_ON(ret);
+ irq->pending_latch = false;
+ }
+ unlock:
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
+static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for (i = 0; i < VGIC_NR_SGIS; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i);
+ struct irq_desc *desc;
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+ if (!irq->hw)
+ goto unlock;
+
+ irq->hw = false;
+ ret = irq_get_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ &irq->pending_latch);
+ WARN_ON(ret);
+
+ desc = irq_to_desc(irq->host_irq);
+ irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
+ unlock:
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
+/* Must be called with the kvm lock held */
+void vgic_v4_configure_vsgis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_arm_halt_guest(kvm);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (dist->nassgireq)
+ vgic_v4_enable_vsgis(vcpu);
+ else
+ vgic_v4_disable_vsgis(vcpu);
+ }
+
+ kvm_arm_resume_guest(kvm);
+}
+
/**
* vgic_v4_init - Initialize the GICv4 data structures
* @kvm: Pointer to the VM being initialized
@@ -140,6 +239,7 @@ int vgic_v4_init(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm) {
int irq = dist->its_vm.vpes[i]->irq;
+ unsigned long irq_flags = DB_IRQ_FLAGS;
/*
* Don't automatically enable the doorbell, as we're
@@ -147,8 +247,14 @@ int vgic_v4_init(struct kvm *kvm)
* blocked. Also disable the lazy disabling, as the
* doorbell could kick us out of the guest too
* early...
+ *
+ * On GICv4.1, the doorbell is managed in HW and must
+ * be left enabled.
*/
- irq_set_status_flags(irq, DB_IRQ_FLAGS);
+ if (kvm_vgic_global_state.has_gicv4_1)
+ irq_flags &= ~IRQ_NOAUTOEN;
+ irq_set_status_flags(irq, irq_flags);
+
ret = request_irq(irq, vgic_v4_doorbell_handler,
0, "vcpu", vcpu);
if (ret) {
@@ -199,19 +305,11 @@ void vgic_v4_teardown(struct kvm *kvm)
int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
- struct irq_desc *desc = irq_to_desc(vpe->irq);
if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- /*
- * If blocking, a doorbell is required. Undo the nested
- * disable_irq() calls...
- */
- while (need_db && irqd_irq_disabled(&desc->irq_data))
- enable_irq(vpe->irq);
-
- return its_schedule_vpe(vpe, false);
+ return its_make_vpe_non_resident(vpe, need_db);
}
int vgic_v4_load(struct kvm_vcpu *vcpu)
@@ -232,18 +330,19 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
if (err)
return err;
- /* Disabled the doorbell, as we're about to enter the guest */
- disable_irq_nosync(vpe->irq);
-
- err = its_schedule_vpe(vpe, true);
+ err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled);
if (err)
return err;
/*
* Now that the VPE is resident, let's get rid of a potential
- * doorbell interrupt that would still be pending.
+ * doorbell interrupt that would still be pending. This is a
+ * GICv4.0 only "feature"...
*/
- return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
+ if (!kvm_vgic_global_state.has_gicv4_1)
+ err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
+
+ return err;
}
static struct vgic_its *vgic_get_its(struct kvm *kvm,
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index c7fefd6b1c80..769e4802645e 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -316,5 +316,6 @@ void vgic_its_invalidate_cache(struct kvm *kvm);
bool vgic_supports_direct_msis(struct kvm *kvm);
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
+void vgic_v4_configure_vsgis(struct kvm *kvm);
#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 70f03ce0e5c1..74bdb7bf3295 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -149,8 +149,6 @@ static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
-static bool largepages_enabled = true;
-
#define KVM_EVENT_CREATE_VM 0
#define KVM_EVENT_DESTROY_VM 1
static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
@@ -566,7 +564,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
return NULL;
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
- slots->id_to_index[i] = slots->memslots[i].id = i;
+ slots->id_to_index[i] = -1;
return slots;
}
@@ -580,18 +578,14 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
memslot->dirty_bitmap = NULL;
}
-/*
- * Free any memory in @free but not in @dont.
- */
-static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
- kvm_destroy_dirty_bitmap(free);
+ kvm_destroy_dirty_bitmap(slot);
- kvm_arch_free_memslot(kvm, free, dont);
+ kvm_arch_free_memslot(kvm, slot);
- free->npages = 0;
+ slot->flags = 0;
+ slot->npages = 0;
}
static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
@@ -602,7 +596,7 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
return;
kvm_for_each_memslot(memslot, slots)
- kvm_free_memslot(kvm, memslot, NULL);
+ kvm_free_memslot(kvm, memslot);
kvfree(slots);
}
@@ -860,9 +854,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
/*
* Allocation size is twice as large as the actual dirty bitmap size.
- * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed.
+ * See kvm_vm_ioctl_get_dirty_log() why this is needed.
*/
-static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
+static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
{
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
@@ -874,63 +868,165 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
}
/*
- * Insert memslot and re-sort memslots based on their GFN,
- * so binary search could be used to lookup GFN.
- * Sorting algorithm takes advantage of having initially
- * sorted array and known changed memslot position.
+ * Delete a memslot by decrementing the number of used slots and shifting all
+ * other entries in the array forward one spot.
*/
-static void update_memslots(struct kvm_memslots *slots,
- struct kvm_memory_slot *new,
- enum kvm_mr_change change)
+static inline void kvm_memslot_delete(struct kvm_memslots *slots,
+ struct kvm_memory_slot *memslot)
{
- int id = new->id;
- int i = slots->id_to_index[id];
struct kvm_memory_slot *mslots = slots->memslots;
+ int i;
- WARN_ON(mslots[i].id != id);
- switch (change) {
- case KVM_MR_CREATE:
- slots->used_slots++;
- WARN_ON(mslots[i].npages || !new->npages);
- break;
- case KVM_MR_DELETE:
- slots->used_slots--;
- WARN_ON(new->npages || !mslots[i].npages);
- break;
- default:
- break;
- }
+ if (WARN_ON(slots->id_to_index[memslot->id] == -1))
+ return;
- while (i < KVM_MEM_SLOTS_NUM - 1 &&
- new->base_gfn <= mslots[i + 1].base_gfn) {
- if (!mslots[i + 1].npages)
- break;
+ slots->used_slots--;
+
+ if (atomic_read(&slots->lru_slot) >= slots->used_slots)
+ atomic_set(&slots->lru_slot, 0);
+
+ for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
mslots[i] = mslots[i + 1];
slots->id_to_index[mslots[i].id] = i;
- i++;
}
+ mslots[i] = *memslot;
+ slots->id_to_index[memslot->id] = -1;
+}
+
+/*
+ * "Insert" a new memslot by incrementing the number of used slots. Returns
+ * the new slot's initial index into the memslots array.
+ */
+static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
+{
+ return slots->used_slots++;
+}
+
+/*
+ * Move a changed memslot backwards in the array by shifting existing slots
+ * with a higher GFN toward the front of the array. Note, the changed memslot
+ * itself is not preserved in the array, i.e. not swapped at this time, only
+ * its new index into the array is tracked. Returns the changed memslot's
+ * current index into the memslots array.
+ */
+static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
+ struct kvm_memory_slot *memslot)
+{
+ struct kvm_memory_slot *mslots = slots->memslots;
+ int i;
+
+ if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
+ WARN_ON_ONCE(!slots->used_slots))
+ return -1;
/*
- * The ">=" is needed when creating a slot with base_gfn == 0,
- * so that it moves before all those with base_gfn == npages == 0.
- *
- * On the other hand, if new->npages is zero, the above loop has
- * already left i pointing to the beginning of the empty part of
- * mslots, and the ">=" would move the hole backwards in this
- * case---which is wrong. So skip the loop when deleting a slot.
+ * Move the target memslot backward in the array by shifting existing
+ * memslots with a higher GFN (than the target memslot) towards the
+ * front of the array.
*/
- if (new->npages) {
- while (i > 0 &&
- new->base_gfn >= mslots[i - 1].base_gfn) {
- mslots[i] = mslots[i - 1];
- slots->id_to_index[mslots[i].id] = i;
- i--;
- }
- } else
- WARN_ON_ONCE(i != slots->used_slots);
+ for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
+ if (memslot->base_gfn > mslots[i + 1].base_gfn)
+ break;
+
+ WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
+
+ /* Shift the next memslot forward one and update its index. */
+ mslots[i] = mslots[i + 1];
+ slots->id_to_index[mslots[i].id] = i;
+ }
+ return i;
+}
+
+/*
+ * Move a changed memslot forwards in the array by shifting existing slots with
+ * a lower GFN toward the back of the array. Note, the changed memslot itself
+ * is not preserved in the array, i.e. not swapped at this time, only its new
+ * index into the array is tracked. Returns the changed memslot's final index
+ * into the memslots array.
+ */
+static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
+ struct kvm_memory_slot *memslot,
+ int start)
+{
+ struct kvm_memory_slot *mslots = slots->memslots;
+ int i;
+
+ for (i = start; i > 0; i--) {
+ if (memslot->base_gfn < mslots[i - 1].base_gfn)
+ break;
+
+ WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
+
+ /* Shift the next memslot back one and update its index. */
+ mslots[i] = mslots[i - 1];
+ slots->id_to_index[mslots[i].id] = i;
+ }
+ return i;
+}
+
+/*
+ * Re-sort memslots based on their GFN to account for an added, deleted, or
+ * moved memslot. Sorting memslots by GFN allows using a binary search during
+ * memslot lookup.
+ *
+ * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! I.e. the entry
+ * at memslots[0] has the highest GFN.
+ *
+ * The sorting algorithm takes advantage of having initially sorted memslots
+ * and knowing the position of the changed memslot. Sorting is also optimized
+ * by not swapping the updated memslot and instead only shifting other memslots
+ * and tracking the new index for the update memslot. Only once its final
+ * index is known is the updated memslot copied into its position in the array.
+ *
+ * - When deleting a memslot, the deleted memslot simply needs to be moved to
+ * the end of the array.
+ *
+ * - When creating a memslot, the algorithm "inserts" the new memslot at the
+ * end of the array and then it forward to its correct location.
+ *
+ * - When moving a memslot, the algorithm first moves the updated memslot
+ * backward to handle the scenario where the memslot's GFN was changed to a
+ * lower value. update_memslots() then falls through and runs the same flow
+ * as creating a memslot to move the memslot forward to handle the scenario
+ * where its GFN was changed to a higher value.
+ *
+ * Note, slots are sorted from highest->lowest instead of lowest->highest for
+ * historical reasons. Originally, invalid memslots where denoted by having
+ * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots
+ * to the end of the array. The current algorithm uses dedicated logic to
+ * delete a memslot and thus does not rely on invalid memslots having GFN=0.
+ *
+ * The other historical motiviation for highest->lowest was to improve the
+ * performance of memslot lookup. KVM originally used a linear search starting
+ * at memslots[0]. On x86, the largest memslot usually has one of the highest,
+ * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a
+ * single memslot above the 4gb boundary. As the largest memslot is also the
+ * most likely to be referenced, sorting it to the front of the array was
+ * advantageous. The current binary search starts from the middle of the array
+ * and uses an LRU pointer to improve performance for all memslots and GFNs.
+ */
+static void update_memslots(struct kvm_memslots *slots,
+ struct kvm_memory_slot *memslot,
+ enum kvm_mr_change change)
+{
+ int i;
+
+ if (change == KVM_MR_DELETE) {
+ kvm_memslot_delete(slots, memslot);
+ } else {
+ if (change == KVM_MR_CREATE)
+ i = kvm_memslot_insert_back(slots);
+ else
+ i = kvm_memslot_move_backward(slots, memslot);
+ i = kvm_memslot_move_forward(slots, memslot, i);
- mslots[i] = *new;
- slots->id_to_index[mslots[i].id] = i;
+ /*
+ * Copy the memslot to its new position in memslots and update
+ * its index accordingly.
+ */
+ slots->memslots[i] = *memslot;
+ slots->id_to_index[memslot->id] = i;
+ }
}
static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
@@ -984,6 +1080,112 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
}
/*
+ * Note, at a minimum, the current number of used slots must be allocated, even
+ * when deleting a memslot, as we need a complete duplicate of the memslots for
+ * use when invalidating a memslot prior to deleting/moving the memslot.
+ */
+static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
+ enum kvm_mr_change change)
+{
+ struct kvm_memslots *slots;
+ size_t old_size, new_size;
+
+ old_size = sizeof(struct kvm_memslots) +
+ (sizeof(struct kvm_memory_slot) * old->used_slots);
+
+ if (change == KVM_MR_CREATE)
+ new_size = old_size + sizeof(struct kvm_memory_slot);
+ else
+ new_size = old_size;
+
+ slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
+ if (likely(slots))
+ memcpy(slots, old, old_size);
+
+ return slots;
+}
+
+static int kvm_set_memslot(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new, int as_id,
+ enum kvm_mr_change change)
+{
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ int r;
+
+ slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
+ if (!slots)
+ return -ENOMEM;
+
+ if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+ /*
+ * Note, the INVALID flag needs to be in the appropriate entry
+ * in the freshly allocated memslots, not in @old or @new.
+ */
+ slot = id_to_memslot(slots, old->id);
+ slot->flags |= KVM_MEMSLOT_INVALID;
+
+ /*
+ * We can re-use the old memslots, the only difference from the
+ * newly installed memslots is the invalid flag, which will get
+ * dropped by update_memslots anyway. We'll also revert to the
+ * old memslots if preparing the new memory region fails.
+ */
+ slots = install_new_memslots(kvm, as_id, slots);
+
+ /* From this point no new shadow pages pointing to a deleted,
+ * or moved, memslot will be created.
+ *
+ * validation of sp->gfn happens in:
+ * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+ * - kvm_is_visible_gfn (mmu_check_root)
+ */
+ kvm_arch_flush_shadow_memslot(kvm, slot);
+ }
+
+ r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
+ if (r)
+ goto out_slots;
+
+ update_memslots(slots, new, change);
+ slots = install_new_memslots(kvm, as_id, slots);
+
+ kvm_arch_commit_memory_region(kvm, mem, old, new, change);
+
+ kvfree(slots);
+ return 0;
+
+out_slots:
+ if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
+ slots = install_new_memslots(kvm, as_id, slots);
+ kvfree(slots);
+ return r;
+}
+
+static int kvm_delete_memslot(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot *old, int as_id)
+{
+ struct kvm_memory_slot new;
+ int r;
+
+ if (!old->npages)
+ return -EINVAL;
+
+ memset(&new, 0, sizeof(new));
+ new.id = old->id;
+
+ r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE);
+ if (r)
+ return r;
+
+ kvm_free_memslot(kvm, old);
+ return 0;
+}
+
+/*
* Allocate some memory and give it an address in the guest physical address
* space.
*
@@ -994,162 +1196,118 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
int __kvm_set_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem)
{
- int r;
- gfn_t base_gfn;
- unsigned long npages;
- struct kvm_memory_slot *slot;
struct kvm_memory_slot old, new;
- struct kvm_memslots *slots = NULL, *old_memslots;
- int as_id, id;
+ struct kvm_memory_slot *tmp;
enum kvm_mr_change change;
+ int as_id, id;
+ int r;
r = check_memory_region_flags(mem);
if (r)
- goto out;
+ return r;
- r = -EINVAL;
as_id = mem->slot >> 16;
id = (u16)mem->slot;
/* General sanity checks */
if (mem->memory_size & (PAGE_SIZE - 1))
- goto out;
+ return -EINVAL;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
- goto out;
+ return -EINVAL;
/* We can read the guest memory with __xxx_user() later on. */
if ((id < KVM_USER_MEM_SLOTS) &&
((mem->userspace_addr & (PAGE_SIZE - 1)) ||
!access_ok((void __user *)(unsigned long)mem->userspace_addr,
mem->memory_size)))
- goto out;
+ return -EINVAL;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
- goto out;
+ return -EINVAL;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
- goto out;
-
- slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
- base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
- npages = mem->memory_size >> PAGE_SHIFT;
+ return -EINVAL;
- if (npages > KVM_MEM_MAX_NR_PAGES)
- goto out;
+ /*
+ * Make a full copy of the old memslot, the pointer will become stale
+ * when the memslots are re-sorted by update_memslots(), and the old
+ * memslot needs to be referenced after calling update_memslots(), e.g.
+ * to free its resources and for arch specific behavior.
+ */
+ tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+ if (tmp) {
+ old = *tmp;
+ tmp = NULL;
+ } else {
+ memset(&old, 0, sizeof(old));
+ old.id = id;
+ }
- new = old = *slot;
+ if (!mem->memory_size)
+ return kvm_delete_memslot(kvm, mem, &old, as_id);
new.id = id;
- new.base_gfn = base_gfn;
- new.npages = npages;
+ new.base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
+ new.npages = mem->memory_size >> PAGE_SHIFT;
new.flags = mem->flags;
+ new.userspace_addr = mem->userspace_addr;
- if (npages) {
- if (!old.npages)
- change = KVM_MR_CREATE;
- else { /* Modify an existing slot. */
- if ((mem->userspace_addr != old.userspace_addr) ||
- (npages != old.npages) ||
- ((new.flags ^ old.flags) & KVM_MEM_READONLY))
- goto out;
+ if (new.npages > KVM_MEM_MAX_NR_PAGES)
+ return -EINVAL;
- if (base_gfn != old.base_gfn)
- change = KVM_MR_MOVE;
- else if (new.flags != old.flags)
- change = KVM_MR_FLAGS_ONLY;
- else { /* Nothing to change. */
- r = 0;
- goto out;
- }
- }
- } else {
- if (!old.npages)
- goto out;
+ if (!old.npages) {
+ change = KVM_MR_CREATE;
+ new.dirty_bitmap = NULL;
+ memset(&new.arch, 0, sizeof(new.arch));
+ } else { /* Modify an existing slot. */
+ if ((new.userspace_addr != old.userspace_addr) ||
+ (new.npages != old.npages) ||
+ ((new.flags ^ old.flags) & KVM_MEM_READONLY))
+ return -EINVAL;
- change = KVM_MR_DELETE;
- new.base_gfn = 0;
- new.flags = 0;
+ if (new.base_gfn != old.base_gfn)
+ change = KVM_MR_MOVE;
+ else if (new.flags != old.flags)
+ change = KVM_MR_FLAGS_ONLY;
+ else /* Nothing to change. */
+ return 0;
+
+ /* Copy dirty_bitmap and arch from the current memslot. */
+ new.dirty_bitmap = old.dirty_bitmap;
+ memcpy(&new.arch, &old.arch, sizeof(new.arch));
}
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
/* Check for overlaps */
- r = -EEXIST;
- kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
- if (slot->id == id)
+ kvm_for_each_memslot(tmp, __kvm_memslots(kvm, as_id)) {
+ if (tmp->id == id)
continue;
- if (!((base_gfn + npages <= slot->base_gfn) ||
- (base_gfn >= slot->base_gfn + slot->npages)))
- goto out;
+ if (!((new.base_gfn + new.npages <= tmp->base_gfn) ||
+ (new.base_gfn >= tmp->base_gfn + tmp->npages)))
+ return -EEXIST;
}
}
- /* Free page dirty bitmap if unneeded */
+ /* Allocate/free page dirty bitmap as needed */
if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
new.dirty_bitmap = NULL;
+ else if (!new.dirty_bitmap) {
+ r = kvm_alloc_dirty_bitmap(&new);
+ if (r)
+ return r;
- r = -ENOMEM;
- if (change == KVM_MR_CREATE) {
- new.userspace_addr = mem->userspace_addr;
-
- if (kvm_arch_create_memslot(kvm, &new, npages))
- goto out_free;
- }
-
- /* Allocate page dirty bitmap if needed */
- if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
- if (kvm_create_dirty_bitmap(&new) < 0)
- goto out_free;
- }
-
- slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
- if (!slots)
- goto out_free;
- memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
-
- if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
- slot = id_to_memslot(slots, id);
- slot->flags |= KVM_MEMSLOT_INVALID;
-
- old_memslots = install_new_memslots(kvm, as_id, slots);
-
- /* From this point no new shadow pages pointing to a deleted,
- * or moved, memslot will be created.
- *
- * validation of sp->gfn happens in:
- * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
- * - kvm_is_visible_gfn (mmu_check_root)
- */
- kvm_arch_flush_shadow_memslot(kvm, slot);
-
- /*
- * We can re-use the old_memslots from above, the only difference
- * from the currently installed memslots is the invalid flag. This
- * will get overwritten by update_memslots anyway.
- */
- slots = old_memslots;
+ if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+ bitmap_set(new.dirty_bitmap, 0, new.npages);
}
- r = kvm_arch_prepare_memory_region(kvm, &new, mem, change);
+ r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
if (r)
- goto out_slots;
-
- /* actual memory is freed via old in kvm_free_memslot below */
- if (change == KVM_MR_DELETE) {
- new.dirty_bitmap = NULL;
- memset(&new.arch, 0, sizeof(new.arch));
- }
-
- update_memslots(slots, &new, change);
- old_memslots = install_new_memslots(kvm, as_id, slots);
-
- kvm_arch_commit_memory_region(kvm, mem, &old, &new, change);
+ goto out_bitmap;
- kvm_free_memslot(kvm, &old, &new);
- kvfree(old_memslots);
+ if (old.dirty_bitmap && !new.dirty_bitmap)
+ kvm_destroy_dirty_bitmap(&old);
return 0;
-out_slots:
- kvfree(slots);
-out_free:
- kvm_free_memslot(kvm, &new, &old);
-out:
+out_bitmap:
+ if (new.dirty_bitmap && !old.dirty_bitmap)
+ kvm_destroy_dirty_bitmap(&new);
return r;
}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
@@ -1175,31 +1333,43 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
return kvm_set_memory_region(kvm, mem);
}
-int kvm_get_dirty_log(struct kvm *kvm,
- struct kvm_dirty_log *log, int *is_dirty)
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+/**
+ * kvm_get_dirty_log - get a snapshot of dirty pages
+ * @kvm: pointer to kvm instance
+ * @log: slot id and address to which we copy the log
+ * @is_dirty: set to '1' if any dirty pages were found
+ * @memslot: set to the associated memslot, always valid on success
+ */
+int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
+ int *is_dirty, struct kvm_memory_slot **memslot)
{
struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
int i, as_id, id;
unsigned long n;
unsigned long any = 0;
+ *memslot = NULL;
+ *is_dirty = 0;
+
as_id = log->slot >> 16;
id = (u16)log->slot;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
- memslot = id_to_memslot(slots, id);
- if (!memslot->dirty_bitmap)
+ *memslot = id_to_memslot(slots, id);
+ if (!(*memslot) || !(*memslot)->dirty_bitmap)
return -ENOENT;
- n = kvm_dirty_bitmap_bytes(memslot);
+ kvm_arch_sync_dirty_log(kvm, *memslot);
+
+ n = kvm_dirty_bitmap_bytes(*memslot);
for (i = 0; !any && i < n/sizeof(long); ++i)
- any = memslot->dirty_bitmap[i];
+ any = (*memslot)->dirty_bitmap[i];
- if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+ if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n))
return -EFAULT;
if (any)
@@ -1208,13 +1378,12 @@ int kvm_get_dirty_log(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+#else /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
/**
* kvm_get_dirty_log_protect - get a snapshot of dirty pages
* and reenable dirty page tracking for the corresponding pages.
* @kvm: pointer to kvm instance
* @log: slot id and address to which we copy the log
- * @flush: true if TLB flush is needed by caller
*
* We need to keep it in mind that VCPU threads can write to the bitmap
* concurrently. So, to avoid losing track of dirty pages we keep the
@@ -1231,8 +1400,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
* exiting to userspace will be logged for the next call.
*
*/
-int kvm_get_dirty_log_protect(struct kvm *kvm,
- struct kvm_dirty_log *log, bool *flush)
+static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
@@ -1240,6 +1408,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
unsigned long n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
+ bool flush;
as_id = log->slot >> 16;
id = (u16)log->slot;
@@ -1248,13 +1417,15 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
slots = __kvm_memslots(kvm, as_id);
memslot = id_to_memslot(slots, id);
+ if (!memslot || !memslot->dirty_bitmap)
+ return -ENOENT;
dirty_bitmap = memslot->dirty_bitmap;
- if (!dirty_bitmap)
- return -ENOENT;
+
+ kvm_arch_sync_dirty_log(kvm, memslot);
n = kvm_dirty_bitmap_bytes(memslot);
- *flush = false;
+ flush = false;
if (kvm->manual_dirty_log_protect) {
/*
* Unlike kvm_get_dirty_log, we always return false in *flush,
@@ -1277,7 +1448,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
if (!dirty_bitmap[i])
continue;
- *flush = true;
+ flush = true;
mask = xchg(&dirty_bitmap[i], 0);
dirty_bitmap_buffer[i] = mask;
@@ -1288,21 +1459,55 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
spin_unlock(&kvm->mmu_lock);
}
+ if (flush)
+ kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+
+
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * Steps 1-4 below provide general overview of dirty page logging. See
+ * kvm_get_dirty_log_protect() function description for additional details.
+ *
+ * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
+ * always flush the TLB (step 4) even if previous step failed and the dirty
+ * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
+ * does not preclude user space subsequent dirty log read. Flushing TLB ensures
+ * writes will be marked dirty for next log read.
+ *
+ * 1. Take a snapshot of the bit and clear it if needed.
+ * 2. Write protect the corresponding page.
+ * 3. Copy the snapshot to the userspace.
+ * 4. Flush TLB's if needed.
+ */
+static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_get_dirty_log_protect(kvm, log);
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
/**
* kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
* and reenable dirty page tracking for the corresponding pages.
* @kvm: pointer to kvm instance
* @log: slot id and address from which to fetch the bitmap of dirty pages
- * @flush: true if TLB flush is needed by caller
*/
-int kvm_clear_dirty_log_protect(struct kvm *kvm,
- struct kvm_clear_dirty_log *log, bool *flush)
+static int kvm_clear_dirty_log_protect(struct kvm *kvm,
+ struct kvm_clear_dirty_log *log)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
@@ -1311,6 +1516,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
unsigned long i, n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
+ bool flush;
as_id = log->slot >> 16;
id = (u16)log->slot;
@@ -1322,10 +1528,10 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
slots = __kvm_memslots(kvm, as_id);
memslot = id_to_memslot(slots, id);
+ if (!memslot || !memslot->dirty_bitmap)
+ return -ENOENT;
dirty_bitmap = memslot->dirty_bitmap;
- if (!dirty_bitmap)
- return -ENOENT;
n = ALIGN(log->num_pages, BITS_PER_LONG) / 8;
@@ -1334,7 +1540,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
(log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63)))
return -EINVAL;
- *flush = false;
+ kvm_arch_sync_dirty_log(kvm, memslot);
+
+ flush = false;
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
return -EFAULT;
@@ -1357,28 +1565,32 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
* a problem if userspace sets them in log->dirty_bitmap.
*/
if (mask) {
- *flush = true;
+ flush = true;
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
offset, mask);
}
}
spin_unlock(&kvm->mmu_lock);
+ if (flush)
+ kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect);
-#endif
-bool kvm_largepages_enabled(void)
+static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
+ struct kvm_clear_dirty_log *log)
{
- return largepages_enabled;
-}
+ int r;
-void kvm_disable_largepages(void)
-{
- largepages_enabled = false;
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_clear_dirty_log_protect(kvm, log);
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
}
-EXPORT_SYMBOL_GPL(kvm_disable_largepages);
+#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
@@ -1754,12 +1966,6 @@ kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
-kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
-{
- return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn);
-}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
-
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn)
{
return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
@@ -3310,9 +3516,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
case KVM_CAP_ENABLE_CAP_VM:
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
- case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
-#endif
return 1;
#ifdef CONFIG_KVM_MMIO
case KVM_CAP_COALESCED_MMIO:
@@ -3320,6 +3523,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_COALESCED_PIO:
return 1;
#endif
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
+ return KVM_DIRTY_LOG_MANUAL_CAPS;
+#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
@@ -3347,11 +3554,17 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
{
switch (cap->cap) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
- case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
- if (cap->flags || (cap->args[0] & ~1))
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: {
+ u64 allowed_options = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE;
+
+ if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE)
+ allowed_options = KVM_DIRTY_LOG_MANUAL_CAPS;
+
+ if (cap->flags || (cap->args[0] & ~allowed_options))
return -EINVAL;
kvm->manual_dirty_log_protect = cap->args[0];
return 0;
+ }
#endif
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
@@ -4435,14 +4648,22 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
return &kvm_running_vcpu;
}
-static void check_processor_compat(void *rtn)
+struct kvm_cpu_compat_check {
+ void *opaque;
+ int *ret;
+};
+
+static void check_processor_compat(void *data)
{
- *(int *)rtn = kvm_arch_check_processor_compat();
+ struct kvm_cpu_compat_check *c = data;
+
+ *c->ret = kvm_arch_check_processor_compat(c->opaque);
}
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module)
{
+ struct kvm_cpu_compat_check c;
int r;
int cpu;
@@ -4466,12 +4687,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_free_0;
}
- r = kvm_arch_hardware_setup();
+ r = kvm_arch_hardware_setup(opaque);
if (r < 0)
goto out_free_1;
+ c.ret = &r;
+ c.opaque = opaque;
for_each_online_cpu(cpu) {
- smp_call_function_single(cpu, check_processor_compat, &r, 1);
+ smp_call_function_single(cpu, check_processor_compat, &c, 1);
if (r < 0)
goto out_free_2;
}