diff options
Diffstat (limited to 'arch/arm64/kvm/vgic')
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-debug.c | 16 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-init.c | 124 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-irqfd.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-its.c | 193 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-kvm-device.c | 438 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio-v3.c | 227 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio.c | 141 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio.h | 4 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v2.c | 6 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v3.c | 58 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v4.c | 33 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.c | 99 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.h | 45 |
13 files changed, 682 insertions, 704 deletions
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 78cde687383c..389025ce7749 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -85,7 +85,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) struct kvm *kvm = s->private; struct vgic_state_iter *iter; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; if (iter) { iter = ERR_PTR(-EBUSY); @@ -104,7 +104,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) if (end_of_vgic(iter)) iter = NULL; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return iter; } @@ -132,12 +132,12 @@ static void vgic_debug_stop(struct seq_file *s, void *v) if (IS_ERR(v)) return; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; kfree(iter->lpi_array); kfree(iter); kvm->arch.vgic.iter = NULL; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) @@ -149,7 +149,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); if (v3) - seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count); + seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count)); seq_printf(s, "enabled:\t%d\n", dist->enabled); seq_printf(s, "\n"); @@ -166,7 +166,7 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq, if (vcpu) { hdr = "VCPU"; - id = vcpu->vcpu_id; + id = vcpu->vcpu_idx; } seq_printf(s, "\n"); @@ -212,7 +212,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, " %2d " "\n", type, irq->intid, - (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, + (irq->target_vcpu) ? irq->target_vcpu->vcpu_idx : -1, pending, irq->line_level, irq->active, @@ -224,7 +224,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, irq->mpidr, irq->source, irq->priority, - (irq->vcpu) ? irq->vcpu->vcpu_id : -1); + (irq->vcpu) ? irq->vcpu->vcpu_idx : -1); } static int vgic_debug_show(struct seq_file *s, void *v) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index f6d4f4052555..f20941f83a07 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,9 +53,9 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - INIT_LIST_HEAD(&dist->lpi_list_head); INIT_LIST_HEAD(&dist->lpi_translation_cache); raw_spin_lock_init(&dist->lpi_list_lock); + xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); } /* CREATION */ @@ -74,9 +74,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) unsigned long i; int ret; - if (irqchip_in_kernel(kvm)) - return -EEXIST; - /* * This function is also called by the KVM_CREATE_IRQCHIP handler, * which had no chance yet to check the availability of the GICv2 @@ -87,10 +84,20 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) !kvm_vgic_global_state.can_emulate_gicv2) return -ENODEV; + /* Must be held to avoid race with vCPU creation */ + lockdep_assert_held(&kvm->lock); + ret = -EBUSY; if (!lock_all_vcpus(kvm)) return ret; + mutex_lock(&kvm->arch.config_lock); + + if (irqchip_in_kernel(kvm)) { + ret = -EEXIST; + goto out_unlock; + } + kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu_has_run_once(vcpu)) goto out_unlock; @@ -118,6 +125,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); out_unlock: + mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); return ret; } @@ -227,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * KVM io device for the redistributor that belongs to this VCPU. */ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->slots_lock); ret = vgic_register_redist_iodev(vcpu); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->slots_lock); } return ret; } @@ -250,7 +258,6 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) * The function is generally called when nr_spis has been explicitly set * by the guest through the KVM DEVICE API. If not nr_spis is set to 256. * vgic_initialized() returns true when this function has succeeded. - * Must be called with kvm->lock held! */ int vgic_init(struct kvm *kvm) { @@ -259,6 +266,8 @@ int vgic_init(struct kvm *kvm) int ret = 0, i; unsigned long idx; + lockdep_assert_held(&kvm->arch.config_lock); + if (vgic_initialized(kvm)) return 0; @@ -300,7 +309,7 @@ int vgic_init(struct kvm *kvm) vgic_lpi_translation_cache_init(kvm); /* - * If we have GICv4.1 enabled, unconditionnaly request enable the + * If we have GICv4.1 enabled, unconditionally request enable the * v4 support so that we get HW-accelerated vSGIs. Otherwise, only * enable it if we present a virtual ITS to the guest. */ @@ -357,9 +366,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) if (vgic_supports_direct_msis(kvm)) vgic_v4_teardown(kvm); + + xa_destroy(&dist->lpi_xa); } -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -370,33 +381,44 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_flush_pending_lpis(vcpu); INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_unregister_redist_iodev(vcpu); + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + } } -/* To be called with kvm->lock held */ -static void __kvm_vgic_destroy(struct kvm *kvm) +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->slots_lock); + __kvm_vgic_vcpu_destroy(vcpu); + mutex_unlock(&kvm->slots_lock); +} + +void kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; + mutex_lock(&kvm->slots_lock); + vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); + __kvm_vgic_vcpu_destroy(vcpu); + + mutex_lock(&kvm->arch.config_lock); kvm_vgic_dist_destroy(kvm); -} -void kvm_vgic_destroy(struct kvm *kvm) -{ - mutex_lock(&kvm->lock); - __kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); } /** * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest - * is a GICv2. A GICv3 must be explicitly initialized by the guest using the + * is a GICv2. A GICv3 must be explicitly initialized by userspace using the * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. * @kvm: kvm struct pointer */ @@ -414,9 +436,9 @@ int vgic_lazy_init(struct kvm *kvm) if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) return -EBUSY; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } return ret; @@ -425,57 +447,76 @@ int vgic_lazy_init(struct kvm *kvm) /* RESOURCE MAPPING */ /** + * kvm_vgic_map_resources - map the MMIO regions + * @kvm: kvm struct pointer + * * Map the MMIO regions depending on the VGIC model exposed to the guest * called on the first VCPU run. * Also map the virtual CPU interface into the VM. * v2 calls vgic_init() if not already done. * v3 and derivatives return an error if the VGIC is not initialized. * vgic_ready() returns true if this function has succeeded. - * @kvm: kvm struct pointer */ int kvm_vgic_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + enum vgic_type type; + gpa_t dist_base; int ret = 0; if (likely(vgic_ready(kvm))) return 0; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); if (vgic_ready(kvm)) goto out; if (!irqchip_in_kernel(kvm)) goto out; - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { ret = vgic_v2_map_resources(kvm); - else + type = VGIC_V2; + } else { ret = vgic_v3_map_resources(kvm); + type = VGIC_V3; + } if (ret) - __kvm_vgic_destroy(kvm); - else - dist->ready = true; + goto out; + + dist->ready = true; + dist_base = dist->vgic_dist_base; + mutex_unlock(&kvm->arch.config_lock); + + ret = vgic_register_dist_iodev(kvm, dist_base, type); + if (ret) + kvm_err("Unable to register VGIC dist MMIO regions\n"); + goto out_slots; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); +out_slots: + mutex_unlock(&kvm->slots_lock); + + if (ret) + kvm_vgic_destroy(kvm); + return ret; } /* GENERIC PROBE */ -static int vgic_init_cpu_starting(unsigned int cpu) +void kvm_vgic_cpu_up(void) { enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); - return 0; } -static int vgic_init_cpu_dying(unsigned int cpu) +void kvm_vgic_cpu_down(void) { disable_percpu_irq(kvm_vgic_global_state.maint_irq); - return 0; } static irqreturn_t vgic_maintenance_handler(int irq, void *data) @@ -572,7 +613,7 @@ int kvm_vgic_hyp_init(void) if (ret) return ret; - if (!has_mask) + if (!has_mask && !kvm_vgic_global_state.maint_irq) return 0; ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, @@ -584,19 +625,6 @@ int kvm_vgic_hyp_init(void) return ret; } - ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - "kvm/arm/vgic:starting", - vgic_init_cpu_starting, vgic_init_cpu_dying); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); return 0; - -out_free_irq: - free_percpu_irq(kvm_vgic_global_state.maint_irq, - kvm_get_running_vcpus()); - return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 475059bacedf..8c711deb25aa 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -23,7 +23,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); + return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL); } /** diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 9d3299a70242..e85a495ada9c 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -52,7 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&irq->lpi_list); + ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + if (ret) { + kfree(irq); + return ERR_PTR(ret); + } + INIT_LIST_HEAD(&irq->ap_list); raw_spin_lock_init(&irq->irq_lock); @@ -68,30 +73,30 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, * There could be a race with another vgic_add_lpi(), so we need to * check that we don't add a second list entry with the same LPI. */ - list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { - if (oldirq->intid != intid) - continue; - + oldirq = xa_load(&dist->lpi_xa, intid); + if (vgic_try_get_irq_kref(oldirq)) { /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; - /* - * This increases the refcount, the caller is expected to - * call vgic_put_irq() on the returned pointer once it's - * finished with the IRQ. - */ - vgic_get_irq_kref(irq); + goto out_unlock; + } + ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0)); + if (ret) { + xa_release(&dist->lpi_xa, intid); + kfree(irq); goto out_unlock; } - list_add_tail(&irq->lpi_list, &dist->lpi_list_head); - dist->lpi_list_count++; + atomic_inc(&dist->lpi_count); out_unlock: raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + if (ret) + return ERR_PTR(ret); + /* * We "cache" the configuration table entries in our struct vgic_irq's. * However we only have those structs for mapped IRQs, so we read in @@ -158,7 +163,7 @@ struct vgic_translation_cache_entry { * @cte_esz: collection table entry size * @dte_esz: device table entry size * @ite_esz: interrupt translation table entry size - * @save tables: save the ITS tables into guest RAM + * @save_tables: save the ITS tables into guest RAM * @restore_tables: restore the ITS internal structs from tables * stored in guest RAM * @commit: initialize the registers which expose the ABI settings, @@ -311,6 +316,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, return 0; } +#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1) + /* * Create a snapshot of the current LPIs targeting @vcpu, so that we can * enumerate those LPIs without holding any lock. @@ -319,6 +326,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) { struct vgic_dist *dist = &kvm->arch.vgic; + XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET); struct vgic_irq *irq; unsigned long flags; u32 *intids; @@ -331,13 +339,15 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) * command). If coming from another path (such as enabling LPIs), * we must be careful not to overrun the array. */ - irq_count = READ_ONCE(dist->lpi_list_count); + irq_count = atomic_read(&dist->lpi_count); intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT); if (!intids) return -ENOMEM; raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + rcu_read_lock(); + + xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) { if (i == irq_count) break; /* We don't need to "get" the IRQ, as we hold the list lock. */ @@ -345,6 +355,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) continue; intids[i++] = irq->intid; } + + rcu_read_unlock(); raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); *intid_ptr = intids; @@ -378,6 +390,12 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) return ret; } +static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm, + struct its_collection *col) +{ + return kvm_get_vcpu_by_id(kvm, col->target_addr); +} + /* * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI * is targeting) to the VGIC's view, which deals with target VCPUs. @@ -391,7 +409,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) if (!its_is_collection_mapped(ite->collection)) return; - vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + vcpu = collection_to_vcpu(kvm, ite->collection); update_affinity(ite->irq, vcpu); } @@ -406,7 +424,7 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, struct its_ite *ite; for_each_lpi_its(device, ite, its) { - if (!ite->collection || coll != ite->collection) + if (ite->collection != coll) continue; update_affinity_ite(kvm, ite); @@ -462,6 +480,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); @@ -584,7 +605,11 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, unsigned long flags; raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + irq = __vgic_its_check_cache(dist, db, devid, eventid); + if (!vgic_try_get_irq_kref(irq)) + irq = NULL; + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return irq; @@ -627,8 +652,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, * was in the cache, and increment it on the new interrupt. */ if (cte->irq) - __vgic_put_lpi_locked(kvm, cte->irq); + vgic_put_irq(kvm, cte->irq); + /* + * The irq refcount is guaranteed to be nonzero while holding the + * its_lock, as the ITE (and the reference it holds) cannot be freed. + */ + lockdep_assert_held(&its->its_lock); vgic_get_irq_kref(irq); cte->db = db; @@ -659,7 +689,7 @@ void vgic_its_invalidate_cache(struct kvm *kvm) if (!cte->irq) break; - __vgic_put_lpi_locked(kvm, cte->irq); + vgic_put_irq(kvm, cte->irq); cte->irq = NULL; } @@ -679,7 +709,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, if (!ite || !its_is_collection_mapped(ite->collection)) return E_ITS_INT_UNMAPPED_INTERRUPT; - vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + vcpu = collection_to_vcpu(kvm, ite->collection); if (!vcpu) return E_ITS_INT_UNMAPPED_INTERRUPT; @@ -763,6 +793,7 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; vgic_queue_irq_unlock(kvm, irq, flags); + vgic_put_irq(kvm, irq); return 0; } @@ -887,7 +918,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, return E_ITS_MOVI_UNMAPPED_COLLECTION; ite->collection = collection; - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); vgic_its_invalidate_cache(kvm); @@ -1121,7 +1152,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, } if (its_is_collection_mapped(collection)) - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); irq = vgic_add_lpi(kvm, lpi_nr, vcpu); if (IS_ERR(irq)) { @@ -1242,21 +1273,22 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { u16 coll_id; - u32 target_addr; struct its_collection *collection; bool valid; valid = its_cmd_get_validbit(its_cmd); coll_id = its_cmd_get_collection(its_cmd); - target_addr = its_cmd_get_target_addr(its_cmd); - - if (target_addr >= atomic_read(&kvm->online_vcpus)) - return E_ITS_MAPC_PROCNUM_OOR; if (!valid) { vgic_its_free_collection(its, coll_id); vgic_its_invalidate_cache(kvm); } else { + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd)); + if (!vcpu) + return E_ITS_MAPC_PROCNUM_OOR; + collection = find_collection(its, coll_id); if (!collection) { @@ -1270,9 +1302,9 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, coll_id); if (ret) return ret; - collection->target_addr = target_addr; + collection->target_addr = vcpu->vcpu_id; } else { - collection->target_addr = target_addr; + collection->target_addr = vcpu->vcpu_id; update_affinity_collection(kvm, its, collection); } } @@ -1330,8 +1362,8 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, } /** - * vgic_its_invall - invalidate all LPIs targetting a given vcpu - * @vcpu: the vcpu for which the RD is targetted by an invalidation + * vgic_its_invall - invalidate all LPIs targeting a given vcpu + * @vcpu: the vcpu for which the RD is targeted by an invalidation * * Contrary to the INVALL command, this targets a RD instead of a * collection, and we don't need to hold the its_lock, since no ITS is @@ -1382,7 +1414,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, if (!its_is_collection_mapped(collection)) return E_ITS_INVALL_UNMAPPED_COLLECTION; - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); vgic_its_invall(vcpu); return 0; @@ -1399,29 +1431,29 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { - u32 target1_addr = its_cmd_get_target_addr(its_cmd); - u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); struct kvm_vcpu *vcpu1, *vcpu2; struct vgic_irq *irq; u32 *intids; int irq_count, i; - if (target1_addr >= atomic_read(&kvm->online_vcpus) || - target2_addr >= atomic_read(&kvm->online_vcpus)) + /* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */ + vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd)); + vcpu2 = kvm_get_vcpu_by_id(kvm, its_cmd_mask_field(its_cmd, 3, 16, 32)); + + if (!vcpu1 || !vcpu2) return E_ITS_MOVALL_PROCNUM_OOR; - if (target1_addr == target2_addr) + if (vcpu1 == vcpu2) return 0; - vcpu1 = kvm_get_vcpu(kvm, target1_addr); - vcpu2 = kvm_get_vcpu(kvm, target2_addr); - irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids); if (irq_count < 0) return irq_count; for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); @@ -1936,6 +1968,7 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm) static int vgic_its_create(struct kvm_device *dev, u32 type) { + int ret; struct vgic_its *its; if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) @@ -1945,9 +1978,12 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (!its) return -ENOMEM; + mutex_lock(&dev->kvm->arch.config_lock); + if (vgic_initialized(dev->kvm)) { - int ret = vgic_v4_init(dev->kvm); + ret = vgic_v4_init(dev->kvm); if (ret < 0) { + mutex_unlock(&dev->kvm->arch.config_lock); kfree(its); return ret; } @@ -1958,6 +1994,14 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) mutex_init(&its->its_lock); mutex_init(&its->cmd_lock); + /* Yep, even more trickery for lock ordering... */ +#ifdef CONFIG_LOCKDEP + mutex_lock(&its->cmd_lock); + mutex_lock(&its->its_lock); + mutex_unlock(&its->its_lock); + mutex_unlock(&its->cmd_lock); +#endif + its->vgic_its_base = VGIC_ADDR_UNDEF; INIT_LIST_HEAD(&its->device_list); @@ -1976,7 +2020,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) dev->private = its; - return vgic_its_set_abi(its, NR_ITS_ABIS - 1); + ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1); + + mutex_unlock(&dev->kvm->arch.config_lock); + + return ret; } static void vgic_its_destroy(struct kvm_device *kvm_dev) @@ -2045,6 +2093,13 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { ret = -ENXIO; goto out; @@ -2058,11 +2113,6 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, goto out; } - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - addr = its->vgic_its_base + offset; len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; @@ -2076,8 +2126,9 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, } else { *reg = region->its_read(dev->kvm, its, addr, len); } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return ret; } @@ -2110,7 +2161,7 @@ static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite) } /** - * entry_fn_t - Callback called on a table entry restore path + * typedef entry_fn_t - Callback called on a table entry restore path * @its: its handle * @id: id of the entry * @entry: pointer to the entry @@ -2149,7 +2200,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, memset(entry, 0, esz); - while (len > 0) { + while (true) { int next_offset; size_t byte_offset; @@ -2162,6 +2213,9 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, return next_offset; byte_offset = next_offset * esz; + if (byte_offset >= len) + break; + id += next_offset; gpa += byte_offset; len -= byte_offset; @@ -2184,7 +2238,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | ite->collection->collection_id; val = cpu_to_le64(val); - return kvm_write_guest_lock(kvm, gpa, &val, ite_esz); + return vgic_write_guest_lock(kvm, gpa, &val, ite_esz); } /** @@ -2236,7 +2290,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, return PTR_ERR(ite); if (its_is_collection_mapped(collection)) - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = kvm_get_vcpu_by_id(kvm, collection->target_addr); irq = vgic_add_lpi(kvm, lpi_id, vcpu); if (IS_ERR(irq)) { @@ -2336,7 +2390,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | (dev->num_eventid_bits - 1)); val = cpu_to_le64(val); - return kvm_write_guest_lock(kvm, ptr, &val, dte_esz); + return vgic_write_guest_lock(kvm, ptr, &val, dte_esz); } /** @@ -2523,7 +2577,7 @@ static int vgic_its_save_cte(struct vgic_its *its, ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | collection->collection_id); val = cpu_to_le64(val); - return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz); + return vgic_write_guest_lock(its->dev->kvm, gpa, &val, esz); } /* @@ -2551,7 +2605,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) coll_id = val & KVM_ITS_CTE_ICID_MASK; if (target_addr != COLLECTION_NOT_MAPPED && - target_addr >= atomic_read(&kvm->online_vcpus)) + !kvm_get_vcpu_by_id(kvm, target_addr)) return -EINVAL; collection = find_collection(its, coll_id); @@ -2604,7 +2658,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) */ val = 0; BUG_ON(cte_esz > sizeof(val)); - ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); + ret = vgic_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); return ret; } @@ -2746,14 +2800,15 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return 0; mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); return -EBUSY; } + mutex_lock(&kvm->arch.config_lock); + mutex_lock(&its->its_lock); + switch (attr) { case KVM_DEV_ARM_ITS_CTRL_RESET: vgic_its_reset(kvm, its); @@ -2766,12 +2821,30 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) break; } - unlock_all_vcpus(kvm); mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->arch.config_lock); + unlock_all_vcpus(kvm); mutex_unlock(&kvm->lock); return ret; } +/* + * kvm_arch_allow_write_without_running_vcpu - allow writing guest memory + * without the running VCPU when dirty ring is enabled. + * + * The running VCPU is required to track dirty guest pages when dirty ring + * is enabled. Otherwise, the backup bitmap should be used to track the + * dirty guest pages. When vgic/its tables are being saved, the backup + * bitmap is used to track the dirty guest pages due to the missed running + * VCPU in the period. + */ +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + return dist->table_write_in_progress; +} + static int vgic_its_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index c6d52a1fd9c8..f48b8dab8b3d 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -27,7 +27,8 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, if (addr + size < addr) return -EINVAL; - if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm)) + if (addr & ~kvm_phys_mask(&kvm->arch.mmu) || + (addr + size) > kvm_phys_size(&kvm->arch.mmu)) return -E2BIG; return 0; @@ -41,11 +42,42 @@ static int vgic_check_type(struct kvm *kvm, int type_needed) return 0; } +int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr) +{ + struct vgic_dist *vgic = &kvm->arch.vgic; + int r; + + mutex_lock(&kvm->arch.config_lock); + switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + if (!r) + r = vgic_check_iorange(kvm, vgic->vgic_dist_base, dev_addr->addr, + SZ_4K, KVM_VGIC_V2_DIST_SIZE); + if (!r) + vgic->vgic_dist_base = dev_addr->addr; + break; + case KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + if (!r) + r = vgic_check_iorange(kvm, vgic->vgic_cpu_base, dev_addr->addr, + SZ_4K, KVM_VGIC_V2_CPU_SIZE); + if (!r) + vgic->vgic_cpu_base = dev_addr->addr; + break; + default: + r = -ENODEV; + } + + mutex_unlock(&kvm->arch.config_lock); + + return r; +} + /** * kvm_vgic_addr - set or get vgic VM base addresses * @kvm: pointer to the vm struct - * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX - * @addr: pointer to address value + * @attr: pointer to the attribute being retrieved/updated * @write: if true set the address in the VM address space, if false read the * address * @@ -57,15 +89,26 @@ static int vgic_check_type(struct kvm *kvm, int type_needed) * overlapping regions in case of a virtual GICv3 here, since we don't know * the number of VCPUs yet, so we defer this check to map_resources(). */ -int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool write) { - int r = 0; + u64 __user *uaddr = (u64 __user *)attr->addr; struct vgic_dist *vgic = &kvm->arch.vgic; phys_addr_t *addr_ptr, alignment, size; u64 undef_value = VGIC_ADDR_UNDEF; + u64 addr; + int r; - mutex_lock(&kvm->lock); - switch (type) { + /* Reading a redistributor region addr implies getting the index */ + if (write || attr->attr == KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION) + if (get_user(addr, uaddr)) + return -EFAULT; + + /* + * Since we can't hold config_lock while registering the redistributor + * iodevs, take the slots_lock immediately. + */ + mutex_lock(&kvm->slots_lock); + switch (attr->attr) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_dist_base; @@ -91,7 +134,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) if (r) break; if (write) { - r = vgic_v3_set_redist_base(kvm, 0, *addr, 0); + r = vgic_v3_set_redist_base(kvm, 0, addr, 0); goto out; } rdreg = list_first_entry_or_null(&vgic->rd_regions, @@ -111,14 +154,12 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) if (r) break; - index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK; + index = addr & KVM_VGIC_V3_RDIST_INDEX_MASK; if (write) { - gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK; - u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK) - >> KVM_VGIC_V3_RDIST_COUNT_SHIFT; - u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK) - >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT; + gpa_t base = addr & KVM_VGIC_V3_RDIST_BASE_MASK; + u32 count = FIELD_GET(KVM_VGIC_V3_RDIST_COUNT_MASK, addr); + u8 flags = FIELD_GET(KVM_VGIC_V3_RDIST_FLAGS_MASK, addr); if (!count || flags) r = -EINVAL; @@ -134,9 +175,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) goto out; } - *addr = index; - *addr |= rdreg->base; - *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; + addr = index; + addr |= rdreg->base; + addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; goto out; } default: @@ -146,16 +187,22 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) if (r) goto out; + mutex_lock(&kvm->arch.config_lock); if (write) { - r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size); + r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size); if (!r) - *addr_ptr = *addr; + *addr_ptr = addr; } else { - *addr = *addr_ptr; + addr = *addr_ptr; } + mutex_unlock(&kvm->arch.config_lock); out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->slots_lock); + + if (!r && !write) + r = put_user(addr, uaddr); + return r; } @@ -165,17 +212,9 @@ static int vgic_set_common_attr(struct kvm_device *dev, int r; switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - if (copy_from_user(&addr, uaddr, sizeof(addr))) - return -EFAULT; - - r = kvm_vgic_addr(dev->kvm, type, &addr, true); + case KVM_DEV_ARM_VGIC_GRP_ADDR: + r = kvm_vgic_addr(dev->kvm, attr, true); return (r == -ENODEV) ? -ENXIO : r; - } case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; u32 val; @@ -195,7 +234,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, (val & 31)) return -EINVAL; - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) ret = -EBUSY; @@ -203,15 +242,36 @@ static int vgic_set_common_attr(struct kvm_device *dev, dev->kvm->arch.vgic.nr_spis = val - VGIC_NR_PRIVATE_IRQS; - mutex_unlock(&dev->kvm->lock); + mutex_unlock(&dev->kvm->arch.config_lock); return ret; } case KVM_DEV_ARM_VGIC_GRP_CTRL: { switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); r = vgic_init(dev->kvm); + mutex_unlock(&dev->kvm->arch.config_lock); + return r; + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + /* + * OK, this one isn't common at all, but we + * want to handle all control group attributes + * in a single place. + */ + if (vgic_check_type(dev->kvm, KVM_DEV_TYPE_ARM_VGIC_V3)) + return -ENXIO; + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + r = vgic_v3_save_pending_tables(dev->kvm); + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return r; } @@ -228,22 +288,9 @@ static int vgic_get_common_attr(struct kvm_device *dev, int r = -ENXIO; switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - if (copy_from_user(&addr, uaddr, sizeof(addr))) - return -EFAULT; - - r = kvm_vgic_addr(dev->kvm, type, &addr, false); - if (r) - return (r == -ENODEV) ? -ENXIO : r; - - if (copy_to_user(uaddr, &addr, sizeof(addr))) - return -EFAULT; - break; - } + case KVM_DEV_ARM_VGIC_GRP_ADDR: + r = kvm_vgic_addr(dev->kvm, attr, false); + return (r == -ENODEV) ? -ENXIO : r; case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; @@ -293,72 +340,31 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, { int cpuid; - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; + cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) - return -EINVAL; - - reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); + reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; return 0; } -/* unlocks vcpus from @vcpu_lock_idx and smaller */ -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -/* Returns true if all vcpus were locked, false otherwise */ -bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run and fiddle with the vgic state while we - * access it. - */ - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - /** * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state * * @dev: kvm device handle * @attr: kvm device attribute - * @reg: address the value is read or written * @is_write: true if userspace is writing a register */ static int vgic_v2_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, - u32 *reg, bool is_write) + bool is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; struct vgic_reg_attr reg_attr; gpa_t addr; struct kvm_vcpu *vcpu; int ret; + u32 val; ret = vgic_v2_parse_attr(dev, attr, ®_attr); if (ret) @@ -367,83 +373,68 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, vcpu = reg_attr.vcpu; addr = reg_attr.addr; + if (is_write) + if (get_user(val, uaddr)) + return -EFAULT; + mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + ret = vgic_init(dev->kvm); if (ret) goto out; - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg); + ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val); break; case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg); + ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, &val); break; default: ret = -EINVAL; break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); + + if (!ret && !is_write) + ret = put_user(val, uaddr); + return ret; } static int vgic_v2_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_v2_attr_regs_access(dev, attr, ®, true); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_attr_regs_access(dev, attr, true); + default: + return vgic_set_common_attr(dev, attr); } - - return -ENXIO; } static int vgic_v2_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg = 0; - - ret = vgic_v2_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - return put_user(reg, uaddr); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_attr_regs_access(dev, attr, false); + default: + return vgic_get_common_attr(dev, attr); } - - return -ENXIO; } static int vgic_v2_has_attr(struct kvm_device *dev, @@ -512,18 +503,18 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, * * @dev: kvm device handle * @attr: kvm device attribute - * @reg: address the value is read or written * @is_write: true if userspace is writing a register */ static int vgic_v3_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, - u64 *reg, bool is_write) + bool is_write) { struct vgic_reg_attr reg_attr; gpa_t addr; struct kvm_vcpu *vcpu; + bool uaccess; + u32 val; int ret; - u32 tmp32; ret = vgic_v3_parse_attr(dev, attr, ®_attr); if (ret) @@ -532,43 +523,45 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, vcpu = reg_attr.vcpu; addr = reg_attr.addr; - mutex_lock(&dev->kvm->lock); + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + /* Sysregs uaccess is performed by the sysreg handling code */ + uaccess = false; + break; + default: + uaccess = true; + } - if (unlikely(!vgic_initialized(dev->kvm))) { - ret = -EBUSY; - goto out; + if (uaccess && is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + if (get_user(val, uaddr)) + return -EFAULT; } + mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { ret = -EBUSY; goto out; } switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - if (is_write) - tmp32 = *reg; - - ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32); - if (!is_write) - *reg = tmp32; + ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &val); break; case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: - if (is_write) - tmp32 = *reg; - - ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32); - if (!is_write) - *reg = tmp32; + ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &val); break; - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 regid; - - regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); - ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write, - regid, reg); + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + ret = vgic_v3_cpu_sysregs_uaccess(vcpu, attr, is_write); break; - } case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { unsigned int info, intid; @@ -578,7 +571,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, intid = attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, - intid, reg); + intid, &val); } else { ret = -EINVAL; } @@ -589,120 +582,45 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); + + if (!ret && uaccess && !is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + ret = put_user(val, uaddr); + } + return ret; } static int vgic_v3_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 tmp32; - u64 reg; - - if (get_user(tmp32, uaddr)) - return -EFAULT; - - reg = tmp32; - return vgic_v3_attr_regs_access(dev, attr, ®, true); - } - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_v3_attr_regs_access(dev, attr, ®, true); - } - case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u64 reg; - u32 tmp32; - - if (get_user(tmp32, uaddr)) - return -EFAULT; - - reg = tmp32; - return vgic_v3_attr_regs_access(dev, attr, ®, true); - } - case KVM_DEV_ARM_VGIC_GRP_CTRL: { - int ret; - - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: - mutex_lock(&dev->kvm->lock); - - if (!lock_all_vcpus(dev->kvm)) { - mutex_unlock(&dev->kvm->lock); - return -EBUSY; - } - ret = vgic_v3_save_pending_tables(dev->kvm); - unlock_all_vcpus(dev->kvm); - mutex_unlock(&dev->kvm->lock); - return ret; - } - break; - } + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + return vgic_v3_attr_regs_access(dev, attr, true); + default: + return vgic_set_common_attr(dev, attr); } - return -ENXIO; } static int vgic_v3_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u64 reg; - u32 tmp32; - - ret = vgic_v3_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - tmp32 = reg; - return put_user(tmp32, uaddr); - } - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 reg; - - ret = vgic_v3_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - return put_user(reg, uaddr); - } - case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u64 reg; - u32 tmp32; - - ret = vgic_v3_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - tmp32 = reg; - return put_user(tmp32, uaddr); - } + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + return vgic_v3_attr_regs_access(dev, attr, false); + default: + return vgic_get_common_attr(dev, attr); } - return -ENXIO; } static int vgic_v3_has_attr(struct kvm_device *dev, diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index f15e29cc63ce..c15ee1df036a 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -111,7 +111,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, case GICD_CTLR: { bool was_enabled, is_hwsgi; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); was_enabled = dist->enabled; is_hwsgi = dist->nassgireq; @@ -139,7 +139,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, else if (!was_enabled && dist->enabled) vgic_kick_vcpus(vcpu->kvm); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; } case GICD_TYPER: @@ -357,31 +357,13 @@ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - unsigned long flags; - - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (test_bit(i, &val)) { - /* - * pending_latch is set irrespective of irq type - * (level or edge) to avoid dependency that VM should - * restore irq config before pending info. - */ - irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); - } else { - irq->pending_latch = false; - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } + int ret; - vgic_put_irq(vcpu->kvm, irq); - } + ret = vgic_uaccess_write_spending(vcpu, addr, len, val); + if (ret) + return ret; - return 0; + return vgic_uaccess_write_cpending(vcpu, addr, len, ~val); } /* We want to avoid outer shareable. */ @@ -769,10 +751,13 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; struct vgic_redist_region *rdreg; gpa_t rd_base; - int ret; + int ret = 0; + + lockdep_assert_held(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) - return 0; + goto out_unlock; /* * We may be creating VCPUs before having set the base address for the @@ -782,10 +767,12 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) */ rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); if (!rdreg) - return 0; + goto out_unlock; - if (!vgic_v3_check_base(kvm)) - return -EINVAL; + if (!vgic_v3_check_base(kvm)) { + ret = -EINVAL; + goto out_unlock; + } vgic_cpu->rdreg = rdreg; vgic_cpu->rdreg_index = rdreg->free_index; @@ -799,19 +786,23 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); rd_dev->redist_vcpu = vcpu; - mutex_lock(&kvm->slots_lock); + mutex_unlock(&kvm->arch.config_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, 2 * SZ_64K, &rd_dev->dev); - mutex_unlock(&kvm->slots_lock); - if (ret) return ret; + /* Protected by slots_lock */ rdreg->free_index++; return 0; + +out_unlock: + mutex_unlock(&kvm->arch.config_lock); + return ret; } -static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) { struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; @@ -824,6 +815,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) unsigned long c; int ret = 0; + lockdep_assert_held(&kvm->slots_lock); + kvm_for_each_vcpu(c, vcpu, kvm) { ret = vgic_register_redist_iodev(vcpu); if (ret) @@ -834,12 +827,10 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) /* The current c failed, so iterate over the previous ones. */ int i; - mutex_lock(&kvm->slots_lock); for (i = 0; i < c; i++) { vcpu = kvm_get_vcpu(kvm, i); vgic_unregister_redist_iodev(vcpu); } - mutex_unlock(&kvm->slots_lock); } return ret; @@ -938,7 +929,9 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) { int ret; + mutex_lock(&kvm->arch.config_lock); ret = vgic_v3_alloc_redist_region(kvm, index, addr, count); + mutex_unlock(&kvm->arch.config_lock); if (ret) return ret; @@ -950,8 +943,10 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) if (ret) { struct vgic_redist_region *rdreg; + mutex_lock(&kvm->arch.config_lock); rdreg = vgic_v3_rdist_region_from_index(kvm, index); vgic_v3_free_redist_region(rdreg); + mutex_unlock(&kvm->arch.config_lock); return ret; } @@ -986,12 +981,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) iodev.base_addr = 0; break; } - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 reg, id; - - id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); - return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, ®); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_has_cpu_sysregs_attr(vcpu, attr); default: return -ENXIO; } @@ -1006,35 +997,6 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) return 0; } -/* - * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI - * generation register ICC_SGI1R_EL1) with a given VCPU. - * If the VCPU's MPIDR matches, return the level0 affinity, otherwise - * return -1. - */ -static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) -{ - unsigned long affinity; - int level0; - - /* - * Split the current VCPU's MPIDR into affinity level 0 and the - * rest as this is what we have to compare against. - */ - affinity = kvm_vcpu_get_mpidr_aff(vcpu); - level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); - affinity &= ~MPIDR_LEVEL_MASK; - - /* bail out if the upper three levels don't match */ - if (sgi_aff != affinity) - return -1; - - /* Is this VCPU's bit set in the mask ? */ - if (!(sgi_cpu_mask & BIT(level0))) - return -1; - - return level0; -} /* * The ICC_SGI* registers encode the affinity differently from the MPIDR, @@ -1045,6 +1007,38 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) +static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi); + unsigned long flags; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can + * generate interrupts of either group. + */ + if (!irq->group || allow_group1) { + if (!irq->hw) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); +} + /** * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs * @vcpu: The VCPU requesting a SGI @@ -1055,83 +1049,46 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) * This will trap in sys_regs.c and call this function. * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the * target processors as well as a bitmask of 16 Aff0 CPUs. - * If the interrupt routing mode bit is not set, we iterate over all VCPUs to - * check for matching ones. If this bit is set, we signal all, but not the - * calling VCPU. + * + * If the interrupt routing mode bit is not set, we iterate over the Aff0 + * bits and signal the VCPUs matching the provided Aff{3,2,1}. + * + * If this bit is set, we signal all, but not the calling VCPU. */ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) { struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *c_vcpu; - u16 target_cpus; + unsigned long target_cpus; u64 mpidr; - int sgi; - int vcpu_id = vcpu->vcpu_id; - bool broadcast; - unsigned long c, flags; - - sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; - broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); - target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; - mpidr = SGI_AFFINITY_LEVEL(reg, 3); - mpidr |= SGI_AFFINITY_LEVEL(reg, 2); - mpidr |= SGI_AFFINITY_LEVEL(reg, 1); - - /* - * We iterate over all VCPUs to find the MPIDRs matching the request. - * If we have handled one CPU, we clear its bit to detect early - * if we are already finished. This avoids iterating through all - * VCPUs when most of the times we just signal a single VCPU. - */ - kvm_for_each_vcpu(c, c_vcpu, kvm) { - struct vgic_irq *irq; - - /* Exit early if we have dealt with all requested CPUs */ - if (!broadcast && target_cpus == 0) - break; - - /* Don't signal the calling VCPU */ - if (broadcast && c == vcpu_id) - continue; + u32 sgi, aff0; + unsigned long c; - if (!broadcast) { - int level0; + sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg); - level0 = match_mpidr(mpidr, target_cpus, c_vcpu); - if (level0 == -1) + /* Broadcast */ + if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) { + kvm_for_each_vcpu(c, c_vcpu, kvm) { + /* Don't signal the calling VCPU */ + if (c_vcpu == vcpu) continue; - /* remove this matching VCPU from the mask */ - target_cpus &= ~BIT(level0); + vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); } - irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); + return; + } - /* - * An access targeting Group0 SGIs can only generate - * those, while an access targeting Group1 SGIs can - * generate interrupts of either group. - */ - if (!irq->group || allow_group1) { - if (!irq->hw) { - irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); - } else { - /* HW SGI? Ask the GIC to inject it */ - int err; - err = irq_set_irqchip_state(irq->host_irq, - IRQCHIP_STATE_PENDING, - true); - WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } - } else { - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } + /* We iterate over affinities to find the corresponding vcpus */ + mpidr = SGI_AFFINITY_LEVEL(reg, 3); + mpidr |= SGI_AFFINITY_LEVEL(reg, 2); + mpidr |= SGI_AFFINITY_LEVEL(reg, 1); + target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg); - vgic_put_irq(vcpu->kvm, irq); + for_each_set_bit(aff0, &target_cpus, hweight_long(ICC_SGI1R_TARGET_LIST_MASK)) { + c_vcpu = kvm_mpidr_to_vcpu(kvm, mpidr | aff0); + if (c_vcpu) + vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); } } @@ -1158,7 +1115,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, } int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, - u32 intid, u64 *val) + u32 intid, u32 *val) { if (intid % 32) return -EINVAL; diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 997d0fce2088..cf76523a2194 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -301,9 +301,8 @@ static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2); } -void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static void __set_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, + unsigned long val, bool is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; @@ -312,14 +311,22 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - /* GICD_ISPENDR0 SGI bits are WI */ - if (is_vgic_v2_sgi(vcpu, irq)) { + /* GICD_ISPENDR0 SGI bits are WI when written from the guest. */ + if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { vgic_put_irq(vcpu->kvm, irq); continue; } raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * GICv2 SGIs are terribly broken. We can't restore + * the source of the interrupt, so just pick the vcpu + * itself as the source... + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source |= BIT(vcpu->vcpu_id); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { /* HW SGI? Ask the GIC to inject it */ int err; @@ -335,7 +342,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, } irq->pending_latch = true; - if (irq->hw) + if (irq->hw && !is_user) vgic_irq_set_phys_active(irq, true); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); @@ -343,33 +350,18 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, } } +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __set_pending(vcpu, addr, len, val, false); +} + int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - unsigned long flags; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = true; - - /* - * GICv2 SGIs are terribly broken. We can't restore - * the source of the interrupt, so just pick the vcpu - * itself as the source... - */ - if (is_vgic_v2_sgi(vcpu, irq)) - irq->source |= BIT(vcpu->vcpu_id); - - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); - - vgic_put_irq(vcpu->kvm, irq); - } - + __set_pending(vcpu, addr, len, val, true); return 0; } @@ -394,9 +386,9 @@ static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq) vgic_irq_set_phys_active(irq, false); } -void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static void __clear_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val, bool is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; @@ -405,14 +397,22 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - /* GICD_ICPENDR0 SGI bits are WI */ - if (is_vgic_v2_sgi(vcpu, irq)) { + /* GICD_ICPENDR0 SGI bits are WI when written from the guest. */ + if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { vgic_put_irq(vcpu->kvm, irq); continue; } raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * More fun with GICv2 SGIs! If we're clearing one of them + * from userspace, which source vcpu to clear? Let's not + * even think of it, and blow the whole set. + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source = 0; + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { /* HW SGI? Ask the GIC to clear its pending bit */ int err; @@ -427,7 +427,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, continue; } - if (irq->hw) + if (irq->hw && !is_user) vgic_hw_irq_cpending(vcpu, irq); else irq->pending_latch = false; @@ -437,33 +437,18 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, } } +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __clear_pending(vcpu, addr, len, val, false); +} + int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - unsigned long flags; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); - /* - * More fun with GICv2 SGIs! If we're clearing one of them - * from userspace, which source vcpu to clear? Let's not - * even think of it, and blow the whole set. - */ - if (is_vgic_v2_sgi(vcpu, irq)) - irq->source = 0; - - irq->pending_latch = false; - - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - - vgic_put_irq(vcpu->kvm, irq); - } - + __clear_pending(vcpu, addr, len, val, true); return 0; } @@ -473,9 +458,10 @@ int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, * active state can be overwritten when the VCPU's state is synced coming back * from the guest. * - * For shared interrupts as well as GICv3 private interrupts, we have to - * stop all the VCPUs because interrupts can be migrated while we don't hold - * the IRQ locks and we don't want to be chasing moving targets. + * For shared interrupts as well as GICv3 private interrupts accessed from the + * non-owning CPU, we have to stop all the VCPUs because interrupts can be + * migrated while we don't hold the IRQ locks and we don't want to be chasing + * moving targets. * * For GICv2 private interrupts we don't have to do anything because * userspace accesses to the VGIC state already require all VCPUs to be @@ -484,7 +470,8 @@ int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, */ static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid) { - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 && + vcpu != kvm_get_running_vcpu()) || intid >= VGIC_NR_PRIVATE_IRQS) kvm_arm_halt_guest(vcpu->kvm); } @@ -492,7 +479,8 @@ static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid) /* See vgic_access_active_prepare */ static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid) { - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 && + vcpu != kvm_get_running_vcpu()) || intid >= VGIC_NR_PRIVATE_IRQS) kvm_arm_resume_guest(vcpu->kvm); } @@ -527,13 +515,13 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 val; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); val = __vgic_mmio_read_active(vcpu, addr, len); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); return val; } @@ -622,13 +610,13 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_cactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, @@ -659,13 +647,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_sactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, @@ -775,10 +763,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, } } -u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) +u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) { int i; - u64 val = 0; + u32 val = 0; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; for (i = 0; i < 32; i++) { @@ -798,7 +786,7 @@ u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) } void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, - const u64 val) + const u32 val) { int i; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; @@ -1093,7 +1081,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type type) { struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; - int ret = 0; unsigned int len; switch (type) { @@ -1111,10 +1098,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, io_device->iodev_type = IODEV_DIST; io_device->redist_vcpu = NULL; - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, - len, &io_device->dev); - mutex_unlock(&kvm->slots_lock); - - return ret; + return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, + len, &io_device->dev); } diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h index 6082d4b66d39..5b490a4dfa5e 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.h +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -207,10 +207,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, bool is_write, int offset, u32 *val); -u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); +u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, - const u64 val); + const u32 val); unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 645648349c99..7e9cdb78f7ce 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -312,12 +312,6 @@ int vgic_v2_map_resources(struct kvm *kvm) return ret; } - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); - if (ret) { - kvm_err("Unable to register VGIC MMIO regions\n"); - return ret; - } - if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, kvm_vgic_global_state.vcpu_base, diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 826ff6f2a4e7..4ea3340786b9 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -3,6 +3,7 @@ #include <linux/irqchip/arm-gic-v3.h> #include <linux/irq.h> #include <linux/irqdomain.h> +#include <linux/kstrtox.h> #include <linux/kvm.h> #include <linux/kvm_host.h> #include <kvm/arm_vgic.h> @@ -339,7 +340,7 @@ retry: if (status) { /* clear consumed data */ val &= ~(1 << bit_nr); - ret = kvm_write_guest_lock(kvm, ptr, &val, 1); + ret = vgic_write_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; } @@ -350,26 +351,23 @@ retry: * The deactivation of the doorbell interrupt will trigger the * unmapping of the associated vPE. */ -static void unmap_all_vpes(struct vgic_dist *dist) +static void unmap_all_vpes(struct kvm *kvm) { - struct irq_desc *desc; + struct vgic_dist *dist = &kvm->arch.vgic; int i; - for (i = 0; i < dist->its_vm.nr_vpes; i++) { - desc = irq_to_desc(dist->its_vm.vpes[i]->irq); - irq_domain_deactivate_irq(irq_desc_get_irq_data(desc)); - } + for (i = 0; i < dist->its_vm.nr_vpes; i++) + free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i)); } -static void map_all_vpes(struct vgic_dist *dist) +static void map_all_vpes(struct kvm *kvm) { - struct irq_desc *desc; + struct vgic_dist *dist = &kvm->arch.vgic; int i; - for (i = 0; i < dist->its_vm.nr_vpes; i++) { - desc = irq_to_desc(dist->its_vm.vpes[i]->irq); - irq_domain_activate_irq(irq_desc_get_irq_data(desc), false); - } + for (i = 0; i < dist->its_vm.nr_vpes; i++) + WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i), + dist->its_vm.vpes[i]->irq)); } /** @@ -382,6 +380,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) struct vgic_irq *irq; gpa_t last_ptr = ~(gpa_t)0; bool vlpi_avail = false; + unsigned long index; int ret = 0; u8 val; @@ -394,11 +393,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) * and enabling of the doorbells have already been done. */ if (kvm_vgic_global_state.has_gicv4_1) { - unmap_all_vpes(dist); + unmap_all_vpes(kvm); vlpi_avail = true; } - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + xa_for_each(&dist->lpi_xa, index, irq) { int byte_offset, bit_nr; struct kvm_vcpu *vcpu; gpa_t pendbase, ptr; @@ -437,14 +436,14 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) else val &= ~(1 << bit_nr); - ret = kvm_write_guest_lock(kvm, ptr, &val, 1); + ret = vgic_write_guest_lock(kvm, ptr, &val, 1); if (ret) goto out; } out: if (vlpi_avail) - map_all_vpes(dist); + map_all_vpes(kvm); return ret; } @@ -541,7 +540,6 @@ int vgic_v3_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int ret = 0; unsigned long c; kvm_for_each_vcpu(c, vcpu, kvm) { @@ -571,12 +569,6 @@ int vgic_v3_map_resources(struct kvm *kvm) return -EBUSY; } - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); - if (ret) { - kvm_err("Unable to register VGICv3 dist MMIO regions\n"); - return ret; - } - if (kvm_vgic_global_state.has_gicv4_1) vgic_v4_configure_vsgis(kvm); @@ -587,25 +579,25 @@ DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); static int __init early_group0_trap_cfg(char *buf) { - return strtobool(buf, &group0_trap); + return kstrtobool(buf, &group0_trap); } early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); static int __init early_group1_trap_cfg(char *buf) { - return strtobool(buf, &group1_trap); + return kstrtobool(buf, &group1_trap); } early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); static int __init early_common_trap_cfg(char *buf) { - return strtobool(buf, &common_trap); + return kstrtobool(buf, &common_trap); } early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); static int __init early_gicv4_enable(char *buf) { - return strtobool(buf, &gicv4_enable); + return kstrtobool(buf, &gicv4_enable); } early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); @@ -616,6 +608,12 @@ static const struct midr_range broken_seis[] = { MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX), {}, }; @@ -687,7 +685,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); - if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) { group0_trap = true; group1_trap = true; } @@ -752,7 +750,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - WARN_ON(vgic_v4_put(vcpu, false)); + WARN_ON(vgic_v4_put(vcpu)); vgic_v3_vmcr_sync(vcpu); diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index ad06ba6c9b00..74a67ad87f29 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -184,13 +184,14 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) } } -/* Must be called with the kvm lock held */ void vgic_v4_configure_vsgis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + kvm_arm_halt_guest(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -222,14 +223,18 @@ void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val) *val = !!(*ptr & mask); } +int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq) +{ + return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu); +} + /** * vgic_v4_init - Initialize the GICv4 data structures * @kvm: Pointer to the VM being initialized * * We may be called each time a vITS is created, or when the - * vgic is initialized. This relies on kvm->lock to be - * held. In both cases, the number of vcpus should now be - * fixed. + * vgic is initialized. In both cases, the number of vcpus + * should now be fixed. */ int vgic_v4_init(struct kvm *kvm) { @@ -238,6 +243,8 @@ int vgic_v4_init(struct kvm *kvm) int nr_vcpus, ret; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!kvm_vgic_global_state.has_gicv4) return 0; /* Nothing to see here... move along. */ @@ -283,8 +290,7 @@ int vgic_v4_init(struct kvm *kvm) irq_flags &= ~IRQ_NOAUTOEN; irq_set_status_flags(irq, irq_flags); - ret = request_irq(irq, vgic_v4_doorbell_handler, - 0, "vcpu", vcpu); + ret = vgic_v4_request_vpe_irq(vcpu, irq); if (ret) { kvm_err("failed to allocate vcpu IRQ%d\n", irq); /* @@ -305,14 +311,14 @@ int vgic_v4_init(struct kvm *kvm) /** * vgic_v4_teardown - Free the GICv4 data structures * @kvm: Pointer to the VM being destroyed - * - * Relies on kvm->lock to be held. */ void vgic_v4_teardown(struct kvm *kvm) { struct its_vm *its_vm = &kvm->arch.vgic.its_vm; int i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!its_vm->vpes) return; @@ -330,14 +336,14 @@ void vgic_v4_teardown(struct kvm *kvm) its_vm->vpes = NULL; } -int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db) +int vgic_v4_put(struct kvm_vcpu *vcpu) { struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) return 0; - return its_make_vpe_non_resident(vpe, need_db); + return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI)); } int vgic_v4_load(struct kvm_vcpu *vcpu) @@ -348,6 +354,9 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident) return 0; + if (vcpu_get_flag(vcpu, IN_WFI)) + return 0; + /* * Before making the VPE resident, make sure the redistributor * corresponding to our current CPU expects us here. See the @@ -427,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, if (ret) goto out; + /* Silently exit if the vLPI is already mapped */ + if (irq->hw) + goto out; + /* * Emit the mapping request. If it fails, the ITS probably * isn't v4 compatible, so let's silently bail out. Holding diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d97e6080b421..4ec93587c8cd 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -24,11 +24,14 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { /* * Locking order is always: * kvm->lock (mutex) - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock must be taken with IRQs disabled - * kvm->lpi_list_lock must be taken with IRQs disabled - * vgic_irq->irq_lock must be taken with IRQs disabled + * vcpu->mutex (mutex) + * kvm->arch.config_lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, * we have to disable IRQs before taking this lock and everything lower @@ -52,32 +55,22 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { */ /* - * Iterate over the VM's list of mapped LPIs to find the one with a - * matching interrupt ID and return a reference to the IRQ structure. + * Index the VM's xarray of mapped LPIs and return a reference to the IRQ + * structure. The caller is expected to call vgic_put_irq() later once it's + * finished with the IRQ. */ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = NULL; - unsigned long flags; - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + rcu_read_lock(); - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { - if (irq->intid != intid) - continue; + irq = xa_load(&dist->lpi_xa, intid); + if (!vgic_try_get_irq_kref(irq)) + irq = NULL; - /* - * This increases the refcount, the caller is expected to - * call vgic_put_irq() later once it's finished with the IRQ. - */ - vgic_get_irq_kref(irq); - goto out_unlock; - } - irq = NULL; - -out_unlock: - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + rcu_read_unlock(); return irq; } @@ -118,22 +111,6 @@ static void vgic_irq_release(struct kref *ref) { } -/* - * Drop the refcount on the LPI. Must be called with lpi_list_lock held. - */ -void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - if (!kref_put(&irq->refcount, vgic_irq_release)) - return; - - list_del(&irq->lpi_list); - dist->lpi_list_count--; - - kfree(irq); -} - void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -142,9 +119,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) if (irq->intid < VGIC_MIN_LPI) return; - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - __vgic_put_lpi_locked(kvm, irq); - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + xa_lock_irqsave(&dist->lpi_xa, flags); + __xa_erase(&dist->lpi_xa, irq->intid); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + + atomic_dec(&dist->lpi_count); + kfree_rcu(irq, rcu); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) @@ -201,7 +184,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) } /** - * kvm_vgic_target_oracle - compute the target vcpu for an irq + * vgic_target_oracle - compute the target vcpu for an irq * * @irq: The irq to route. Must be already locked. * @@ -402,7 +385,8 @@ retry: /* * Grab a reference to the irq to reflect the fact that it is - * now in the ap_list. + * now in the ap_list. This is safe as the caller must already hold a + * reference on the irq. */ vgic_get_irq_kref(irq); list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); @@ -420,7 +404,7 @@ retry: /** * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs + * @vcpu: The CPU for PPIs or NULL for global interrupts * @intid: The INTID to inject a new state to. * @level: Edge-triggered: true: to trigger the interrupt * false: to ignore the call @@ -434,24 +418,22 @@ retry: * level-sensitive interrupts. You can think of the level parameter as 1 * being HIGH and 0 being LOW and all devices being active-HIGH. */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level, void *owner) +int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + unsigned int intid, bool level, void *owner) { - struct kvm_vcpu *vcpu; struct vgic_irq *irq; unsigned long flags; int ret; - trace_vgic_update_irq_pending(cpuid, intid, level); - ret = vgic_lazy_init(kvm); if (ret) return ret; - vcpu = kvm_get_vcpu(kvm, cpuid); if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) return -EINVAL; + trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level); + irq = vgic_get_irq(kvm, vcpu, intid); if (!irq) return -EINVAL; @@ -573,6 +555,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) return 0; } +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret = -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw) + ret = irq->hwintid; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + /** * kvm_vgic_set_owner - Set the owner of an interrupt for a VM * diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 4c6bdd321faa..0c2b82de8fa3 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -6,6 +6,7 @@ #define __KVM_ARM_VGIC_NEW_H__ #include <linux/irqchip/arm-gic-common.h> +#include <asm/kvm_mmu.h> #define PRODUCT_ID_KVM 0x4b /* ASCII code K */ #define IMPLEMENTER_ARM 0x43b @@ -131,6 +132,19 @@ static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq) return vgic_irq_get_lr_count(irq) > 1; } +static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa, + const void *data, unsigned long len) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int ret; + + dist->table_write_in_progress = true; + ret = kvm_write_guest_lock(kvm, gpa, data, len); + dist->table_write_in_progress = false; + + return ret; +} + /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC @@ -166,7 +180,6 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, gpa_t addr, int len); struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); -void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); @@ -185,7 +198,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); -void vgic_v2_set_npie(struct kvm_vcpu *vcpu); int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); @@ -207,19 +219,26 @@ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); -static inline void vgic_get_irq_kref(struct vgic_irq *irq) +static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq) { + if (!irq) + return false; + if (irq->intid < VGIC_MIN_LPI) - return; + return true; + + return kref_get_unless_zero(&irq->refcount); +} - kref_get(&irq->refcount); +static inline void vgic_get_irq_kref(struct vgic_irq *irq) +{ + WARN_ON_ONCE(!vgic_try_get_irq_kref(irq)); } void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); -void vgic_v3_set_npie(struct kvm_vcpu *vcpu); void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_enable(struct kvm_vcpu *vcpu); @@ -229,6 +248,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); int vgic_v3_save_pending_tables(struct kvm *kvm); int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); @@ -245,12 +265,11 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); -int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, - u64 id, u64 *val); -int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, - u64 *reg); +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr, bool is_write); +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, - u32 intid, u64 *val); + u32 intid, u32 *val); int kvm_register_vgic_device(unsigned long type); void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); @@ -260,9 +279,6 @@ int vgic_init(struct kvm *kvm); void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); -bool lock_all_vcpus(struct kvm *kvm); -void unlock_all_vcpus(struct kvm *kvm); - static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; @@ -332,5 +348,6 @@ int vgic_v4_init(struct kvm *kvm); void vgic_v4_teardown(struct kvm *kvm); void vgic_v4_configure_vsgis(struct kvm *kvm); void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); +int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); #endif |