Diffstat (limited to 'arch/x86/kernel/apic/vector.c')
-rw-r--r-- | arch/x86/kernel/apic/vector.c | 359
1 file changed, 200 insertions, 159 deletions
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3e6f6b448f6a..a947b46a8b64 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -44,7 +44,18 @@ static cpumask_var_t vector_searchmask;
 static struct irq_chip lapic_controller;
 static struct irq_matrix *vector_matrix;
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
+
+static void vector_cleanup_callback(struct timer_list *tmr);
+
+struct vector_cleanup {
+	struct hlist_head	head;
+	struct timer_list	timer;
+};
+
+static DEFINE_PER_CPU(struct vector_cleanup, vector_cleanup) = {
+	.head	= HLIST_HEAD_INIT,
+	.timer	= __TIMER_INITIALIZER(vector_cleanup_callback, TIMER_PINNED),
+};
 #endif
 
 void lock_vector_lock(void)
@@ -172,6 +183,7 @@ setnew:
 	apicd->cpu = newcpu;
 	BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
 	per_cpu(vector_irq, newcpu)[newvec] = desc;
+	apic_update_irq_cfg(irqd, newvec, newcpu);
 }
 
 static void vector_assign_managed_shutdown(struct irq_data *irqd)
@@ -250,7 +262,6 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
 	if (vector < 0)
 		return vector;
 	apic_update_vector(irqd, vector, cpu);
-	apic_update_irq_cfg(irqd, vector, cpu);
 
 	return 0;
 }
@@ -327,7 +338,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
 
 	if (vector < 0)
 		return vector;
 	apic_update_vector(irqd, vector, cpu);
-	apic_update_irq_cfg(irqd, vector, cpu);
+
 	return 0;
 }
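The first hunk replaces the bare per-CPU cleanup_list with a vector_cleanup container that pairs the pending-move list with a TIMER_PINNED timer, statically initialized through __TIMER_INITIALIZER() so no runtime setup is needed per CPU. The following hunks fold the apic_update_irq_cfg() call into apic_update_vector(), leaving a single place that programs the hardware configuration. A minimal sketch of the per-CPU container pattern follows; the demo_* names are illustrative stand-ins, not part of the patch:

#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/list.h>

static void demo_timer_fn(struct timer_list *tmr);

struct demo_cleanup {
	struct hlist_head	head;
	struct timer_list	timer;
};

/*
 * Static initialization works for per-CPU timers; TIMER_PINNED
 * guarantees the timer expires on the CPU it was armed on via
 * add_timer_on().
 */
static DEFINE_PER_CPU(struct demo_cleanup, demo_cleanup) = {
	.head	= HLIST_HEAD_INIT,
	.timer	= __TIMER_INITIALIZER(demo_timer_fn, TIMER_PINNED),
};

static void demo_timer_fn(struct timer_list *tmr)
{
	/* Recover the per-CPU container from the timer_list pointer */
	struct demo_cleanup *cl = container_of(tmr, typeof(*cl), timer);

	/* ... drain cl->head here ... */
}

Embedding the timer in the container is what allows the callback to find its list with container_of() instead of a per-CPU lookup.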
@@ -536,13 +547,9 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 	struct irq_data *irqd;
 	int i, err, node;
 
-	if (disable_apic)
+	if (apic_is_disabled)
 		return -ENXIO;
 
-	/* Currently vector allocator can't guarantee contiguous allocations */
-	if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
-		return -ENOSYS;
-
 	/*
 	 * Catch any attempt to touch the cascade interrupt on a PIC
 	 * equipped system.
@@ -684,7 +691,7 @@ static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec,
 	 * if IRQ remapping is enabled. APIC IDs above 15 bits are
 	 * only permitted if IRQ remapping is enabled, so check that.
 	 */
-	if (apic->apic_id_valid(32768))
+	if (apic_id_valid(32768))
 		return 0;
 
 	return x86_fwspec_is_ioapic(fwspec) || x86_fwspec_is_hpet(fwspec);
@@ -705,8 +712,8 @@ int __init arch_probe_nr_irqs(void)
 {
 	int nr;
 
-	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-		nr_irqs = NR_VECTORS * nr_cpu_ids;
+	if (irq_get_nr_irqs() > NR_VECTORS * nr_cpu_ids)
+		irq_set_nr_irqs(NR_VECTORS * nr_cpu_ids);
 
 	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
 #if defined(CONFIG_PCI_MSI)
@@ -718,8 +725,8 @@ int __init arch_probe_nr_irqs(void)
 	else
 		nr += gsi_top * 16;
 #endif
-	if (nr < nr_irqs)
-		nr_irqs = nr;
+	if (nr < irq_get_nr_irqs())
+		irq_set_nr_irqs(nr);
 
 	/*
 	 * We don't know if PIC is present at this point so we need to do
@@ -731,8 +738,8 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace)
 {
 	/*
-	 * Use assign system here so it wont get accounted as allocated
-	 * and moveable in the cpu hotplug check and it prevents managed
+	 * Use assign system here so it won't get accounted as allocated
+	 * and movable in the cpu hotplug check and it prevents managed
 	 * irq reservation from touching it.
 	 */
	irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
 }
@@ -792,7 +799,7 @@ int __init arch_early_irq_init(void)
 	x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
 						   NULL);
 	BUG_ON(x86_vector_domain == NULL);
-	irq_set_default_host(x86_vector_domain);
+	irq_set_default_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
 
@@ -845,10 +852,21 @@ void lapic_online(void)
 		this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
 }
 
+static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr);
+
 void lapic_offline(void)
 {
+	struct vector_cleanup *cl = this_cpu_ptr(&vector_cleanup);
+
 	lock_vector_lock();
+
+	/* In case the vector cleanup timer has not expired */
+	__vector_cleanup(cl, false);
+
 	irq_matrix_offline(vector_matrix);
+	WARN_ON_ONCE(timer_delete_sync_try(&cl->timer) < 0);
+	WARN_ON_ONCE(!hlist_empty(&cl->head));
+
 	unlock_vector_lock();
 }
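lapic_offline() now drains whatever the not-yet-expired cleanup timer would have handled, takes the matrix offline, and only then verifies that the timer is truly dead. timer_delete_sync_try() returns a negative value only when the callback is executing concurrently, which cannot happen on an already quiesced outgoing CPU, hence the WARN_ON_ONCE(). A sketch of that teardown order, reusing the hypothetical demo_cleanup container from above (demo_drain() is a hypothetical drain helper, sketched further below):

static void demo_offline(struct demo_cleanup *cl)
{
	/* Flush entries the pinned timer did not get to process */
	demo_drain(cl, false);

	/*
	 * A negative return would mean the callback is running
	 * concurrently; on a quiesced CPU that indicates a bug.
	 */
	WARN_ON_ONCE(timer_delete_sync_try(&cl->timer) < 0);
	WARN_ON_ONCE(!hlist_empty(&cl->head));
}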
@@ -870,50 +888,6 @@ static int apic_set_affinity(struct irq_data *irqd,
 
 	return err ? err : IRQ_SET_MASK_OK;
 }
-#else
-# define apic_set_affinity NULL
-#endif
-
-static int apic_retrigger_irq(struct irq_data *irqd)
-{
-	struct apic_chip_data *apicd = apic_chip_data(irqd);
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	apic->send_IPI(apicd->cpu, apicd->vector);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-
-void apic_ack_irq(struct irq_data *irqd)
-{
-	irq_move_irq(irqd);
-	ack_APIC_irq();
-}
-
-void apic_ack_edge(struct irq_data *irqd)
-{
-	irq_complete_move(irqd_cfg(irqd));
-	apic_ack_irq(irqd);
-}
-
-static void x86_vector_msi_compose_msg(struct irq_data *data,
-				       struct msi_msg *msg)
-{
-	__irq_msi_compose_msg(irqd_cfg(data), msg, false);
-}
-
-static struct irq_chip lapic_controller = {
-	.name			= "APIC",
-	.irq_ack		= apic_ack_edge,
-	.irq_set_affinity	= apic_set_affinity,
-	.irq_compose_msi_msg	= x86_vector_msi_compose_msg,
-	.irq_retrigger		= apic_retrigger_irq,
-};
-
-#ifdef CONFIG_SMP
-
 static void free_moved_vector(struct apic_chip_data *apicd)
 {
 	unsigned int vector = apicd->prev_vector;
@@ -938,116 +912,27 @@ static void free_moved_vector(struct apic_chip_data *apicd)
 	apicd->move_in_progress = 0;
 }
 
-DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup)
-{
-	struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
-	struct apic_chip_data *apicd;
-	struct hlist_node *tmp;
-
-	ack_APIC_irq();
-	/* Prevent vectors vanishing under us */
-	raw_spin_lock(&vector_lock);
-
-	hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
-		unsigned int irr, vector = apicd->prev_vector;
-
-		/*
-		 * Paranoia: Check if the vector that needs to be cleaned
-		 * up is registered at the APICs IRR. If so, then this is
-		 * not the best time to clean it up. Clean it up in the
-		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
-		 * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
-		 * priority external vector, so on return from this
-		 * interrupt the device interrupt will happen first.
-		 */
-		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		if (irr & (1U << (vector % 32))) {
-			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
-			continue;
-		}
-		free_moved_vector(apicd);
-	}
-
-	raw_spin_unlock(&vector_lock);
-}
-
-static void __send_cleanup_vector(struct apic_chip_data *apicd)
-{
-	unsigned int cpu;
-
-	raw_spin_lock(&vector_lock);
-	apicd->move_in_progress = 0;
-	cpu = apicd->prev_cpu;
-	if (cpu_online(cpu)) {
-		hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
-		apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		apicd->prev_vector = 0;
-	}
-	raw_spin_unlock(&vector_lock);
-}
-
-void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	struct apic_chip_data *apicd;
-
-	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
-	if (apicd->move_in_progress)
-		__send_cleanup_vector(apicd);
-}
-
-void irq_complete_move(struct irq_cfg *cfg)
-{
-	struct apic_chip_data *apicd;
-
-	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
-	if (likely(!apicd->move_in_progress))
-		return;
-
-	/*
-	 * If the interrupt arrived on the new target CPU, cleanup the
-	 * vector on the old target CPU. A vector check is not required
-	 * because an interrupt can never move from one vector to another
-	 * on the same CPU.
-	 */
-	if (apicd->cpu == smp_processor_id())
-		__send_cleanup_vector(apicd);
-}
-
 /*
  * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
-void irq_force_complete_move(struct irq_desc *desc)
+static void apic_force_complete_move(struct irq_data *irqd)
 {
+	unsigned int cpu = smp_processor_id();
 	struct apic_chip_data *apicd;
-	struct irq_data *irqd;
 	unsigned int vector;
 
-	/*
-	 * The function is called for all descriptors regardless of which
-	 * irqdomain they belong to. For example if an IRQ is provided by
-	 * an irq_chip as part of a GPIO driver, the chip data for that
-	 * descriptor is specific to the irq_chip in question.
-	 *
-	 * Check first that the chip_data is what we expect
-	 * (apic_chip_data) before touching it any further.
-	 */
-	irqd = irq_domain_get_irq_data(x86_vector_domain,
-				       irq_desc_get_irq(desc));
-	if (!irqd)
-		return;
-
-	raw_spin_lock(&vector_lock);
+	guard(raw_spinlock)(&vector_lock);
 	apicd = apic_chip_data(irqd);
 	if (!apicd)
-		goto unlock;
+		return;
 
 	/*
-	 * If prev_vector is empty, no action required.
+	 * If prev_vector is empty or the descriptor is neither currently
	 * nor previously on the outgoing CPU no action required.
 	 */
 	vector = apicd->prev_vector;
-	if (!vector)
-		goto unlock;
+	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+		return;
 
 	/*
 	 * This is tricky. If the cleanup of the old vector has not been
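Turning irq_force_complete_move() into the apic_force_complete_move() irqchip callback removes the irqdomain lookup and, with it, the need to validate that the chip data really is apic_chip_data. The goto-unlock dance becomes a <linux/cleanup.h> scope guard: guard(raw_spinlock)(&vector_lock) drops the lock automatically on every return path. A generic sketch of the idiom, with an illustrative demo_lock rather than the real vector_lock, and assuming vector.c's private struct apic_chip_data is visible:

#include <linux/cleanup.h>
#include <linux/spinlock.h>
#include <linux/irq.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

static void demo_force_complete(struct irq_data *irqd)
{
	struct apic_chip_data *apicd;

	/* Lock is released whenever this scope is left */
	guard(raw_spinlock)(&demo_lock);

	apicd = irqd->chip_data;
	if (!apicd)
		return;		/* early return, no goto-unlock label */

	/* ... operate on apicd with demo_lock held ... */
}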
@@ -1101,10 +986,166 @@ void irq_force_complete_move(struct irq_desc *desc)
 			irqd->irq, vector);
 	}
 	free_moved_vector(apicd);
-unlock:
-	raw_spin_unlock(&vector_lock);
-}
+}
+
+#else
+# define apic_set_affinity		NULL
+# define apic_force_complete_move	NULL
+#endif
+
+static int apic_retrigger_irq(struct irq_data *irqd)
+{
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	__apic_send_IPI(apicd->cpu, apicd->vector);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	return 1;
+}
+
+void apic_ack_irq(struct irq_data *irqd)
+{
+	irq_move_irq(irqd);
+	apic_eoi();
+}
+
+void apic_ack_edge(struct irq_data *irqd)
+{
+	irq_complete_move(irqd_cfg(irqd));
+	apic_ack_irq(irqd);
+}
+
+static void x86_vector_msi_compose_msg(struct irq_data *data,
+				       struct msi_msg *msg)
+{
+	__irq_msi_compose_msg(irqd_cfg(data), msg, false);
+}
+
+static struct irq_chip lapic_controller = {
+	.name				= "APIC",
+	.irq_ack			= apic_ack_edge,
+	.irq_set_affinity		= apic_set_affinity,
+	.irq_compose_msi_msg		= x86_vector_msi_compose_msg,
+	.irq_force_complete_move	= apic_force_complete_move,
+	.irq_retrigger			= apic_retrigger_irq,
+};
+
+#ifdef CONFIG_SMP
+
+static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
+{
+	struct apic_chip_data *apicd;
+	struct hlist_node *tmp;
+	bool rearm = false;
+
+	lockdep_assert_held(&vector_lock);
+
+	hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) {
+		unsigned int vector = apicd->prev_vector;
+
+		/*
+		 * Paranoia: Check if the vector that needs to be cleaned
+		 * up is registered at the APICs IRR. That's clearly a
+		 * hardware issue if the vector arrived on the old target
+		 * _after_ interrupts were disabled above. Keep @apicd
+		 * on the list and schedule the timer again to give the CPU
+		 * a chance to handle the pending interrupt.
+		 *
+		 * Do not check IRR when called from lapic_offline(),
+		 * because fixup_irqs() was just called to scan IRR for
+		 * set bits and forward them to new destination CPUs via
+		 * IPIs.
+		 */
+		if (check_irr && is_vector_pending(vector)) {
+			pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq);
+			rearm = true;
+			continue;
+		}
+		free_moved_vector(apicd);
+	}
+
+	/*
+	 * Must happen under vector_lock to make the timer_pending()
+	 * check in __vector_schedule_cleanup() race free against the
+	 * rearm here.
+	 */
+	if (rearm)
+		mod_timer(&cl->timer, jiffies + 1);
+}
+
+static void vector_cleanup_callback(struct timer_list *tmr)
+{
+	struct vector_cleanup *cl = container_of(tmr, typeof(*cl), timer);
+
+	/* Prevent vectors vanishing under us */
+	raw_spin_lock_irq(&vector_lock);
+	__vector_cleanup(cl, true);
+	raw_spin_unlock_irq(&vector_lock);
+}
+
+static void __vector_schedule_cleanup(struct apic_chip_data *apicd)
+{
+	unsigned int cpu = apicd->prev_cpu;
+
+	raw_spin_lock(&vector_lock);
+	apicd->move_in_progress = 0;
+	if (cpu_online(cpu)) {
+		struct vector_cleanup *cl = per_cpu_ptr(&vector_cleanup, cpu);
+
+		hlist_add_head(&apicd->clist, &cl->head);
+
+		/*
+		 * The lockless timer_pending() check is safe here. If it
+		 * returns true, then the callback will observe this new
+		 * apic data in the hlist as everything is serialized by
+		 * vector lock.
+		 *
+		 * If it returns false then the timer is either not armed
+		 * or the other CPU executes the callback, which again
+		 * would be blocked on vector lock. Rearming it in the
+		 * latter case makes it fire for nothing.
+		 *
+		 * This is also safe against the callback rearming the
+		 * timer because that's serialized via vector lock too.
+		 */
+		if (!timer_pending(&cl->timer)) {
+			cl->timer.expires = jiffies + 1;
+			add_timer_on(&cl->timer, cpu);
+		}
+	} else {
+		pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
+		free_moved_vector(apicd);
+	}
+	raw_spin_unlock(&vector_lock);
+}
+
+void vector_schedule_cleanup(struct irq_cfg *cfg)
+{
+	struct apic_chip_data *apicd;
+
+	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
+	if (apicd->move_in_progress)
+		__vector_schedule_cleanup(apicd);
+}
+
+void irq_complete_move(struct irq_cfg *cfg)
+{
+	struct apic_chip_data *apicd;
+
+	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
+	if (likely(!apicd->move_in_progress))
+		return;
+
+	/*
+	 * If the interrupt arrived on the new target CPU, cleanup the
+	 * vector on the old target CPU. A vector check is not required
+	 * because an interrupt can never move from one vector to another
+	 * on the same CPU.
+	 */
+	if (apicd->cpu == smp_processor_id())
+		__vector_schedule_cleanup(apicd);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Note, this is not accurate accounting, but at least good enough to
@@ -1154,7 +1195,7 @@ static void __init print_local_APIC(void *dummy)
 	u64 icr;
 
 	pr_debug("printing local APIC contents on CPU#%d/%d:\n",
-		 smp_processor_id(), hard_smp_processor_id());
+		 smp_processor_id(), read_apic_id());
 	v = apic_read(APIC_ID);
 	pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
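The timer callback replaces the old IRQ_MOVE_CLEANUP_VECTOR self-IPI: entries whose vector is still pending in the IRR simply stay on the list and the timer rearms itself one jiffy out, under vector_lock so the rearm cannot race the timer_pending() check on the scheduling side. The drain half of the running demo sketch, mirroring is_vector_pending() with hypothetical demo_vector_pending() and demo_free() helpers and assuming apicd->clist is the entry's hlist_node:

static void demo_drain(struct demo_cleanup *cl, bool check_pending)
{
	struct apic_chip_data *apicd;
	struct hlist_node *tmp;
	bool rearm = false;

	lockdep_assert_held(&demo_lock);

	hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) {
		/* Vector still pending? Leave it queued, retry later. */
		if (check_pending && demo_vector_pending(apicd)) {
			rearm = true;
			continue;
		}
		demo_free(apicd);
	}

	/* Under demo_lock: ordered against timer_pending() below */
	if (rearm)
		mod_timer(&cl->timer, jiffies + 1);
}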
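On the scheduling side, the pinned timer is armed with add_timer_on() only when timer_pending() reports it idle. As the in-code comment explains, the check looks lockless but both sides serialize on vector_lock, so the worst case is one spurious expiry. The matching half of the demo sketch:

static void demo_schedule_cleanup(struct apic_chip_data *apicd,
				  unsigned int cpu)
{
	struct demo_cleanup *cl = per_cpu_ptr(&demo_cleanup, cpu);

	guard(raw_spinlock)(&demo_lock);

	hlist_add_head(&apicd->clist, &cl->head);

	/*
	 * Racy-looking but safe: a concurrent callback or rearm
	 * serializes on demo_lock; at worst the timer fires once
	 * with an already drained list.
	 */
	if (!timer_pending(&cl->timer)) {
		cl->timer.expires = jiffies + 1;
		add_timer_on(&cl->timer, cpu);	/* expires pinned on @cpu */
	}
}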