diff options
Diffstat (limited to 'arch/x86/kernel/irq.c')
-rw-r--r-- | arch/x86/kernel/irq.c | 200 |
1 files changed, 174 insertions, 26 deletions
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 766ffe3ba313..10721a125226 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -22,13 +22,22 @@ #include <asm/desc.h> #include <asm/traps.h> #include <asm/thermal.h> +#include <asm/posted_intr.h> +#include <asm/irq_remapping.h> +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> +#endif DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); +DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); +EXPORT_PER_CPU_SYMBOL(__softirq_pending); + +DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); + atomic_t irq_err_count; /* @@ -49,7 +58,7 @@ void ack_bad_irq(unsigned int irq) * completely. * But only ack when the APIC is enabled -AK */ - ack_APIC_irq(); + apic_eoi(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) @@ -164,7 +173,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); #endif -#ifdef CONFIG_HAVE_KVM +#if IS_ENABLED(CONFIG_KVM) seq_printf(p, "%*s: ", prec, "PIN"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); @@ -182,6 +191,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) irq_stats(j)->kvm_posted_intr_wakeup_ipis); seq_puts(p, " Posted-interrupt wakeup event\n"); #endif +#ifdef CONFIG_X86_POSTED_MSI + seq_printf(p, "%*s: ", prec, "PMN"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->posted_msi_notification_count); + seq_puts(p, " Posted MSI notification event\n"); +#endif return 0; } @@ -211,6 +227,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; #endif +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR + sum += irq_stats(cpu)->irq_hv_callback_count; +#endif +#if IS_ENABLED(CONFIG_HYPERV) + sum += irq_stats(cpu)->irq_hv_reenlightenment_count; + sum += irq_stats(cpu)->hyperv_stimer0_count; +#endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); sum += per_cpu(mce_poll_count, cpu); @@ -233,6 +256,61 @@ static __always_inline void handle_irq(struct irq_desc *desc, __handle_irq(desc, regs); } +static struct irq_desc *reevaluate_vector(int vector) +{ + struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); + + if (!IS_ERR_OR_NULL(desc)) + return desc; + + if (desc == VECTOR_UNUSED) + pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); + else + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + return NULL; +} + +static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) +{ + struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); + + if (likely(!IS_ERR_OR_NULL(desc))) { + handle_irq(desc, regs); + return true; + } + + /* + * Reevaluate with vector_lock held to prevent a race against + * request_irq() setting up the vector: + * + * CPU0 CPU1 + * interrupt is raised in APIC IRR + * but not handled + * free_irq() + * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; + * + * request_irq() common_interrupt() + * d = this_cpu_read(vector_irq[vector]); + * + * per_cpu(vector_irq, CPU1)[vector] = desc; + * + * if (d == VECTOR_SHUTDOWN) + * this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + * + * This requires that the same vector on the same target CPU is + * handed out or that a spurious interrupt hits that CPU/vector. + */ + lock_vector_lock(); + desc = reevaluate_vector(vector); + unlock_vector_lock(); + + if (!desc) + return false; + + handle_irq(desc, regs); + return true; +} + /* * common_interrupt() handles all normal device IRQ's (the special SMP * cross-CPU interrupts have their own entry points). @@ -240,25 +318,12 @@ static __always_inline void handle_irq(struct irq_desc *desc, DEFINE_IDTENTRY_IRQ(common_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); - struct irq_desc *desc; /* entry code tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); - desc = __this_cpu_read(vector_irq[vector]); - if (likely(!IS_ERR_OR_NULL(desc))) { - handle_irq(desc, regs); - } else { - ack_APIC_irq(); - - if (desc == VECTOR_UNUSED) { - pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", - __func__, smp_processor_id(), - vector); - } else { - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - } - } + if (unlikely(!call_irq_handler(vector, regs))) + apic_eoi(); set_irq_regs(old_regs); } @@ -273,7 +338,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) { struct pt_regs *old_regs = set_irq_regs(regs); - ack_APIC_irq(); + apic_eoi(); trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) @@ -283,7 +348,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) } #endif -#ifdef CONFIG_HAVE_KVM +#if IS_ENABLED(CONFIG_KVM) static void dummy_handler(void) {} static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; @@ -303,7 +368,7 @@ EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) { - ack_APIC_irq(); + apic_eoi(); inc_irq_stat(kvm_posted_intr_ipis); } @@ -312,7 +377,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) */ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) { - ack_APIC_irq(); + apic_eoi(); inc_irq_stat(kvm_posted_intr_wakeup_ipis); kvm_posted_intr_wakeup_handler(); } @@ -322,17 +387,101 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) { - ack_APIC_irq(); + apic_eoi(); inc_irq_stat(kvm_posted_intr_nested_ipis); } #endif +#ifdef CONFIG_X86_POSTED_MSI + +/* Posted Interrupt Descriptors for coalesced MSIs to be posted */ +DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); + +void intel_posted_msi_init(void) +{ + u32 destination; + u32 apic_id; + + this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); + + /* + * APIC destination ID is stored in bit 8:15 while in XAPIC mode. + * VT-d spec. CH 9.11 + */ + apic_id = this_cpu_read(x86_cpu_to_apicid); + destination = x2apic_enabled() ? apic_id : apic_id << 8; + this_cpu_write(posted_msi_pi_desc.ndst, destination); +} + +static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs) +{ + unsigned long pir_copy[NR_PIR_WORDS]; + int vec = FIRST_EXTERNAL_VECTOR; + + if (!pi_harvest_pir(pir, pir_copy)) + return false; + + for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) + call_irq_handler(vec, regs); + + return true; +} + +/* + * Performance data shows that 3 is good enough to harvest 90+% of the benefit + * on high IRQ rate workload. + */ +#define MAX_POSTED_MSI_COALESCING_LOOP 3 + +/* + * For MSIs that are delivered as posted interrupts, the CPU notifications + * can be coalesced if the MSIs arrive in high frequency bursts. + */ +DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct pi_desc *pid; + int i = 0; + + pid = this_cpu_ptr(&posted_msi_pi_desc); + + inc_irq_stat(posted_msi_notification_count); + irq_enter(); + + /* + * Max coalescing count includes the extra round of handle_pending_pir + * after clearing the outstanding notification bit. Hence, at most + * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here. + */ + while (++i < MAX_POSTED_MSI_COALESCING_LOOP) { + if (!handle_pending_pir(pid->pir, regs)) + break; + } + + /* + * Clear outstanding notification bit to allow new IRQ notifications, + * do this last to maximize the window of interrupt coalescing. + */ + pi_clear_on(pid); + + /* + * There could be a race of PI notification and the clearing of ON bit, + * process PIR bits one last time such that handling the new interrupts + * are not delayed until the next IRQ. + */ + handle_pending_pir(pid->pir, regs); + + apic_eoi(); + irq_exit(); + set_irq_regs(old_regs); +} +#endif /* X86_POSTED_MSI */ #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { - unsigned int irr, vector; + unsigned int vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; @@ -359,8 +508,7 @@ void fixup_irqs(void) if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - if (irr & (1 << (vector % 32))) { + if (is_vector_pending(vector)) { desc = __this_cpu_read(vector_irq[vector]); raw_spin_lock(&desc->lock); @@ -394,6 +542,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) inc_irq_stat(irq_thermal_count); smp_thermal_vector(); trace_thermal_apic_exit(THERMAL_APIC_VECTOR); - ack_APIC_irq(); + apic_eoi(); } #endif |