-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/include/asm/apic.h | 270
-rw-r--r--  arch/x86/include/asm/desc.h | 2
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 6
-rw-r--r--  arch/x86/include/asm/io_apic.h | 2
-rw-r--r--  arch/x86/include/asm/irq.h | 4
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 8
-rw-r--r--  arch/x86/include/asm/irqdomain.h | 5
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 2
-rw-r--r--  arch/x86/include/asm/trace/irq_vectors.h | 248
-rw-r--r--  arch/x86/include/asm/x2apic.h | 50
-rw-r--r--  arch/x86/include/asm/x86_init.h | 2
-rw-r--r--  arch/x86/kernel/apic/Makefile | 2
-rw-r--r--  arch/x86/kernel/apic/apic.c | 239
-rw-r--r--  arch/x86/kernel/apic/apic_common.c | 46
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 10
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 25
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 12
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 8
-rw-r--r--  arch/x86/kernel/apic/htirq.c | 5
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 139
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 29
-rw-r--r--  arch/x86/kernel/apic/vector.c | 1099
-rw-r--r--  arch/x86/kernel/apic/x2apic.h | 9
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 196
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 44
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 17
-rw-r--r--  arch/x86/kernel/i8259.c | 1
-rw-r--r--  arch/x86/kernel/idt.c | 12
-rw-r--r--  arch/x86/kernel/irq.c | 101
-rw-r--r--  arch/x86/kernel/irqinit.c | 4
-rw-r--r--  arch/x86/kernel/setup.c | 12
-rw-r--r--  arch/x86/kernel/smpboot.c | 99
-rw-r--r--  arch/x86/kernel/time.c | 5
-rw-r--r--  arch/x86/kernel/traps.c | 2
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 19
-rw-r--r--  arch/x86/kernel/x86_init.c | 1
-rw-r--r--  arch/x86/platform/uv/uv_irq.c | 5
-rw-r--r--  arch/x86/xen/apic.c | 6
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 1
-rw-r--r--  drivers/gpio/gpio-xgene-sb.c | 8
-rw-r--r--  drivers/iommu/amd_iommu.c | 44
-rw-r--r--  drivers/iommu/intel_irq_remapping.c | 43
-rw-r--r--  drivers/irqchip/irq-gic-v3-its.c | 10
-rw-r--r--  drivers/pci/msi.c | 2
-rw-r--r--  drivers/pinctrl/stm32/pinctrl-stm32.c | 5
-rw-r--r--  include/linux/irq.h | 22
-rw-r--r--  include/linux/irqdesc.h | 1
-rw-r--r--  include/linux/irqdomain.h | 14
-rw-r--r--  include/linux/msi.h | 5
-rw-r--r--  include/trace/events/irq_matrix.h | 201
-rw-r--r--  init/main.c | 2
-rw-r--r--  kernel/irq/Kconfig | 6
-rw-r--r--  kernel/irq/Makefile | 1
-rw-r--r--  kernel/irq/autoprobe.c | 2
-rw-r--r--  kernel/irq/chip.c | 35
-rw-r--r--  kernel/irq/debugfs.c | 12
-rw-r--r--  kernel/irq/internals.h | 19
-rw-r--r--  kernel/irq/irqdesc.c | 3
-rw-r--r--  kernel/irq/irqdomain.c | 43
-rw-r--r--  kernel/irq/manage.c | 18
-rw-r--r--  kernel/irq/matrix.c | 443
-rw-r--r--  kernel/irq/msi.c | 32
63 files changed, 2343 insertions, 1377 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fdb23313dd5..94802149255a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,8 +93,10 @@ config X86
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
+ select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC
select GENERIC_IRQ_MIGRATION if SMP
select GENERIC_IRQ_PROBE
+ select GENERIC_IRQ_RESERVATION_MODE
select GENERIC_IRQ_SHOW
select GENERIC_PENDING_IRQ if SMP
select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5f01671c68f2..a9e57f08bfa6 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -53,6 +53,15 @@ extern int local_apic_timer_c2_ok;
extern int disable_apic;
extern unsigned int lapic_timer_frequency;
+extern enum apic_intr_mode_id apic_intr_mode;
+enum apic_intr_mode_id {
+ APIC_PIC,
+ APIC_VIRTUAL_WIRE,
+ APIC_VIRTUAL_WIRE_NO_CONFIG,
+ APIC_SYMMETRIC_IO,
+ APIC_SYMMETRIC_IO_NO_ROUTING
+};
+
#ifdef CONFIG_SMP
extern void __inquire_remote_apic(int apicid);
#else /* CONFIG_SMP */
@@ -127,14 +136,13 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
+extern void apic_intr_mode_init(void);
extern void setup_local_APIC(void);
extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern void lapic_update_tsc_freq(void);
-extern int APIC_init_uniprocessor(void);
#ifdef CONFIG_X86_64
static inline int apic_force_enable(unsigned long addr)
@@ -145,7 +153,7 @@ static inline int apic_force_enable(unsigned long addr)
extern int apic_force_enable(unsigned long addr);
#endif
-extern int apic_bsp_setup(bool upmode);
+extern void apic_bsp_setup(bool upmode);
extern void apic_ap_setup(void);
/*
@@ -161,6 +169,10 @@ static inline int apic_is_clustered_box(void)
#endif
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+extern void lapic_assign_system_vectors(void);
+extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_online(void);
+extern void lapic_offline(void);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
@@ -170,6 +182,9 @@ static inline void disable_local_APIC(void) { }
# define setup_boot_APIC_clock x86_init_noop
# define setup_secondary_APIC_clock x86_init_noop
static inline void lapic_update_tsc_freq(void) { }
+static inline void apic_intr_mode_init(void) { }
+static inline void lapic_assign_system_vectors(void) { }
+static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
@@ -265,73 +280,63 @@ struct irq_data;
* James Cleverdon.
*/
struct apic {
- char *name;
-
- int (*probe)(void);
- int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
- int (*apic_id_valid)(int apicid);
- int (*apic_id_registered)(void);
-
- u32 irq_delivery_mode;
- u32 irq_dest_mode;
-
- const struct cpumask *(*target_cpus)(void);
-
- int disable_esr;
-
- int dest_logical;
- unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
-
- void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
- const struct cpumask *mask);
- void (*init_apic_ldr)(void);
-
- void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
-
- void (*setup_apic_routing)(void);
- int (*cpu_present_to_apicid)(int mps_cpu);
- void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
- int (*check_phys_apicid_present)(int phys_apicid);
- int (*phys_pkg_id)(int cpuid_apic, int index_msb);
-
- unsigned int (*get_apic_id)(unsigned long x);
- /* Can't be NULL on 64-bit */
- unsigned long (*set_apic_id)(unsigned int id);
-
- int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
- struct irq_data *irqdata,
- unsigned int *apicid);
-
- /* ipi */
- void (*send_IPI)(int cpu, int vector);
- void (*send_IPI_mask)(const struct cpumask *mask, int vector);
- void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
- int vector);
- void (*send_IPI_allbutself)(int vector);
- void (*send_IPI_all)(int vector);
- void (*send_IPI_self)(int vector);
+ /* Hotpath functions first */
+ void (*eoi_write)(u32 reg, u32 v);
+ void (*native_eoi_write)(u32 reg, u32 v);
+ void (*write)(u32 reg, u32 v);
+ u32 (*read)(u32 reg);
+
+ /* IPI related functions */
+ void (*wait_icr_idle)(void);
+ u32 (*safe_wait_icr_idle)(void);
+
+ void (*send_IPI)(int cpu, int vector);
+ void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+ void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec);
+ void (*send_IPI_allbutself)(int vector);
+ void (*send_IPI_all)(int vector);
+ void (*send_IPI_self)(int vector);
+
+ /* dest_logical is used by the IPI functions */
+ u32 dest_logical;
+ u32 disable_esr;
+ u32 irq_delivery_mode;
+ u32 irq_dest_mode;
+
+ /* Functions and data related to vector allocation */
+ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask);
+ int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
+ struct irq_data *irqdata,
+ unsigned int *apicid);
+ u32 (*calc_dest_apicid)(unsigned int cpu);
+
+ /* ICR related functions */
+ u64 (*icr_read)(void);
+ void (*icr_write)(u32 low, u32 high);
+
+ /* Probe, setup and smpboot functions */
+ int (*probe)(void);
+ int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
+ int (*apic_id_valid)(int apicid);
+ int (*apic_id_registered)(void);
+
+ bool (*check_apicid_used)(physid_mask_t *map, int apicid);
+ void (*init_apic_ldr)(void);
+ void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
+ void (*setup_apic_routing)(void);
+ int (*cpu_present_to_apicid)(int mps_cpu);
+ void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
+ int (*check_phys_apicid_present)(int phys_apicid);
+ int (*phys_pkg_id)(int cpuid_apic, int index_msb);
+
+ u32 (*get_apic_id)(unsigned long x);
+ u32 (*set_apic_id)(unsigned int id);
/* wakeup_secondary_cpu */
- int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
+ int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
- void (*inquire_remote_apic)(int apicid);
-
- /* apic ops */
- u32 (*read)(u32 reg);
- void (*write)(u32 reg, u32 v);
- /*
- * ->eoi_write() has the same signature as ->write().
- *
- * Drivers can support both ->eoi_write() and ->write() by passing the same
- * callback value. Kernel can override ->eoi_write() and fall back
- * on write for EOI.
- */
- void (*eoi_write)(u32 reg, u32 v);
- void (*native_eoi_write)(u32 reg, u32 v);
- u64 (*icr_read)(void);
- void (*icr_write)(u32 low, u32 high);
- void (*wait_icr_idle)(void);
- u32 (*safe_wait_icr_idle)(void);
+ void (*inquire_remote_apic)(int apicid);
#ifdef CONFIG_X86_32
/*
@@ -346,6 +351,7 @@ struct apic {
*/
int (*x86_32_early_logical_apicid)(int cpu);
#endif
+ char *name;
};
/*
@@ -380,6 +386,7 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[];
*/
#ifdef CONFIG_SMP
extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+extern int lapic_can_unplug_cpu(void);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
@@ -463,84 +470,33 @@ static inline unsigned default_get_apic_id(unsigned long x)
extern void apic_send_IPI_self(int vector);
DECLARE_PER_CPU(int, x2apic_extra_bits);
-
-extern int default_cpu_present_to_apicid(int mps_cpu);
-extern int default_check_phys_apicid_present(int phys_apicid);
#endif
extern void generic_bigsmp_probe(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
#define APIC_DFR_VALUE (APIC_DFR_FLAT)
-static inline const struct cpumask *default_target_cpus(void)
-{
-#ifdef CONFIG_SMP
- return cpu_online_mask;
-#else
- return cpumask_of(0);
-#endif
-}
-
-static inline const struct cpumask *online_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
+extern struct apic apic_noop;
static inline unsigned int read_apic_id(void)
{
- unsigned int reg;
-
- reg = apic_read(APIC_ID);
+ unsigned int reg = apic_read(APIC_ID);
return apic->get_apic_id(reg);
}
-static inline int default_apic_id_valid(int apicid)
-{
- return (apicid < 255);
-}
-
+extern int default_apic_id_valid(int apicid);
extern int default_acpi_madt_oem_check(char *, char *);
-
extern void default_setup_apic_routing(void);
-extern struct apic apic_noop;
-
-#ifdef CONFIG_X86_32
-
-static inline int noop_x86_32_early_logical_apicid(int cpu)
-{
- return BAD_APICID;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-extern void default_init_apic_ldr(void);
-
-static inline int default_apic_id_registered(void)
-{
- return physid_isset(read_apic_id(), phys_cpu_present_map);
-}
-
-static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-#endif
+extern u32 apic_default_calc_apicid(unsigned int cpu);
+extern u32 apic_flat_calc_apicid(unsigned int cpu);
extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
struct irq_data *irqdata,
@@ -548,71 +504,17 @@ extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
struct irq_data *irqdata,
unsigned int *apicid);
-
-static inline void
-flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_clear(retmask);
- cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
-static inline void
-default_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask)
-{
- cpumask_copy(retmask, cpumask_of(cpu));
-}
-
-static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return physid_isset(apicid, *map);
-}
-
-static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- *retmap = *phys_map;
-}
-
-static inline int __default_cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static inline int
-__default_check_phys_apicid_present(int phys_apicid)
-{
- return physid_isset(phys_apicid, phys_cpu_present_map);
-}
-
-#ifdef CONFIG_X86_32
-static inline int default_cpu_present_to_apicid(int mps_cpu)
-{
- return __default_cpu_present_to_apicid(mps_cpu);
-}
-
-static inline int
-default_check_phys_apicid_present(int phys_apicid)
-{
- return __default_check_phys_apicid_present(phys_apicid);
-}
-#else
+extern bool default_check_apicid_used(physid_mask_t *map, int apicid);
+extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask);
+extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask);
+extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
extern int default_cpu_present_to_apicid(int mps_cpu);
extern int default_check_phys_apicid_present(int phys_apicid);
-#endif
#endif /* CONFIG_X86_LOCAL_APIC */
+
extern void irq_enter(void);
extern void irq_exit(void);
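The struct apic reordering above is driven by cache locality: every local APIC register access and IPI goes through the global apic pointer, so the register accessors and send_IPI_*() callbacks now sit in the leading cache lines while probe/setup-time callbacks and the name string move to the tail. A rough sketch of why ->read()/->write() are hot, assuming the usual inline wrappers declared in this header:

static inline u32 apic_read(u32 reg)
{
	/* one indirect call through the front of *apic per register access */
	return apic->read(reg);
}

static inline void apic_write(u32 reg, u32 val)
{
	apic->write(reg, val);
}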
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 0a3e808b9123..4011cb03ef08 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -393,7 +393,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
void update_intr_gate(unsigned int n, const void *addr);
void alloc_intr_gate(unsigned int n, const void *addr);
-extern unsigned long used_vectors[];
+extern unsigned long system_vectors[];
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u32, debug_idt_ctr);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 8ec99a55e6b9..b80e46733909 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -16,6 +16,8 @@
#include <asm/irq_vectors.h>
+#define IRQ_MATRIX_BITS NR_VECTORS
+
#ifndef __ASSEMBLY__
#include <linux/percpu.h>
@@ -123,15 +125,13 @@ struct irq_alloc_info {
struct irq_cfg {
unsigned int dest_apicid;
- u8 vector;
- u8 old_vector;
+ unsigned int vector;
};
extern struct irq_cfg *irq_cfg(unsigned int irq);
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
-extern void setup_vector_irq(int cpu);
#ifdef CONFIG_SMP
extern void send_cleanup_vector(struct irq_cfg *);
extern void irq_complete_move(struct irq_cfg *cfg);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 5c27e146a166..a8834dd546cd 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -193,7 +193,6 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
extern void setup_IO_APIC(void);
extern void enable_IO_APIC(void);
extern void disable_IO_APIC(void);
-extern void setup_ioapic_dest(void);
extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
extern void print_IO_APICs(void);
#else /* !CONFIG_X86_IO_APIC */
@@ -233,7 +232,6 @@ static inline void io_apic_init_mappings(void) { }
static inline void setup_IO_APIC(void) { }
static inline void enable_IO_APIC(void) { }
-static inline void setup_ioapic_dest(void) { }
#endif
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index d8632f8fa17d..2395bb794c7b 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -26,11 +26,7 @@ extern void irq_ctx_init(int cpu);
struct irq_desc;
-#ifdef CONFIG_HOTPLUG_CPU
-#include <linux/cpumask.h>
-extern int check_irq_vectors_for_cpu_disable(void);
extern void fixup_irqs(void);
-#endif
#ifdef CONFIG_HAVE_KVM
extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index c20ffca8fef1..67421f649cfa 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,12 +102,8 @@
#define POSTED_INTR_NESTED_VECTOR 0xf0
#endif
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
+#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
+#define LOCAL_TIMER_VECTOR 0xee
#define NR_VECTORS 256
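For context on the vector numbers above: the local APIC derives an interrupt's priority class from the upper four bits of the vector, so a quick check of the two system vectors defined here (illustrative arithmetic only):

	/* priority class = vector >> 4 */
	MANAGED_IRQ_SHUTDOWN_VECTOR:	0xef >> 4 == 0xe
	LOCAL_TIMER_VECTOR:		0xee >> 4 == 0xe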
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 423e112c1e8f..f695cc6b8e1f 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -9,6 +9,7 @@
enum {
/* Allocate contiguous CPU vectors */
X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1,
+ X86_IRQ_ALLOC_LEGACY = 0x2,
};
extern struct irq_domain *x86_vector_domain;
@@ -42,8 +43,8 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg);
extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs);
-extern void mp_irqdomain_activate(struct irq_domain *domain,
- struct irq_data *irq_data);
+extern int mp_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early);
extern void mp_irqdomain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data);
extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
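mp_irqdomain_activate() now follows the updated irq_domain_ops ->activate() signature, which takes a bool early flag and can fail with an error code. A minimal sketch of a callback under this signature (function name and comment are hypothetical, not part of the patch):

static int example_domain_activate(struct irq_domain *domain,
				   struct irq_data *irqd, bool early)
{
	/*
	 * Program the hardware for irqd here. Return 0 on success or a
	 * negative errno, e.g. -ENOSPC when no vector can be allocated.
	 */
	return 0;
}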
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c73e493adf07..9d7d856b2d89 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1419,7 +1419,7 @@ static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
static inline int kvm_cpu_get_apicid(int mps_cpu)
{
#ifdef CONFIG_X86_LOCAL_APIC
- return __default_cpu_present_to_apicid(mps_cpu);
+ return default_cpu_present_to_apicid(mps_cpu);
#else
WARN_ON_ONCE(1);
return BAD_APICID;
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 8eb139ed1a03..84b9ec0c1bc0 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -138,6 +138,254 @@ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
#endif
+TRACE_EVENT(vector_config,
+
+ TP_PROTO(unsigned int irq, unsigned int vector,
+ unsigned int cpu, unsigned int apicdest),
+
+ TP_ARGS(irq, vector, cpu, apicdest),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( unsigned int, vector )
+ __field( unsigned int, cpu )
+ __field( unsigned int, apicdest )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->vector = vector;
+ __entry->cpu = cpu;
+ __entry->apicdest = apicdest;
+ ),
+
+ TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x",
+ __entry->irq, __entry->vector, __entry->cpu,
+ __entry->apicdest)
+);
+
+DECLARE_EVENT_CLASS(vector_mod,
+
+ TP_PROTO(unsigned int irq, unsigned int vector,
+ unsigned int cpu, unsigned int prev_vector,
+ unsigned int prev_cpu),
+
+ TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( unsigned int, vector )
+ __field( unsigned int, cpu )
+ __field( unsigned int, prev_vector )
+ __field( unsigned int, prev_cpu )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->vector = vector;
+ __entry->cpu = cpu;
+ __entry->prev_vector = prev_vector;
+ __entry->prev_cpu = prev_cpu;
+
+ ),
+
+ TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u",
+ __entry->irq, __entry->vector, __entry->cpu,
+ __entry->prev_vector, __entry->prev_cpu)
+);
+
+#define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \
+DEFINE_EVENT_FN(vector_mod, name, \
+ TP_PROTO(unsigned int irq, unsigned int vector, \
+ unsigned int cpu, unsigned int prev_vector, \
+ unsigned int prev_cpu), \
+ TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \
+
+DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update);
+DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear);
+
+DECLARE_EVENT_CLASS(vector_reserve,
+
+ TP_PROTO(unsigned int irq, int ret),
+
+ TP_ARGS(irq, ret),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret)
+);
+
+#define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \
+DEFINE_EVENT_FN(vector_reserve, name, \
+ TP_PROTO(unsigned int irq, int ret), \
+ TP_ARGS(irq, ret), NULL, NULL); \
+
+DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed);
+DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve);
+
+TRACE_EVENT(vector_alloc,
+
+ TP_PROTO(unsigned int irq, unsigned int vector, bool reserved,
+ int ret),
+
+ TP_ARGS(irq, vector, ret, reserved),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( unsigned int, vector )
+ __field( bool, reserved )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->vector = ret < 0 ? 0 : vector;
+ __entry->reserved = reserved;
+ __entry->ret = ret > 0 ? 0 : ret;
+ ),
+
+ TP_printk("irq=%u vector=%u reserved=%d ret=%d",
+ __entry->irq, __entry->vector,
+ __entry->reserved, __entry->ret)
+);
+
+TRACE_EVENT(vector_alloc_managed,
+
+ TP_PROTO(unsigned int irq, unsigned int vector,
+ int ret),
+
+ TP_ARGS(irq, vector, ret),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( unsigned int, vector )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->vector = ret < 0 ? 0 : vector;
+ __entry->ret = ret > 0 ? 0 : ret;
+ ),
+
+ TP_printk("irq=%u vector=%u ret=%d",
+ __entry->irq, __entry->vector, __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(vector_activate,
+
+ TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
+ bool early),
+
+ TP_ARGS(irq, is_managed, can_reserve, early),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( bool, is_managed )
+ __field( bool, can_reserve )
+ __field( bool, early )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->is_managed = is_managed;
+ __entry->can_reserve = can_reserve;
+ __entry->early = early;
+ ),
+
+ TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
+ __entry->irq, __entry->is_managed, __entry->can_reserve,
+ __entry->early)
+);
+
+#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \
+DEFINE_EVENT_FN(vector_activate, name, \
+ TP_PROTO(unsigned int irq, bool is_managed, \
+ bool can_reserve, bool early), \
+ TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \
+
+DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
+DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
+
+TRACE_EVENT(vector_teardown,
+
+ TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved),
+
+ TP_ARGS(irq, is_managed, has_reserved),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( bool, is_managed )
+ __field( bool, has_reserved )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->is_managed = is_managed;
+ __entry->has_reserved = has_reserved;
+ ),
+
+ TP_printk("irq=%u is_managed=%d has_reserved=%d",
+ __entry->irq, __entry->is_managed, __entry->has_reserved)
+);
+
+TRACE_EVENT(vector_setup,
+
+ TP_PROTO(unsigned int irq, bool is_legacy, int ret),
+
+ TP_ARGS(irq, is_legacy, ret),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( bool, is_legacy )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->is_legacy = is_legacy;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("irq=%u is_legacy=%d ret=%d",
+ __entry->irq, __entry->is_legacy, __entry->ret)
+);
+
+TRACE_EVENT(vector_free_moved,
+
+ TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector,
+ bool is_managed),
+
+ TP_ARGS(irq, cpu, vector, is_managed),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, irq )
+ __field( unsigned int, cpu )
+ __field( unsigned int, vector )
+ __field( bool, is_managed )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->cpu = cpu;
+ __entry->vector = vector;
+ __entry->is_managed = is_managed;
+ ),
+
+ TP_printk("irq=%u cpu=%u vector=%u is_managed=%d",
+ __entry->irq, __entry->cpu, __entry->vector,
+ __entry->is_managed)
+);
+
+
#endif /* CONFIG_X86_LOCAL_APIC */
#undef TRACE_INCLUDE_PATH
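The TP_fast_assign() block of vector_alloc normalizes its fields so one record format covers both outcomes: a successful allocation stores the vector and ret=0, a failed one stores vector=0 and the negative error. With invented values, the rendered TP_printk() lines would read roughly:

	irq=25 vector=34 reserved=0 ret=0	/* allocation succeeded */
	irq=25 vector=0  reserved=0 ret=-28	/* allocation failed with -ENOSPC */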
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
deleted file mode 100644
index 78ccf28d17db..000000000000
--- a/arch/x86/include/asm/x2apic.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Common bits for X2APIC cluster/physical modes.
- */
-
-#ifndef _ASM_X86_X2APIC_H
-#define _ASM_X86_X2APIC_H
-
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <linux/cpumask.h>
-
-static int x2apic_apic_id_valid(int apicid)
-{
- return 1;
-}
-
-static int x2apic_apic_id_registered(void)
-{
- return 1;
-}
-
-static void
-__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
-{
- unsigned long cfg = __prepare_ICR(0, vector, dest);
- native_x2apic_icr_write(cfg, apicid);
-}
-
-static unsigned int x2apic_get_apic_id(unsigned long id)
-{
- return id;
-}
-
-static unsigned long x2apic_set_apic_id(unsigned int id)
-{
- return id;
-}
-
-static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
-{
- return initial_apicid >> index_msb;
-}
-
-static void x2apic_send_IPI_self(int vector)
-{
- apic_write(APIC_SELF_IPI, vector);
-}
-
-#endif /* _ASM_X86_X2APIC_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 8a1ebf9540dd..63d0eb250792 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -51,11 +51,13 @@ struct x86_init_resources {
* are set up.
* @intr_init: interrupt init code
* @trap_init: platform specific trap setup
+ * @intr_mode_init: interrupt delivery mode setup
*/
struct x86_init_irqs {
void (*pre_vector_init)(void);
void (*intr_init)(void);
void (*trap_init)(void);
+ void (*intr_mode_init)(void);
};
/**
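The new intr_mode_init hook lets platforms opt out of the interrupt delivery mode switch in the same way other x86_init callbacks are neutralized. A sketch of such an override, assuming the existing x86_init_noop() stub (the exact call sites are in the vsmp/Xen changes listed in the diffstat):

	/* e.g. from a paravirt platform's early setup code */
	x86_init.irqs.intr_mode_init = x86_init_noop;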
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 2fb7309c6900..a9e08924927e 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -7,7 +7,7 @@
# In particualr, smp_apic_timer_interrupt() is called in random places.
KCOV_INSTRUMENT := n
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o
obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ff891772c9f8..132bf45c943a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -211,11 +211,7 @@ static inline int lapic_get_version(void)
*/
static inline int lapic_is_integrated(void)
{
-#ifdef CONFIG_X86_64
- return 1;
-#else
return APIC_INTEGRATED(lapic_get_version());
-#endif
}
/*
@@ -298,14 +294,11 @@ int get_physical_broadcast(void)
*/
int lapic_get_maxlvt(void)
{
- unsigned int v;
-
- v = apic_read(APIC_LVR);
/*
* - we always have APIC integrated on 64bit mode
* - 82489DXs do not report # of LVT entries
*/
- return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+ return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
}
/*
@@ -1229,53 +1222,100 @@ void __init sync_Arb_IDs(void)
APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
+enum apic_intr_mode_id apic_intr_mode;
+
+static int __init apic_intr_mode_select(void)
{
- unsigned int value;
+ /* Check kernel option */
+ if (disable_apic) {
+ pr_info("APIC disabled via kernel command line\n");
+ return APIC_PIC;
+ }
- /*
- * Don't do the setup now if we have a SMP BIOS as the
- * through-I/O-APIC virtual wire mode might be active.
- */
- if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
- return;
+ /* Check BIOS */
+#ifdef CONFIG_X86_64
+ /* On 64-bit, the APIC must be integrated, Check local APIC only */
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
+ disable_apic = 1;
+ pr_info("APIC disabled by BIOS\n");
+ return APIC_PIC;
+ }
+#else
+ /* On 32-bit, the APIC may be integrated APIC or 82489DX */
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
+ /* Neither 82489DX nor integrated APIC ? */
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
+ disable_apic = 1;
+ return APIC_PIC;
+ }
- /*
- * Enable APIC.
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
+ /* If the BIOS pretends there is an integrated APIC ? */
+ if (!boot_cpu_has(X86_FEATURE_APIC) &&
+ APIC_INTEGRATED(boot_cpu_apic_version)) {
+ disable_apic = 1;
+ pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
+ boot_cpu_physical_apicid);
+ return APIC_PIC;
+ }
+#endif
-#ifdef CONFIG_X86_32
- /* This bit is reserved on P4/Xeon and should be cleared */
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- (boot_cpu_data.x86 == 15))
- value &= ~APIC_SPIV_FOCUS_DISABLED;
- else
+ /* Check MP table or ACPI MADT configuration */
+ if (!smp_found_config) {
+ disable_ioapic_support();
+ if (!acpi_lapic) {
+ pr_info("APIC: ACPI MADT or MP tables are not detected\n");
+ return APIC_VIRTUAL_WIRE_NO_CONFIG;
+ }
+ return APIC_VIRTUAL_WIRE;
+ }
+
+#ifdef CONFIG_SMP
+ /* If SMP should be disabled, then really disable it! */
+ if (!setup_max_cpus) {
+ pr_info("APIC: SMP mode deactivated\n");
+ return APIC_SYMMETRIC_IO_NO_ROUTING;
+ }
+
+ if (read_apic_id() != boot_cpu_physical_apicid) {
+ panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+ read_apic_id(), boot_cpu_physical_apicid);
+ /* Or can we switch back to PIC here? */
+ }
#endif
- value |= APIC_SPIV_FOCUS_DISABLED;
- value |= SPURIOUS_APIC_VECTOR;
- apic_write(APIC_SPIV, value);
- /*
- * Set up the virtual wire mode.
- */
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
- value = APIC_DM_NMI;
- if (!lapic_is_integrated()) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
- if (apic_extnmi == APIC_EXTNMI_NONE)
- value |= APIC_LVT_MASKED;
- apic_write(APIC_LVT1, value);
+ return APIC_SYMMETRIC_IO;
+}
+
+/* Init the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_init(void)
+{
+ bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
+
+ apic_intr_mode = apic_intr_mode_select();
+
+ switch (apic_intr_mode) {
+ case APIC_PIC:
+ pr_info("APIC: Keep in PIC mode(8259)\n");
+ return;
+ case APIC_VIRTUAL_WIRE:
+ pr_info("APIC: Switch to virtual wire mode setup\n");
+ default_setup_apic_routing();
+ break;
+ case APIC_VIRTUAL_WIRE_NO_CONFIG:
+ pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
+ upmode = true;
+ default_setup_apic_routing();
+ break;
+ case APIC_SYMMETRIC_IO:
+ pr_info("APIC: Switch to symmetric I/O mode setup\n");
+ default_setup_apic_routing();
+ break;
+ case APIC_SYMMETRIC_IO_NO_ROUTING:
+ pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
+ break;
+ }
+
+ apic_bsp_setup(upmode);
}
static void lapic_setup_esr(void)
@@ -1499,7 +1539,9 @@ void setup_local_APIC(void)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
- if (!lapic_is_integrated()) /* 82489DX */
+
+ /* Is 82489DX ? */
+ if (!lapic_is_integrated())
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write(APIC_LVT1, value);
@@ -1885,8 +1927,8 @@ void __init init_apic_mappings(void)
* yeah -- we lie about apic_version
* in case if apic was disabled via boot option
* but it's not a problem for SMP compiled kernel
- * since smp_sanity_check is prepared for such a case
- * and disable smp mode
+ * since apic_intr_mode_select is prepared for such
+ * a case and disable smp mode
*/
boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
}
@@ -2242,44 +2284,6 @@ int hard_smp_processor_id(void)
return read_apic_id();
}
-void default_init_apic_ldr(void)
-{
- unsigned long val;
-
- apic_write(APIC_DFR, APIC_DFR_VALUE);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
- apic_write(APIC_LDR, val);
-}
-
-int default_cpu_mask_to_apicid(const struct cpumask *mask,
- struct irq_data *irqdata,
- unsigned int *apicid)
-{
- unsigned int cpu = cpumask_first(mask);
-
- if (cpu >= nr_cpu_ids)
- return -EINVAL;
- *apicid = per_cpu(x86_cpu_to_apicid, cpu);
- irq_data_update_effective_affinity(irqdata, cpumask_of(cpu));
- return 0;
-}
-
-int flat_cpu_mask_to_apicid(const struct cpumask *mask,
- struct irq_data *irqdata,
- unsigned int *apicid)
-
-{
- struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
- unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS;
-
- if (!cpu_mask)
- return -EINVAL;
- *apicid = (unsigned int)cpu_mask;
- cpumask_bits(effmsk)[0] = cpu_mask;
- return 0;
-}
-
/*
* Override the generic EOI implementation with an optimized version.
* Only called during early boot when only one CPU is active and with
@@ -2322,72 +2326,27 @@ static void __init apic_bsp_up_setup(void)
* Returns:
* apic_id of BSP APIC
*/
-int __init apic_bsp_setup(bool upmode)
+void __init apic_bsp_setup(bool upmode)
{
- int id;
-
connect_bsp_APIC();
if (upmode)
apic_bsp_up_setup();
setup_local_APIC();
- if (x2apic_mode)
- id = apic_read(APIC_LDR);
- else
- id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-
enable_IO_APIC();
end_local_APIC_setup();
irq_remap_enable_fault_handling();
setup_IO_APIC();
- /* Setup local timer */
- x86_init.timers.setup_percpu_clockev();
- return id;
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor(void)
-{
- if (disable_apic) {
- pr_info("Apic disabled\n");
- return -1;
- }
-#ifdef CONFIG_X86_64
- if (!boot_cpu_has(X86_FEATURE_APIC)) {
- disable_apic = 1;
- pr_info("Apic disabled by BIOS\n");
- return -1;
- }
-#else
- if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
- return -1;
-
- /*
- * Complain if the BIOS pretends there is one.
- */
- if (!boot_cpu_has(X86_FEATURE_APIC) &&
- APIC_INTEGRATED(boot_cpu_apic_version)) {
- pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
- boot_cpu_physical_apicid);
- return -1;
- }
-#endif
-
- if (!smp_found_config)
- disable_ioapic_support();
-
- default_setup_apic_routing();
- apic_bsp_setup(true);
- return 0;
}
#ifdef CONFIG_UP_LATE_INIT
void __init up_late_init(void)
{
- APIC_init_uniprocessor();
+ if (apic_intr_mode == APIC_PIC)
+ return;
+
+ /* Setup local timer */
+ x86_init.timers.setup_percpu_clockev();
}
#endif
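apic_intr_mode_select() plus apic_intr_mode_init() replace the old init_bsp_APIC()/APIC_init_uniprocessor() pair with a single decision point on the boot CPU. For the common ACPI/SMP case the flow reduces to roughly the following (illustrative only; upmode is false unless CONFIG_UP_LATE_INIT is enabled):

	apic_intr_mode = apic_intr_mode_select();	/* -> APIC_SYMMETRIC_IO */
	default_setup_apic_routing();
	apic_bsp_setup(upmode);				/* local APIC + IO-APIC setup */

A system without MP/MADT tables instead selects APIC_VIRTUAL_WIRE_NO_CONFIG, which disables IO-APIC support and forces upmode.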
diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
new file mode 100644
index 000000000000..a360801779ae
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_common.c
@@ -0,0 +1,46 @@
+/*
+ * Common functions shared between the various APIC flavours
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+#include <linux/irq.h>
+#include <asm/apic.h>
+
+u32 apic_default_calc_apicid(unsigned int cpu)
+{
+ return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+u32 apic_flat_calc_apicid(unsigned int cpu)
+{
+ return 1U << cpu;
+}
+
+bool default_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return physid_isset(apicid, *map);
+}
+
+void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
+{
+ *retmap = *phys_map;
+}
+
+int default_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
+ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+}
+EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid);
+
+int default_check_phys_apicid_present(int phys_apicid)
+{
+ return physid_isset(phys_apicid, phys_cpu_present_map);
+}
+
+int default_apic_id_valid(int apicid)
+{
+ return (apicid < 255);
+}
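The two calc_dest_apicid() helpers make the destination encoding explicit: physical delivery modes look up the target CPU's APIC ID, logical flat mode builds a one-bit destination mask. A quick illustration (CPU number and APIC ID values invented):

	u32 dest;

	dest = apic_default_calc_apicid(3);	/* per_cpu(x86_cpu_to_apicid, 3), e.g. 0x06 */
	dest = apic_flat_calc_apicid(3);	/* 1U << 3 == 0x08, a logical destination bitmap */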
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index dedd5a41ba48..aa85690e9b64 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -119,7 +119,7 @@ static unsigned int flat_get_apic_id(unsigned long x)
return (x >> 24) & 0xFF;
}
-static unsigned long set_apic_id(unsigned int id)
+static u32 set_apic_id(unsigned int id)
{
return (id & 0xFF) << 24;
}
@@ -154,12 +154,10 @@ static struct apic apic_flat __ro_after_init = {
.irq_delivery_mode = dest_LowestPrio,
.irq_dest_mode = 1, /* logical */
- .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
- .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -172,7 +170,7 @@ static struct apic apic_flat __ro_after_init = {
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_flat_calc_apicid,
.send_IPI = default_send_IPI_single,
.send_IPI_mask = flat_send_IPI_mask,
@@ -249,12 +247,10 @@ static struct apic apic_physflat __ro_after_init = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
/* not needed, but shouldn't hurt: */
.init_apic_ldr = flat_init_apic_ldr,
@@ -268,7 +264,7 @@ static struct apic apic_physflat __ro_after_init = {
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_default_calc_apicid,
.send_IPI = default_send_IPI_single_phys,
.send_IPI_mask = default_send_IPI_mask_sequence_phys,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index c8d211277315..7b659c4480c9 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -84,20 +84,6 @@ static int noop_apic_id_registered(void)
return physid_isset(0, phys_cpu_present_map);
}
-static const struct cpumask *noop_target_cpus(void)
-{
- /* only BSP here */
- return cpumask_of(0);
-}
-
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask)
-{
- if (cpu != 0)
- pr_warning("APIC: Vector allocated for non-BSP cpu\n");
- cpumask_copy(retmask, cpumask_of(cpu));
-}
-
static u32 noop_apic_read(u32 reg)
{
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
@@ -109,6 +95,13 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
}
+#ifdef CONFIG_X86_32
+static int noop_x86_32_early_logical_apicid(int cpu)
+{
+ return BAD_APICID;
+}
+#endif
+
struct apic apic_noop __ro_after_init = {
.name = "noop",
.probe = noop_probe,
@@ -121,12 +114,10 @@ struct apic apic_noop __ro_after_init = {
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
- .target_cpus = noop_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = default_check_apicid_used,
- .vector_allocation_domain = noop_vector_allocation_domain,
.init_apic_ldr = noop_init_apic_ldr,
.ioapic_phys_id_map = default_ioapic_phys_id_map,
@@ -142,7 +133,7 @@ struct apic apic_noop __ro_after_init = {
.get_apic_id = noop_get_apic_id,
.set_apic_id = NULL,
- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_flat_calc_apicid,
.send_IPI = noop_send_IPI,
.send_IPI_mask = noop_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 2fda912219a6..134e04506ab4 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -38,7 +38,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
return id;
}
-static unsigned long numachip1_set_apic_id(unsigned int id)
+static u32 numachip1_set_apic_id(unsigned int id)
{
return (id & 0xff) << 24;
}
@@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x)
return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);
}
-static unsigned long numachip2_set_apic_id(unsigned int id)
+static u32 numachip2_set_apic_id(unsigned int id)
{
return id << 24;
}
@@ -249,12 +249,10 @@ static const struct apic apic_numachip1 __refconst = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -267,7 +265,7 @@ static const struct apic apic_numachip1 __refconst = {
.get_apic_id = numachip1_get_apic_id,
.set_apic_id = numachip1_set_apic_id,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_default_calc_apicid,
.send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
@@ -300,12 +298,10 @@ static const struct apic apic_numachip2 __refconst = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -318,7 +314,7 @@ static const struct apic apic_numachip2 __refconst = {
.get_apic_id = numachip2_get_apic_id,
.set_apic_id = numachip2_set_apic_id,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_default_calc_apicid,
.send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index e12fbcfc9571..afee386ff711 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -27,9 +27,9 @@ static int bigsmp_apic_id_registered(void)
return 1;
}
-static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
+static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
{
- return 0;
+ return false;
}
static int bigsmp_early_logical_apicid(int cpu)
@@ -155,12 +155,10 @@ static struct apic apic_bigsmp __ro_after_init = {
/* phys delivery to target CPU: */
.irq_dest_mode = 0,
- .target_cpus = default_target_cpus,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = bigsmp_check_apicid_used,
- .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = bigsmp_init_apic_ldr,
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
@@ -173,7 +171,7 @@ static struct apic apic_bigsmp __ro_after_init = {
.get_apic_id = bigsmp_get_apic_id,
.set_apic_id = NULL,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_default_calc_apicid,
.send_IPI = default_send_IPI_single_phys,
.send_IPI_mask = default_send_IPI_mask_sequence_phys,
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
index 56ccf9346b08..b07075dce8b7 100644
--- a/arch/x86/kernel/apic/htirq.c
+++ b/arch/x86/kernel/apic/htirq.c
@@ -112,8 +112,8 @@ static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
-static void htirq_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+static int htirq_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
struct ht_irq_msg msg;
struct irq_cfg *cfg = irqd_cfg(irq_data);
@@ -132,6 +132,7 @@ static void htirq_domain_activate(struct irq_domain *domain,
HT_IRQ_LOW_MT_ARBITRATED) |
HT_IRQ_LOW_IRQ_MASKED;
write_ht_irq_msg(irq_data->irq, &msg);
+ return 0;
}
static void htirq_domain_deactivate(struct irq_domain *domain,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3b89b27945ff..201579dc5242 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1014,6 +1014,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
info->ioapic_pin))
return -ENOMEM;
} else {
+ info->flags |= X86_IRQ_ALLOC_LEGACY;
irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
NULL);
if (irq >= 0) {
@@ -1586,6 +1587,43 @@ static int __init notimercheck(char *s)
}
__setup("no_timer_check", notimercheck);
+static void __init delay_with_tsc(void)
+{
+ unsigned long long start, now;
+ unsigned long end = jiffies + 4;
+
+ start = rdtsc();
+
+ /*
+ * We don't know the TSC frequency yet, but waiting for
+ * 40000000000/HZ TSC cycles is safe:
+ * 4 GHz == 10 jiffies
+ * 1 GHz == 40 jiffies
+ */
+ do {
+ rep_nop();
+ now = rdtsc();
+ } while ((now - start) < 40000000000UL / HZ &&
+ time_before_eq(jiffies, end));
+}
+
+static void __init delay_without_tsc(void)
+{
+ unsigned long end = jiffies + 4;
+ int band = 1;
+
+ /*
+ * We don't know any frequency yet, but waiting for
+ * 40940000000/HZ cycles is safe:
+ * 4 GHz == 10 jiffies
+ * 1 GHz == 40 jiffies
+ * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
+ */
+ do {
+ __delay(((1U << band++) * 10000000UL) / HZ);
+ } while (band < 12 && time_before_eq(jiffies, end));
+}
+
/*
* There is a nasty bug in some older SMP boards, their mptable lies
* about the timer IRQ. We do the following to work around the situation:
@@ -1604,8 +1642,12 @@ static int __init timer_irq_works(void)
local_save_flags(flags);
local_irq_enable();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
+
+ if (boot_cpu_has(X86_FEATURE_TSC))
+ delay_with_tsc();
+ else
+ delay_without_tsc();
+
local_irq_restore(flags);
/*
@@ -1821,26 +1863,36 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data)
eoi_ioapic_pin(data->entry.vector, data);
}
+static void ioapic_configure_entry(struct irq_data *irqd)
+{
+ struct mp_chip_data *mpd = irqd->chip_data;
+ struct irq_cfg *cfg = irqd_cfg(irqd);
+ struct irq_pin_list *entry;
+
+ /*
+ * Only update when the parent is the vector domain, don't touch it
+ * if the parent is the remapping domain. Check the installed
+ * ioapic chip to verify that.
+ */
+ if (irqd->chip == &ioapic_chip) {
+ mpd->entry.dest = cfg->dest_apicid;
+ mpd->entry.vector = cfg->vector;
+ }
+ for_each_irq_pin(entry, mpd->irq_2_pin)
+ __ioapic_write_entry(entry->apic, entry->pin, mpd->entry);
+}
+
static int ioapic_set_affinity(struct irq_data *irq_data,
const struct cpumask *mask, bool force)
{
struct irq_data *parent = irq_data->parent_data;
- struct mp_chip_data *data = irq_data->chip_data;
- struct irq_pin_list *entry;
- struct irq_cfg *cfg;
unsigned long flags;
int ret;
ret = parent->chip->irq_set_affinity(parent, mask, force);
raw_spin_lock_irqsave(&ioapic_lock, flags);
- if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
- cfg = irqd_cfg(irq_data);
- data->entry.dest = cfg->dest_apicid;
- data->entry.vector = cfg->vector;
- for_each_irq_pin(entry, data->irq_2_pin)
- __ioapic_write_entry(entry->apic, entry->pin,
- data->entry);
- }
+ if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)
+ ioapic_configure_entry(irq_data);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return ret;
@@ -2097,7 +2149,7 @@ static inline void __init check_timer(void)
unmask_ioapic_irq(irq_get_irq_data(0));
}
irq_domain_deactivate_irq(irq_data);
- irq_domain_activate_irq(irq_data);
+ irq_domain_activate_irq(irq_data, false);
if (timer_irq_works()) {
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
@@ -2119,7 +2171,7 @@ static inline void __init check_timer(void)
*/
replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
irq_domain_deactivate_irq(irq_data);
- irq_domain_activate_irq(irq_data);
+ irq_domain_activate_irq(irq_data, false);
legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2513,52 +2565,9 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
}
/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be apic->target_cpus()
+ * This function updates target affinity of IOAPIC interrupts to include
+ * the CPUs which came online during SMP bringup.
*/
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
- int pin, ioapic, irq, irq_entry;
- const struct cpumask *mask;
- struct irq_desc *desc;
- struct irq_data *idata;
- struct irq_chip *chip;
-
- if (skip_ioapic_setup == 1)
- return;
-
- for_each_ioapic_pin(ioapic, pin) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
-
- irq = pin_2_irq(irq_entry, ioapic, pin, 0);
- if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
- continue;
-
- desc = irq_to_desc(irq);
- raw_spin_lock_irq(&desc->lock);
- idata = irq_desc_get_irq_data(desc);
-
- /*
- * Honour affinities which have been set in early boot
- */
- if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
- mask = irq_data_get_affinity_mask(idata);
- else
- mask = apic->target_cpus();
-
- chip = irq_data_get_irq_chip(idata);
- /* Might be lapic_chip for irq 0 */
- if (chip->irq_set_affinity)
- chip->irq_set_affinity(idata, mask, false);
- raw_spin_unlock_irq(&desc->lock);
- }
-}
-#endif
-
#define IOAPIC_RESOURCE_NAME_SIZE 11
static struct resource *ioapic_resources;
@@ -2978,17 +2987,15 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
-void mp_irqdomain_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+int mp_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
unsigned long flags;
- struct irq_pin_list *entry;
- struct mp_chip_data *data = irq_data->chip_data;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, data->irq_2_pin)
- __ioapic_write_entry(entry->apic, entry->pin, data->entry);
+ ioapic_configure_entry(irq_data);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return 0;
}
void mp_irqdomain_deactivate(struct irq_domain *domain,
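The 40000000000/HZ bound in delay_with_tsc() can be sanity-checked with a concrete HZ; assuming HZ=250 (one jiffy = 4 ms), purely as illustration:

	40000000000 / 250 = 160,000,000 TSC cycles
	at 4 GHz: 160,000,000 cycles = 40 ms  = 10 jiffies
	at 1 GHz: 160,000,000 cycles = 160 ms = 40 jiffies

which matches the 10/40 jiffy figures quoted in the comment above the loop.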
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 63287659adb6..fa22017de806 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -66,6 +66,31 @@ static void setup_apic_flat_routing(void)
#endif
}
+static int default_apic_id_registered(void)
+{
+ return physid_isset(read_apic_id(), phys_cpu_present_map);
+}
+
+/*
+ * Set up the logical destination ID. Intel recommends to set DFR, LDR and
+ * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual"
+ * (Intel document number 292116).
+ */
+static void default_init_apic_ldr(void)
+{
+ unsigned long val;
+
+ apic_write(APIC_DFR, APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+ apic_write(APIC_LDR, val);
+}
+
+static int default_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
/* should be called last. */
static int probe_default(void)
{
@@ -84,12 +109,10 @@ static struct apic apic_default __ro_after_init = {
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
- .target_cpus = default_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = default_check_apicid_used,
- .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = default_init_apic_ldr,
.ioapic_phys_id_map = default_ioapic_phys_id_map,
@@ -102,7 +125,7 @@ static struct apic apic_default __ro_after_init = {
.get_apic_id = default_get_apic_id,
.set_apic_id = NULL,
- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_flat_calc_apicid,
.send_IPI = default_send_IPI_single,
.send_IPI_mask = default_send_IPI_mask_logical,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 88c214e75a6b..05c85e693a5d 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -11,6 +11,7 @@
* published by the Free Software Foundation.
*/
#include <linux/interrupt.h>
+#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/slab.h>
@@ -21,20 +22,30 @@
#include <asm/desc.h>
#include <asm/irq_remapping.h>
+#include <asm/trace/irq_vectors.h>
+
struct apic_chip_data {
- struct irq_cfg cfg;
- cpumask_var_t domain;
- cpumask_var_t old_domain;
- u8 move_in_progress : 1;
+ struct irq_cfg hw_irq_cfg;
+ unsigned int vector;
+ unsigned int prev_vector;
+ unsigned int cpu;
+ unsigned int prev_cpu;
+ unsigned int irq;
+ struct hlist_node clist;
+ unsigned int move_in_progress : 1,
+ is_managed : 1,
+ can_reserve : 1,
+ has_reserved : 1;
};
struct irq_domain *x86_vector_domain;
EXPORT_SYMBOL_GPL(x86_vector_domain);
static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
+static cpumask_var_t vector_searchmask;
static struct irq_chip lapic_controller;
-#ifdef CONFIG_X86_IO_APIC
-static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
+static struct irq_matrix *vector_matrix;
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
#endif
void lock_vector_lock(void)
@@ -50,22 +61,37 @@ void unlock_vector_lock(void)
raw_spin_unlock(&vector_lock);
}
-static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data)
+void init_irq_alloc_info(struct irq_alloc_info *info,
+ const struct cpumask *mask)
+{
+ memset(info, 0, sizeof(*info));
+ info->mask = mask;
+}
+
+void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
{
- if (!irq_data)
+ if (src)
+ *dst = *src;
+ else
+ memset(dst, 0, sizeof(*dst));
+}
+
+static struct apic_chip_data *apic_chip_data(struct irq_data *irqd)
+{
+ if (!irqd)
return NULL;
- while (irq_data->parent_data)
- irq_data = irq_data->parent_data;
+ while (irqd->parent_data)
+ irqd = irqd->parent_data;
- return irq_data->chip_data;
+ return irqd->chip_data;
}
-struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
+struct irq_cfg *irqd_cfg(struct irq_data *irqd)
{
- struct apic_chip_data *data = apic_chip_data(irq_data);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
- return data ? &data->cfg : NULL;
+ return apicd ? &apicd->hw_irq_cfg : NULL;
}
EXPORT_SYMBOL_GPL(irqd_cfg);
@@ -76,270 +102,395 @@ struct irq_cfg *irq_cfg(unsigned int irq)
static struct apic_chip_data *alloc_apic_chip_data(int node)
{
- struct apic_chip_data *data;
+ struct apic_chip_data *apicd;
- data = kzalloc_node(sizeof(*data), GFP_KERNEL, node);
- if (!data)
- return NULL;
- if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node))
- goto out_data;
- if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node))
- goto out_domain;
- return data;
-out_domain:
- free_cpumask_var(data->domain);
-out_data:
- kfree(data);
- return NULL;
-}
-
-static void free_apic_chip_data(struct apic_chip_data *data)
-{
- if (data) {
- free_cpumask_var(data->domain);
- free_cpumask_var(data->old_domain);
- kfree(data);
+ apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node);
+ if (apicd)
+ INIT_HLIST_NODE(&apicd->clist);
+ return apicd;
+}
+
+static void free_apic_chip_data(struct apic_chip_data *apicd)
+{
+ kfree(apicd);
+}
+
+static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
+ unsigned int cpu)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+ lockdep_assert_held(&vector_lock);
+
+ apicd->hw_irq_cfg.vector = vector;
+ apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
+ irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
+ trace_vector_config(irqd->irq, vector, cpu,
+ apicd->hw_irq_cfg.dest_apicid);
+}
+
+static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
+ unsigned int newcpu)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ struct irq_desc *desc = irq_data_to_desc(irqd);
+
+ lockdep_assert_held(&vector_lock);
+
+ trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
+ apicd->cpu);
+
+ /* Setup the vector move, if required */
+ if (apicd->vector && cpu_online(apicd->cpu)) {
+ apicd->move_in_progress = true;
+ apicd->prev_vector = apicd->vector;
+ apicd->prev_cpu = apicd->cpu;
+ } else {
+ apicd->prev_vector = 0;
}
+
+ apicd->vector = newvec;
+ apicd->cpu = newcpu;
+ BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
+ per_cpu(vector_irq, newcpu)[newvec] = desc;
}
-static int __assign_irq_vector(int irq, struct apic_chip_data *d,
- const struct cpumask *mask,
- struct irq_data *irqdata)
+static void vector_assign_managed_shutdown(struct irq_data *irqd)
{
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
- static int current_offset = VECTOR_OFFSET_START % 16;
- int cpu, vector;
+ unsigned int cpu = cpumask_first(cpu_online_mask);
- /*
- * If there is still a move in progress or the previous move has not
- * been cleaned up completely, tell the caller to come back later.
- */
- if (d->move_in_progress ||
- cpumask_intersects(d->old_domain, cpu_online_mask))
- return -EBUSY;
+ apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu);
+}
- /* Only try and allocate irqs on cpus that are present */
- cpumask_clear(d->old_domain);
- cpumask_clear(searched_cpumask);
- cpu = cpumask_first_and(mask, cpu_online_mask);
- while (cpu < nr_cpu_ids) {
- int new_cpu, offset;
+static int reserve_managed_vector(struct irq_data *irqd)
+{
+ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ unsigned long flags;
+ int ret;
- /* Get the possible target cpus for @mask/@cpu from the apic */
- apic->vector_allocation_domain(cpu, vector_cpumask, mask);
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ apicd->is_managed = true;
+ ret = irq_matrix_reserve_managed(vector_matrix, affmsk);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ trace_vector_reserve_managed(irqd->irq, ret);
+ return ret;
+}
- /*
- * Clear the offline cpus from @vector_cpumask for searching
- * and verify whether the result overlaps with @mask. If true,
- * then the call to apic->cpu_mask_to_apicid() will
- * succeed as well. If not, no point in trying to find a
- * vector in this mask.
- */
- cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
- if (!cpumask_intersects(vector_searchmask, mask))
- goto next_cpu;
-
- if (cpumask_subset(vector_cpumask, d->domain)) {
- if (cpumask_equal(vector_cpumask, d->domain))
- goto success;
- /*
- * Mark the cpus which are not longer in the mask for
- * cleanup.
- */
- cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
- vector = d->cfg.vector;
- goto update;
- }
+static void reserve_irq_vector_locked(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
- vector = current_vector;
- offset = current_offset;
-next:
- vector += 16;
- if (vector >= FIRST_SYSTEM_VECTOR) {
- offset = (offset + 1) % 16;
- vector = FIRST_EXTERNAL_VECTOR + offset;
- }
+ irq_matrix_reserve(vector_matrix);
+ apicd->can_reserve = true;
+ apicd->has_reserved = true;
+ trace_vector_reserve(irqd->irq, 0);
+ vector_assign_managed_shutdown(irqd);
+}
- /* If the search wrapped around, try the next cpu */
- if (unlikely(current_vector == vector))
- goto next_cpu;
+static int reserve_irq_vector(struct irq_data *irqd)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ reserve_irq_vector_locked(irqd);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return 0;
+}
- if (test_bit(vector, used_vectors))
- goto next;
+static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ bool resvd = apicd->has_reserved;
+ unsigned int cpu = apicd->cpu;
+ int vector = apicd->vector;
- for_each_cpu(new_cpu, vector_searchmask) {
- if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
- goto next;
- }
- /* Found one! */
- current_vector = vector;
- current_offset = offset;
- /* Schedule the old vector for cleanup on all cpus */
- if (d->cfg.vector)
- cpumask_copy(d->old_domain, d->domain);
- for_each_cpu(new_cpu, vector_searchmask)
- per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
- goto update;
-
-next_cpu:
- /*
- * We exclude the current @vector_cpumask from the requested
- * @mask and try again with the next online cpu in the
- * result. We cannot modify @mask, so we use @vector_cpumask
- * as a temporary buffer here as it will be reassigned when
- * calling apic->vector_allocation_domain() above.
- */
- cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
- cpumask_andnot(vector_cpumask, mask, searched_cpumask);
- cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
- continue;
- }
- return -ENOSPC;
+ lockdep_assert_held(&vector_lock);
-update:
/*
- * Exclude offline cpus from the cleanup mask and set the
- * move_in_progress flag when the result is not empty.
+ * If the current target CPU is online and in the new requested
+ * affinity mask, there is no point in moving the interrupt from
+ * one CPU to another.
*/
- cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
- d->move_in_progress = !cpumask_empty(d->old_domain);
- d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0;
- d->cfg.vector = vector;
- cpumask_copy(d->domain, vector_cpumask);
-success:
- /*
- * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
- * as we already established, that mask & d->domain & cpu_online_mask
- * is not empty.
- *
- * vector_searchmask is a subset of d->domain and has the offline
- * cpus masked out.
- */
- cpumask_and(vector_searchmask, vector_searchmask, mask);
- BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata,
- &d->cfg.dest_apicid));
+ if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
+ return 0;
+
+ vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
+ if (vector > 0)
+ apic_update_vector(irqd, vector, cpu);
+ trace_vector_alloc(irqd->irq, vector, resvd, vector);
+ return vector;
+}
+
+static int assign_vector_locked(struct irq_data *irqd,
+ const struct cpumask *dest)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ int vector = allocate_vector(irqd, dest);
+
+ if (vector < 0)
+ return vector;
+
+ apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
return 0;
}
-static int assign_irq_vector(int irq, struct apic_chip_data *data,
- const struct cpumask *mask,
- struct irq_data *irqdata)
+static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest)
{
- int err;
unsigned long flags;
+ int ret;
raw_spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, data, mask, irqdata);
+ cpumask_and(vector_searchmask, dest, cpu_online_mask);
+ ret = assign_vector_locked(irqd, vector_searchmask);
raw_spin_unlock_irqrestore(&vector_lock, flags);
- return err;
+ return ret;
}
-static int assign_irq_vector_policy(int irq, int node,
- struct apic_chip_data *data,
- struct irq_alloc_info *info,
- struct irq_data *irqdata)
+static int assign_irq_vector_any_locked(struct irq_data *irqd)
{
- if (info && info->mask)
- return assign_irq_vector(irq, data, info->mask, irqdata);
- if (node != NUMA_NO_NODE &&
- assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0)
+ /* Get the affinity mask - either irq_default_affinity or (user) set */
+ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+ int node = irq_data_get_node(irqd);
+
+ if (node == NUMA_NO_NODE)
+ goto all;
+ /* Try the intersection of @affmsk and node mask */
+ cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
+ if (!assign_vector_locked(irqd, vector_searchmask))
+ return 0;
+ /* Try the node mask */
+ if (!assign_vector_locked(irqd, cpumask_of_node(node)))
return 0;
- return assign_irq_vector(irq, data, apic->target_cpus(), irqdata);
+all:
+ /* Try the full affinity mask */
+ cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
+ if (!assign_vector_locked(irqd, vector_searchmask))
+ return 0;
+ /* Try the full online mask */
+ return assign_vector_locked(irqd, cpu_online_mask);
+}
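
The fallback order above is the whole algorithm: try the narrowest candidate set first and widen until a vector is found. A small user-space sketch of that order, with try_assign() standing in for assign_vector_locked() (hypothetical names, print-outs instead of a real matrix search):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for assign_vector_locked(): true when a vector
 * could be allocated from the given candidate CPU set. */
static bool try_assign(const char *candidates)
{
	printf("trying: %s\n", candidates);
	return false;	/* pretend each narrower set is already exhausted */
}

/* Same search order as assign_irq_vector_any_locked():
 * affinity & node -> node -> affinity & online -> online. */
static bool assign_any(bool have_node)
{
	if (have_node) {
		if (try_assign("affinity & node"))
			return true;
		if (try_assign("node"))
			return true;
	}
	if (try_assign("affinity & online"))
		return true;
	return try_assign("online");
}

int main(void)
{
	assign_any(true);
	return 0;
}
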
+
+static int
+assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
+{
+ if (irqd_affinity_is_managed(irqd))
+ return reserve_managed_vector(irqd);
+ if (info->mask)
+ return assign_irq_vector(irqd, info->mask);
+ /*
+ * Make only a global reservation with no guarantee. A real vector
+ * is associated at activation time.
+ */
+ return reserve_irq_vector(irqd);
}
-static void clear_irq_vector(int irq, struct apic_chip_data *data)
+static int
+assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
{
- struct irq_desc *desc;
- int cpu, vector;
+ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ int vector, cpu;
- if (!data->cfg.vector)
+ cpumask_and(vector_searchmask, vector_searchmask, affmsk);
+ cpu = cpumask_first(vector_searchmask);
+ if (cpu >= nr_cpu_ids)
+ return -EINVAL;
+	/* set_affinity might call here even though nothing needs to change */
+ if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
+ return 0;
+ vector = irq_matrix_alloc_managed(vector_matrix, cpu);
+ trace_vector_alloc_managed(irqd->irq, vector, vector);
+ if (vector < 0)
+ return vector;
+ apic_update_vector(irqd, vector, cpu);
+ apic_update_irq_cfg(irqd, vector, cpu);
+ return 0;
+}
+
+static void clear_irq_vector(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ bool managed = irqd_affinity_is_managed(irqd);
+ unsigned int vector = apicd->vector;
+
+ lockdep_assert_held(&vector_lock);
+
+ if (!vector)
return;
- vector = data->cfg.vector;
- for_each_cpu_and(cpu, data->domain, cpu_online_mask)
- per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+ trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
+ apicd->prev_cpu);
- data->cfg.vector = 0;
- cpumask_clear(data->domain);
+ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
+ irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
+ apicd->vector = 0;
- /*
- * If move is in progress or the old_domain mask is not empty,
- * i.e. the cleanup IPI has not been processed yet, we need to remove
- * the old references to desc from all cpus vector tables.
- */
- if (!data->move_in_progress && cpumask_empty(data->old_domain))
+ /* Clean up move in progress */
+ vector = apicd->prev_vector;
+ if (!vector)
return;
- desc = irq_to_desc(irq);
- for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
- vector++) {
- if (per_cpu(vector_irq, cpu)[vector] != desc)
- continue;
- per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
- break;
- }
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
+ irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
+ apicd->prev_vector = 0;
+ apicd->move_in_progress = 0;
+ hlist_del_init(&apicd->clist);
+}
+
+static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ unsigned long flags;
+
+ trace_vector_deactivate(irqd->irq, apicd->is_managed,
+ apicd->can_reserve, false);
+
+ /* Regular fixed assigned interrupt */
+ if (!apicd->is_managed && !apicd->can_reserve)
+ return;
+ /* If the interrupt has a global reservation, nothing to do */
+ if (apicd->has_reserved)
+ return;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ clear_irq_vector(irqd);
+ if (apicd->can_reserve)
+ reserve_irq_vector_locked(irqd);
+ else
+ vector_assign_managed_shutdown(irqd);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+static int activate_reserved(struct irq_data *irqd)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ int ret;
+
+ ret = assign_irq_vector_any_locked(irqd);
+ if (!ret)
+ apicd->has_reserved = false;
+ return ret;
+}
+
+static int activate_managed(struct irq_data *irqd)
+{
+ const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
+ int ret;
+
+ cpumask_and(vector_searchmask, dest, cpu_online_mask);
+ if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
+ /* Something in the core code broke! Survive gracefully */
+ pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
+		return -EINVAL;
+ }
+
+ ret = assign_managed_vector(irqd, vector_searchmask);
+ /*
+ * This should not happen. The vector reservation got buggered. Handle
+ * it gracefully.
+ */
+ if (WARN_ON_ONCE(ret < 0)) {
+ pr_err("Managed startup irq %u, no vector available\n",
+ irqd->irq);
}
- data->move_in_progress = 0;
+ return ret;
}
-void init_irq_alloc_info(struct irq_alloc_info *info,
- const struct cpumask *mask)
+static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
+ bool early)
{
- memset(info, 0, sizeof(*info));
- info->mask = mask;
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ unsigned long flags;
+ int ret = 0;
+
+ trace_vector_activate(irqd->irq, apicd->is_managed,
+ apicd->can_reserve, early);
+
+ /* Nothing to do for fixed assigned vectors */
+ if (!apicd->can_reserve && !apicd->is_managed)
+ return 0;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ if (early || irqd_is_managed_and_shutdown(irqd))
+ vector_assign_managed_shutdown(irqd);
+ else if (apicd->is_managed)
+ ret = activate_managed(irqd);
+ else if (apicd->has_reserved)
+ ret = activate_reserved(irqd);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return ret;
}
-void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
+static void vector_free_reserved_and_managed(struct irq_data *irqd)
{
- if (src)
- *dst = *src;
- else
- memset(dst, 0, sizeof(*dst));
+ const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+ trace_vector_teardown(irqd->irq, apicd->is_managed,
+ apicd->has_reserved);
+
+ if (apicd->has_reserved)
+ irq_matrix_remove_reserved(vector_matrix);
+ if (apicd->is_managed)
+ irq_matrix_remove_managed(vector_matrix, dest);
}
static void x86_vector_free_irqs(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
- struct apic_chip_data *apic_data;
- struct irq_data *irq_data;
+ struct apic_chip_data *apicd;
+ struct irq_data *irqd;
unsigned long flags;
int i;
for (i = 0; i < nr_irqs; i++) {
- irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
- if (irq_data && irq_data->chip_data) {
+ irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i);
+ if (irqd && irqd->chip_data) {
raw_spin_lock_irqsave(&vector_lock, flags);
- clear_irq_vector(virq + i, irq_data->chip_data);
- apic_data = irq_data->chip_data;
- irq_domain_reset_irq_data(irq_data);
+ clear_irq_vector(irqd);
+ vector_free_reserved_and_managed(irqd);
+ apicd = irqd->chip_data;
+ irq_domain_reset_irq_data(irqd);
raw_spin_unlock_irqrestore(&vector_lock, flags);
- free_apic_chip_data(apic_data);
-#ifdef CONFIG_X86_IO_APIC
- if (virq + i < nr_legacy_irqs())
- legacy_irq_data[virq + i] = NULL;
-#endif
+ free_apic_chip_data(apicd);
}
}
}
+static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
+ struct apic_chip_data *apicd)
+{
+ unsigned long flags;
+ bool realloc = false;
+
+ apicd->vector = ISA_IRQ_VECTOR(virq);
+ apicd->cpu = 0;
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ /*
+ * If the interrupt is activated, then it must stay at this vector
+ * position. That's usually the timer interrupt (0).
+ */
+ if (irqd_is_activated(irqd)) {
+ trace_vector_setup(virq, true, 0);
+ apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
+ } else {
+ /* Release the vector */
+ apicd->can_reserve = true;
+ clear_irq_vector(irqd);
+ realloc = true;
+ }
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ return realloc;
+}
+
static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct irq_alloc_info *info = arg;
- struct apic_chip_data *data;
- struct irq_data *irq_data;
+ struct apic_chip_data *apicd;
+ struct irq_data *irqd;
int i, err, node;
if (disable_apic)
@@ -350,34 +501,37 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
return -ENOSYS;
for (i = 0; i < nr_irqs; i++) {
- irq_data = irq_domain_get_irq_data(domain, virq + i);
- BUG_ON(!irq_data);
- node = irq_data_get_node(irq_data);
-#ifdef CONFIG_X86_IO_APIC
- if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i])
- data = legacy_irq_data[virq + i];
- else
-#endif
- data = alloc_apic_chip_data(node);
- if (!data) {
+ irqd = irq_domain_get_irq_data(domain, virq + i);
+ BUG_ON(!irqd);
+ node = irq_data_get_node(irqd);
+ WARN_ON_ONCE(irqd->chip_data);
+ apicd = alloc_apic_chip_data(node);
+ if (!apicd) {
err = -ENOMEM;
goto error;
}
- irq_data->chip = &lapic_controller;
- irq_data->chip_data = data;
- irq_data->hwirq = virq + i;
- err = assign_irq_vector_policy(virq + i, node, data, info,
- irq_data);
- if (err)
- goto error;
+ apicd->irq = virq + i;
+ irqd->chip = &lapic_controller;
+ irqd->chip_data = apicd;
+ irqd->hwirq = virq + i;
+ irqd_set_single_target(irqd);
/*
- * If the apic destination mode is physical, then the
- * effective affinity is restricted to a single target
- * CPU. Mark the interrupt accordingly.
+ * Legacy vectors are already assigned when the IOAPIC
+ * takes them over. They stay on the same vector. This is
+ * required for check_timer() to work correctly as it might
+ * switch back to legacy mode. Only update the hardware
+ * config.
*/
- if (!apic->irq_dest_mode)
- irqd_set_single_target(irq_data);
+ if (info->flags & X86_IRQ_ALLOC_LEGACY) {
+ if (!vector_configure_legacy(virq + i, irqd, apicd))
+ continue;
+ }
+
+ err = assign_irq_vector_policy(irqd, info);
+ trace_vector_setup(virq + i, false, err);
+ if (err)
+ goto error;
}
return 0;
@@ -387,9 +541,56 @@ error:
return err;
}
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+ struct irq_data *irqd, int ind)
+{
+ unsigned int cpu, vector, prev_cpu, prev_vector;
+ struct apic_chip_data *apicd;
+ unsigned long flags;
+ int irq;
+
+ if (!irqd) {
+ irq_matrix_debug_show(m, vector_matrix, ind);
+ return;
+ }
+
+ irq = irqd->irq;
+ if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) {
+ seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq));
+ seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, "");
+ return;
+ }
+
+ apicd = irqd->chip_data;
+ if (!apicd) {
+ seq_printf(m, "%*sVector: Not assigned\n", ind, "");
+ return;
+ }
+
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ cpu = apicd->cpu;
+ vector = apicd->vector;
+ prev_cpu = apicd->prev_cpu;
+ prev_vector = apicd->prev_vector;
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+ seq_printf(m, "%*sVector: %5u\n", ind, "", vector);
+ seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu);
+ if (prev_vector) {
+ seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector);
+ seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu);
+ }
+}
+#endif
+
static const struct irq_domain_ops x86_vector_domain_ops = {
- .alloc = x86_vector_alloc_irqs,
- .free = x86_vector_free_irqs,
+ .alloc = x86_vector_alloc_irqs,
+ .free = x86_vector_free_irqs,
+ .activate = x86_vector_activate,
+ .deactivate = x86_vector_deactivate,
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+ .debug_show = x86_vector_debug_show,
+#endif
};
int __init arch_probe_nr_irqs(void)
@@ -419,35 +620,40 @@ int __init arch_probe_nr_irqs(void)
return legacy_pic->probe();
}
-#ifdef CONFIG_X86_IO_APIC
-static void __init init_legacy_irqs(void)
+void lapic_assign_legacy_vector(unsigned int irq, bool replace)
{
- int i, node = cpu_to_node(0);
- struct apic_chip_data *data;
-
/*
- * For legacy IRQ's, start with assigning irq0 to irq15 to
- * ISA_IRQ_VECTOR(i) for all cpu's.
+	 * Use the assign-system interface so the vector won't get accounted
+	 * as allocated and movable in the CPU hotplug check, and so the
+	 * managed irq reservation code cannot touch it.
*/
- for (i = 0; i < nr_legacy_irqs(); i++) {
- data = legacy_irq_data[i] = alloc_apic_chip_data(node);
- BUG_ON(!data);
+ irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
+}
+
+void __init lapic_assign_system_vectors(void)
+{
+ unsigned int i, vector = 0;
- data->cfg.vector = ISA_IRQ_VECTOR(i);
- cpumask_setall(data->domain);
- irq_set_chip_data(i, data);
+ for_each_set_bit_from(vector, system_vectors, NR_VECTORS)
+ irq_matrix_assign_system(vector_matrix, vector, false);
+
+ if (nr_legacy_irqs() > 1)
+ lapic_assign_legacy_vector(PIC_CASCADE_IR, false);
+
+	/* System vectors are reserved, so bring the vector matrix online */
+ irq_matrix_online(vector_matrix);
+
+ /* Mark the preallocated legacy interrupts */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ if (i != PIC_CASCADE_IR)
+ irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i));
}
}
-#else
-static inline void init_legacy_irqs(void) { }
-#endif
int __init arch_early_irq_init(void)
{
struct fwnode_handle *fn;
- init_legacy_irqs();
-
fn = irq_domain_alloc_named_fwnode("VECTOR");
BUG_ON(!fn);
x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
@@ -459,100 +665,115 @@ int __init arch_early_irq_init(void)
arch_init_msi_domain(x86_vector_domain);
arch_init_htirq_domain(x86_vector_domain);
- BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
- BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
+
+ /*
+ * Allocate the vector matrix allocator data structure and limit the
+ * search area.
+ */
+ vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR,
+ FIRST_SYSTEM_VECTOR);
+ BUG_ON(!vector_matrix);
return arch_early_ioapic_init();
}
-/* Initialize vector_irq on a new cpu */
-static void __setup_vector_irq(int cpu)
+#ifdef CONFIG_SMP
+
+static struct irq_desc *__setup_vector_irq(int vector)
{
- struct apic_chip_data *data;
- struct irq_desc *desc;
- int irq, vector;
+ int isairq = vector - ISA_IRQ_VECTOR(0);
+
+ /* Check whether the irq is in the legacy space */
+ if (isairq < 0 || isairq >= nr_legacy_irqs())
+ return VECTOR_UNUSED;
+ /* Check whether the irq is handled by the IOAPIC */
+ if (test_bit(isairq, &io_apic_irqs))
+ return VECTOR_UNUSED;
+ return irq_to_desc(isairq);
+}
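
The decision in __setup_vector_irq() boils down to two range/ownership checks on the vector. A user-space sketch of the same test; the ISA vector base and the 16-IRQ legacy size are assumptions used only for illustration:

#include <stdbool.h>
#include <stdio.h>

#define ISA_IRQ_VECTOR_BASE	0x30	/* assumed base, illustrative only */
#define NR_LEGACY		16

/*
 * Model: a vector is (re)installed on an onlining CPU only when it maps
 * to a legacy ISA IRQ that is still owned by the PIC, i.e. not taken
 * over by the IO-APIC.
 */
static bool vector_is_pic_legacy(unsigned int vector, unsigned long io_apic_irqs)
{
	int isairq = (int)vector - ISA_IRQ_VECTOR_BASE;

	if (isairq < 0 || isairq >= NR_LEGACY)
		return false;			/* not in the legacy space */
	if (io_apic_irqs & (1UL << isairq))
		return false;			/* IO-APIC handles it */
	return true;
}

int main(void)
{
	/* IRQ2 (cascade) left on the PIC, everything else on the IO-APIC */
	unsigned long io_apic_irqs = ~(1UL << 2) & 0xffffUL;

	printf("vector 0x32 -> %s\n",
	       vector_is_pic_legacy(0x32, io_apic_irqs) ? "install" : "clear");
	return 0;
}
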
- /* Mark the inuse vectors */
- for_each_irq_desc(irq, desc) {
- struct irq_data *idata = irq_desc_get_irq_data(desc);
+/* Online the local APIC infrastructure and initialize the vectors */
+void lapic_online(void)
+{
+ unsigned int vector;
- data = apic_chip_data(idata);
- if (!data || !cpumask_test_cpu(cpu, data->domain))
- continue;
- vector = data->cfg.vector;
- per_cpu(vector_irq, cpu)[vector] = desc;
- }
- /* Mark the free vectors */
- for (vector = 0; vector < NR_VECTORS; ++vector) {
- desc = per_cpu(vector_irq, cpu)[vector];
- if (IS_ERR_OR_NULL(desc))
- continue;
+ lockdep_assert_held(&vector_lock);
- data = apic_chip_data(irq_desc_get_irq_data(desc));
- if (!cpumask_test_cpu(cpu, data->domain))
- per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
- }
+ /* Online the vector matrix array for this CPU */
+ irq_matrix_online(vector_matrix);
+
+ /*
+ * The interrupt affinity logic never targets interrupts to offline
+	 * CPUs. The exceptions are the legacy PIC interrupts. In general
+ * they are only targeted to CPU0, but depending on the platform
+ * they can be distributed to any online CPU in hardware. The
+ * kernel has no influence on that. So all active legacy vectors
+	 * must be installed on all CPUs. All non-legacy interrupts can be
+ * cleared.
+ */
+ for (vector = 0; vector < NR_VECTORS; vector++)
+ this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
}
-/*
- * Setup the vector to irq mappings. Must be called with vector_lock held.
- */
-void setup_vector_irq(int cpu)
+void lapic_offline(void)
{
- int irq;
+ lock_vector_lock();
+ irq_matrix_offline(vector_matrix);
+ unlock_vector_lock();
+}
+
+static int apic_set_affinity(struct irq_data *irqd,
+ const struct cpumask *dest, bool force)
+{
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
+ int err;
- lockdep_assert_held(&vector_lock);
/*
- * On most of the platforms, legacy PIC delivers the interrupts on the
- * boot cpu. But there are certain platforms where PIC interrupts are
- * delivered to multiple cpu's. If the legacy IRQ is handled by the
- * legacy PIC, for the new cpu that is coming online, setup the static
- * legacy vector to irq mapping:
+ * Core code can call here for inactive interrupts. For inactive
+ * interrupts which use managed or reservation mode there is no
+ * point in going through the vector assignment right now as the
+ * activation will assign a vector which fits the destination
+ * cpumask. Let the core code store the destination mask and be
+ * done with it.
*/
- for (irq = 0; irq < nr_legacy_irqs(); irq++)
- per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq);
+ if (!irqd_is_activated(irqd) &&
+ (apicd->is_managed || apicd->can_reserve))
+ return IRQ_SET_MASK_OK;
- __setup_vector_irq(cpu);
+ raw_spin_lock(&vector_lock);
+ cpumask_and(vector_searchmask, dest, cpu_online_mask);
+ if (irqd_affinity_is_managed(irqd))
+ err = assign_managed_vector(irqd, vector_searchmask);
+ else
+ err = assign_vector_locked(irqd, vector_searchmask);
+ raw_spin_unlock(&vector_lock);
+ return err ? err : IRQ_SET_MASK_OK;
}
-static int apic_retrigger_irq(struct irq_data *irq_data)
+#else
+# define apic_set_affinity NULL
+#endif
+
+static int apic_retrigger_irq(struct irq_data *irqd)
{
- struct apic_chip_data *data = apic_chip_data(irq_data);
+ struct apic_chip_data *apicd = apic_chip_data(irqd);
unsigned long flags;
- int cpu;
raw_spin_lock_irqsave(&vector_lock, flags);
- cpu = cpumask_first_and(data->domain, cpu_online_mask);
- apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector);
+ apic->send_IPI(apicd->cpu, apicd->vector);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
}
-void apic_ack_edge(struct irq_data *data)
+void apic_ack_edge(struct irq_data *irqd)
{
- irq_complete_move(irqd_cfg(data));
- irq_move_irq(data);
+ irq_complete_move(irqd_cfg(irqd));
+ irq_move_irq(irqd);
ack_APIC_irq();
}
-static int apic_set_affinity(struct irq_data *irq_data,
- const struct cpumask *dest, bool force)
-{
- struct apic_chip_data *data = irq_data->chip_data;
- int err, irq = irq_data->irq;
-
- if (!IS_ENABLED(CONFIG_SMP))
- return -EPERM;
-
- if (!cpumask_intersects(dest, cpu_online_mask))
- return -EINVAL;
-
- err = assign_irq_vector(irq, data, dest, irq_data);
- return err ? err : IRQ_SET_MASK_OK;
-}
-
static struct irq_chip lapic_controller = {
.name = "APIC",
.irq_ack = apic_ack_edge,
@@ -561,115 +782,98 @@ static struct irq_chip lapic_controller = {
};
#ifdef CONFIG_SMP
-static void __send_cleanup_vector(struct apic_chip_data *data)
-{
- raw_spin_lock(&vector_lock);
- cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
- data->move_in_progress = 0;
- if (!cpumask_empty(data->old_domain))
- apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
- raw_spin_unlock(&vector_lock);
-}
-void send_cleanup_vector(struct irq_cfg *cfg)
+static void free_moved_vector(struct apic_chip_data *apicd)
{
- struct apic_chip_data *data;
+ unsigned int vector = apicd->prev_vector;
+ unsigned int cpu = apicd->prev_cpu;
+ bool managed = apicd->is_managed;
- data = container_of(cfg, struct apic_chip_data, cfg);
- if (data->move_in_progress)
- __send_cleanup_vector(data);
+ /*
+ * This should never happen. Managed interrupts are not
+ * migrated except on CPU down, which does not involve the
+ * cleanup vector. But try to keep the accounting correct
+ * nevertheless.
+ */
+ WARN_ON_ONCE(managed);
+
+ trace_vector_free_moved(apicd->irq, cpu, vector, managed);
+ irq_matrix_free(vector_matrix, cpu, vector, managed);
+ per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+ hlist_del_init(&apicd->clist);
+ apicd->prev_vector = 0;
+ apicd->move_in_progress = 0;
}
asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
{
- unsigned vector, me;
+ struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
+ struct apic_chip_data *apicd;
+ struct hlist_node *tmp;
entering_ack_irq();
-
/* Prevent vectors vanishing under us */
raw_spin_lock(&vector_lock);
- me = smp_processor_id();
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
- struct apic_chip_data *data;
- struct irq_desc *desc;
- unsigned int irr;
-
- retry:
- desc = __this_cpu_read(vector_irq[vector]);
- if (IS_ERR_OR_NULL(desc))
- continue;
-
- if (!raw_spin_trylock(&desc->lock)) {
- raw_spin_unlock(&vector_lock);
- cpu_relax();
- raw_spin_lock(&vector_lock);
- goto retry;
- }
-
- data = apic_chip_data(irq_desc_get_irq_data(desc));
- if (!data)
- goto unlock;
+ hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
+ unsigned int irr, vector = apicd->prev_vector;
/*
- * Nothing to cleanup if irq migration is in progress
- * or this cpu is not set in the cleanup mask.
- */
- if (data->move_in_progress ||
- !cpumask_test_cpu(me, data->old_domain))
- goto unlock;
-
- /*
- * We have two cases to handle here:
- * 1) vector is unchanged but the target mask got reduced
- * 2) vector and the target mask has changed
- *
- * #1 is obvious, but in #2 we have two vectors with the same
- * irq descriptor: the old and the new vector. So we need to
- * make sure that we only cleanup the old vector. The new
- * vector has the current @vector number in the config and
- * this cpu is part of the target mask. We better leave that
- * one alone.
- */
- if (vector == data->cfg.vector &&
- cpumask_test_cpu(me, data->domain))
- goto unlock;
-
- irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
- /*
- * Check if the vector that needs to be cleanedup is
- * registered at the cpu's IRR. If so, then this is not
- * the best time to clean it up. Lets clean it up in the
+ * Paranoia: Check if the vector that needs to be cleaned
+		 * up is registered at the APIC's IRR. If so, then this is
+ * not the best time to clean it up. Clean it up in the
* next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
- * to myself.
+ * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
+ * priority external vector, so on return from this
+ * interrupt the device interrupt will happen first.
*/
- if (irr & (1 << (vector % 32))) {
+ irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+ if (irr & (1U << (vector % 32))) {
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
- goto unlock;
+ continue;
}
- __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
- cpumask_clear_cpu(me, data->old_domain);
-unlock:
- raw_spin_unlock(&desc->lock);
+ free_moved_vector(apicd);
}
raw_spin_unlock(&vector_lock);
-
exiting_irq();
}
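
The IRR test in the cleanup handler only needs the register/bit arithmetic shown above: the 256 IRR bits live in eight 32-bit APIC registers spaced 0x10 apart. A small sketch of that arithmetic; the APIC_IRR offset used here is an assumption for illustration:

#include <stdio.h>

#define APIC_IRR	0x200	/* assumed register offset, illustrative */

/*
 * Given a vector, compute which IRR register and which bit the cleanup
 * handler has to test before it may release the old vector.
 */
static void irr_position(unsigned int vector, unsigned int *reg, unsigned int *bit)
{
	*reg = APIC_IRR + (vector / 32) * 0x10;
	*bit = vector % 32;
}

int main(void)
{
	unsigned int reg, bit;

	irr_position(0x41, &reg, &bit);	/* vector 65 */
	printf("vector 0x41 -> register 0x%x, bit %u\n", reg, bit);
	return 0;
}
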
+static void __send_cleanup_vector(struct apic_chip_data *apicd)
+{
+ unsigned int cpu;
+
+ raw_spin_lock(&vector_lock);
+ apicd->move_in_progress = 0;
+ cpu = apicd->prev_cpu;
+ if (cpu_online(cpu)) {
+ hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
+ apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ apicd->prev_vector = 0;
+ }
+ raw_spin_unlock(&vector_lock);
+}
+
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ struct apic_chip_data *apicd;
+
+ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
+ if (apicd->move_in_progress)
+ __send_cleanup_vector(apicd);
+}
+
static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
{
- unsigned me;
- struct apic_chip_data *data;
+ struct apic_chip_data *apicd;
- data = container_of(cfg, struct apic_chip_data, cfg);
- if (likely(!data->move_in_progress))
+ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
+ if (likely(!apicd->move_in_progress))
return;
- me = smp_processor_id();
- if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain))
- __send_cleanup_vector(data);
+ if (vector == apicd->vector && apicd->cpu == smp_processor_id())
+ __send_cleanup_vector(apicd);
}
void irq_complete_move(struct irq_cfg *cfg)
@@ -682,10 +886,9 @@ void irq_complete_move(struct irq_cfg *cfg)
*/
void irq_force_complete_move(struct irq_desc *desc)
{
- struct irq_data *irqdata;
- struct apic_chip_data *data;
- struct irq_cfg *cfg;
- unsigned int cpu;
+ struct apic_chip_data *apicd;
+ struct irq_data *irqd;
+ unsigned int vector;
/*
* The function is called for all descriptors regardless of which
@@ -696,43 +899,31 @@ void irq_force_complete_move(struct irq_desc *desc)
* Check first that the chip_data is what we expect
* (apic_chip_data) before touching it any further.
*/
- irqdata = irq_domain_get_irq_data(x86_vector_domain,
- irq_desc_get_irq(desc));
- if (!irqdata)
+ irqd = irq_domain_get_irq_data(x86_vector_domain,
+ irq_desc_get_irq(desc));
+ if (!irqd)
return;
- data = apic_chip_data(irqdata);
- cfg = data ? &data->cfg : NULL;
+ raw_spin_lock(&vector_lock);
+ apicd = apic_chip_data(irqd);
+ if (!apicd)
+ goto unlock;
- if (!cfg)
- return;
+ /*
+ * If prev_vector is empty, no action required.
+ */
+ vector = apicd->prev_vector;
+ if (!vector)
+ goto unlock;
/*
- * This is tricky. If the cleanup of @data->old_domain has not been
+ * This is tricky. If the cleanup of the old vector has not been
* done yet, then the following setaffinity call will fail with
* -EBUSY. This can leave the interrupt in a stale state.
*
* All CPUs are stuck in stop machine with interrupts disabled so
* calling __irq_complete_move() would be completely pointless.
- */
- raw_spin_lock(&vector_lock);
- /*
- * Clean out all offline cpus (including the outgoing one) from the
- * old_domain mask.
- */
- cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
-
- /*
- * If move_in_progress is cleared and the old_domain mask is empty,
- * then there is nothing to cleanup. fixup_irqs() will take care of
- * the stale vectors on the outgoing cpu.
- */
- if (!data->move_in_progress && cpumask_empty(data->old_domain)) {
- raw_spin_unlock(&vector_lock);
- return;
- }
-
- /*
+ *
* 1) The interrupt is in move_in_progress state. That means that we
* have not seen an interrupt since the io_apic was reprogrammed to
* the new vector.
@@ -740,7 +931,7 @@ void irq_force_complete_move(struct irq_desc *desc)
* 2) The interrupt has fired on the new vector, but the cleanup IPIs
* have not been processed yet.
*/
- if (data->move_in_progress) {
+ if (apicd->move_in_progress) {
/*
* In theory there is a race:
*
@@ -774,21 +965,43 @@ void irq_force_complete_move(struct irq_desc *desc)
* area arises.
*/
pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
- irqdata->irq, cfg->old_vector);
+ irqd->irq, vector);
}
- /*
- * If old_domain is not empty, then other cpus still have the irq
- * descriptor set in their vector array. Clean it up.
- */
- for_each_cpu(cpu, data->old_domain)
- per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
+ free_moved_vector(apicd);
+unlock:
+ raw_spin_unlock(&vector_lock);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Note: this is not accurate accounting, but it is good enough to
+ * prevent the actual interrupt move from running out of vectors.
+ */
+int lapic_can_unplug_cpu(void)
+{
+ unsigned int rsvd, avl, tomove, cpu = smp_processor_id();
+ int ret = 0;
- /* Cleanup the left overs of the (half finished) move */
- cpumask_clear(data->old_domain);
- data->move_in_progress = 0;
+ raw_spin_lock(&vector_lock);
+ tomove = irq_matrix_allocated(vector_matrix);
+ avl = irq_matrix_available(vector_matrix, true);
+ if (avl < tomove) {
+ pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n",
+ cpu, tomove, avl);
+ ret = -ENOSPC;
+ goto out;
+ }
+ rsvd = irq_matrix_reserved(vector_matrix);
+ if (avl < rsvd) {
+ pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n",
+ rsvd, avl);
+ }
+out:
raw_spin_unlock(&vector_lock);
+ return ret;
}
-#endif
+#endif /* HOTPLUG_CPU */
+#endif /* SMP */
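
The unplug check reduces to two comparisons against the matrix counters. A user-space sketch of the same decision, with plain integers standing in for irq_matrix_allocated(), irq_matrix_available() and irq_matrix_reserved():

#include <stdio.h>

/*
 * Model of lapic_can_unplug_cpu(): the CPU may go down only when the
 * remaining CPUs have at least as many free vectors as this CPU needs
 * to migrate away. Reserved (not yet activated) vectors only trigger a
 * warning since they carry no guarantee anyway.
 */
static int can_unplug(unsigned int to_move, unsigned int available,
		      unsigned int reserved)
{
	if (available < to_move) {
		printf("cannot unplug: need %u, only %u free\n",
		       to_move, available);
		return -1;
	}
	if (available < reserved)
		printf("warning: %u reserved > %u free, requests may fail\n",
		       reserved, available);
	return 0;
}

int main(void)
{
	printf("%d\n", can_unplug(12, 40, 8));
	printf("%d\n", can_unplug(50, 40, 8));
	return 0;
}
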
static void __init print_APIC_field(int base)
{
diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h
new file mode 100644
index 000000000000..b107de381cb5
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic.h
@@ -0,0 +1,9 @@
+/* Common bits for X2APIC cluster/physical modes. */
+
+int x2apic_apic_id_valid(int apicid);
+int x2apic_apic_id_registered(void);
+void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest);
+unsigned int x2apic_get_apic_id(unsigned long id);
+u32 x2apic_set_apic_id(unsigned int id);
+int x2apic_phys_pkg_id(int initial_apicid, int index_msb);
+void x2apic_send_IPI_self(int vector);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e216cf3d64d2..622f13ca8a94 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -9,22 +9,24 @@
#include <linux/cpu.h>
#include <asm/smp.h>
-#include <asm/x2apic.h>
+#include "x2apic.h"
+
+struct cluster_mask {
+ unsigned int clusterid;
+ int node;
+ struct cpumask mask;
+};
static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
-static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
+static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
+static struct cluster_mask *cluster_hotplug_mask;
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return x2apic_enabled();
}
-static inline u32 x2apic_cluster(int cpu)
-{
- return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
-}
-
static void x2apic_send_IPI(int cpu, int vector)
{
u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
@@ -36,49 +38,34 @@ static void x2apic_send_IPI(int cpu, int vector)
static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
- struct cpumask *cpus_in_cluster_ptr;
- struct cpumask *ipi_mask_ptr;
- unsigned int cpu, this_cpu;
+ unsigned int cpu, clustercpu;
+ struct cpumask *tmpmsk;
unsigned long flags;
u32 dest;
x2apic_wrmsr_fence();
-
local_irq_save(flags);
- this_cpu = smp_processor_id();
+ tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
+ cpumask_copy(tmpmsk, mask);
+ /* If IPI should not be sent to self, clear current CPU */
+ if (apic_dest != APIC_DEST_ALLINC)
+ cpumask_clear_cpu(smp_processor_id(), tmpmsk);
- /*
- * We are to modify mask, so we need an own copy
- * and be sure it's manipulated with irq off.
- */
- ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask);
- cpumask_copy(ipi_mask_ptr, mask);
-
- /*
- * The idea is to send one IPI per cluster.
- */
- for_each_cpu(cpu, ipi_mask_ptr) {
- unsigned long i;
+ /* Collapse cpus in a cluster so a single IPI per cluster is sent */
+ for_each_cpu(cpu, tmpmsk) {
+ struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
- cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
dest = 0;
-
- /* Collect cpus in cluster. */
- for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
- if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
- dest |= per_cpu(x86_cpu_to_logical_apicid, i);
- }
+ for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
+ dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
if (!dest)
continue;
__x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
- /*
- * Cluster sibling cpus should be discared now so
- * we would not send IPI them second time.
- */
- cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
+ /* Remove cluster CPUs from tmpmask */
+ cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
}
local_irq_restore(flags);
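
The rewritten loop sends exactly one IPI per cluster by OR-ing the logical IDs of all targeted cluster members and then dropping them from the working mask. A user-space sketch of that collapse, with made-up per-CPU tables and a plain bitmask instead of a cpumask:

#include <stdio.h>

#define NCPUS 8

/* Hypothetical per-CPU data for the model: logical APIC id and cluster. */
static const unsigned int logical_id[NCPUS] = { 1, 2, 4, 8, 1, 2, 4, 8 };
static const unsigned int cluster_of[NCPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };

/*
 * One IPI per cluster: collapse all targeted CPUs of a cluster into a
 * single logical destination and drop them from the working mask.
 */
static void send_ipi_mask(unsigned int mask, int vector)
{
	while (mask) {
		int cpu = __builtin_ctz(mask);
		unsigned int clu = cluster_of[cpu], dest = 0;

		for (int i = 0; i < NCPUS; i++) {
			if ((mask & (1U << i)) && cluster_of[i] == clu) {
				dest |= logical_id[i];
				mask &= ~(1U << i);
			}
		}
		printf("IPI vector %d to cluster %u, dest 0x%x\n",
		       vector, clu, dest);
	}
}

int main(void)
{
	send_ipi_mask(0xb6, 0xfd);	/* CPUs 1, 2, 4, 5, 7 */
	return 0;
}
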
@@ -105,125 +92,90 @@ static void x2apic_send_IPI_all(int vector)
__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
}
-static int
-x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
- unsigned int *apicid)
+static u32 x2apic_calc_apicid(unsigned int cpu)
{
- struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
- unsigned int cpu;
- u32 dest = 0;
- u16 cluster;
-
- cpu = cpumask_first(mask);
- if (cpu >= nr_cpu_ids)
- return -EINVAL;
-
- dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
- cluster = x2apic_cluster(cpu);
-
- cpumask_clear(effmsk);
- for_each_cpu(cpu, mask) {
- if (cluster != x2apic_cluster(cpu))
- continue;
- dest |= per_cpu(x86_cpu_to_logical_apicid, cpu);
- cpumask_set_cpu(cpu, effmsk);
- }
-
- *apicid = dest;
- return 0;
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
}
static void init_x2apic_ldr(void)
{
- unsigned int this_cpu = smp_processor_id();
+ struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
+ u32 cluster, apicid = apic_read(APIC_LDR);
unsigned int cpu;
- per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
+ this_cpu_write(x86_cpu_to_logical_apicid, apicid);
+
+ if (cmsk)
+ goto update;
- cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+ cluster = apicid >> 16;
for_each_online_cpu(cpu) {
- if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
- continue;
- cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
- cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ cmsk = per_cpu(cluster_masks, cpu);
+ /* Matching cluster found. Link and update it. */
+ if (cmsk && cmsk->clusterid == cluster)
+ goto update;
}
+ cmsk = cluster_hotplug_mask;
+ cluster_hotplug_mask = NULL;
+update:
+ this_cpu_write(cluster_masks, cmsk);
+ cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
}
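
init_x2apic_ldr() keys the cluster_masks lookup on the upper 16 bits of the LDR. A small sketch of that split; only the ">> 16" cluster extraction is taken from the code above, the derivation of the LDR from the x2APIC id is an assumption for illustration:

#include <stdio.h>

/*
 * The LDR splits into a cluster id (bits 31:16) and a per-cluster member
 * bitmap (bits 15:0); the cluster code only needs the cluster id.
 */
static unsigned int ldr_from_x2apic_id(unsigned int id)
{
	return ((id >> 4) << 16) | (1U << (id & 0xf));
}

int main(void)
{
	unsigned int ldr = ldr_from_x2apic_id(0x23);

	printf("LDR 0x%08x -> cluster %u, member bit 0x%04x\n",
	       ldr, ldr >> 16, ldr & 0xffff);
	return 0;
}
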
-/*
- * At CPU state changes, update the x2apic cluster sibling info.
- */
-static int x2apic_prepare_cpu(unsigned int cpu)
+static int alloc_clustermask(unsigned int cpu, int node)
{
- if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
- return -ENOMEM;
+ if (per_cpu(cluster_masks, cpu))
+ return 0;
+ /*
+ * If a hotplug spare mask exists, check whether it's on the right
+ * node. If not, free it and allocate a new one.
+ */
+ if (cluster_hotplug_mask) {
+ if (cluster_hotplug_mask->node == node)
+ return 0;
+ kfree(cluster_hotplug_mask);
+ }
- if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) {
- free_cpumask_var(per_cpu(cpus_in_cluster, cpu));
+ cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
+ GFP_KERNEL, node);
+ if (!cluster_hotplug_mask)
return -ENOMEM;
- }
+ cluster_hotplug_mask->node = node;
+ return 0;
+}
+static int x2apic_prepare_cpu(unsigned int cpu)
+{
+ if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
+ return -ENOMEM;
+ if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
+ return -ENOMEM;
return 0;
}
-static int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int dead_cpu)
{
- int cpu;
+ struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
- for_each_online_cpu(cpu) {
- if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
- continue;
- cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
- cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
- }
- free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
- free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+ cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+ free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
return 0;
}
static int x2apic_cluster_probe(void)
{
- int cpu = smp_processor_id();
- int ret;
-
if (!x2apic_mode)
return 0;
- ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
- x2apic_prepare_cpu, x2apic_dead_cpu);
- if (ret < 0) {
+ if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
+ x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
pr_err("Failed to register X2APIC_PREPARE\n");
return 0;
}
- cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
+ init_x2apic_ldr();
return 1;
}
-static const struct cpumask *x2apic_cluster_target_cpus(void)
-{
- return cpu_all_mask;
-}
-
-/*
- * Each x2apic cluster is an allocation domain.
- */
-static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask)
-{
- /*
- * To minimize vector pressure, default case of boot, device bringup
- * etc will use a single cpu for the interrupt destination.
- *
- * On explicit migration requests coming from irqbalance etc,
- * interrupts will be routed to the x2apic cluster (cluster-id
- * derived from the first cpu in the mask) members specified
- * in the mask.
- */
- if (mask == x2apic_cluster_target_cpus())
- cpumask_copy(retmask, cpumask_of(cpu));
- else
- cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
-}
-
static struct apic apic_x2apic_cluster __ro_after_init = {
.name = "cluster x2apic",
@@ -235,12 +187,10 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.irq_delivery_mode = dest_LowestPrio,
.irq_dest_mode = 1, /* logical */
- .target_cpus = x2apic_cluster_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
- .vector_allocation_domain = cluster_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -253,7 +203,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = x2apic_set_apic_id,
- .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .calc_dest_apicid = x2apic_calc_apicid,
.send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index b94d35320f85..f8d9d69994e6 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -7,7 +7,8 @@
#include <linux/dmar.h>
#include <asm/smp.h>
-#include <asm/x2apic.h>
+#include <asm/ipi.h>
+#include "x2apic.h"
int x2apic_phys;
@@ -99,6 +100,43 @@ static int x2apic_phys_probe(void)
return apic == &apic_x2apic_phys;
}
+/* Common x2apic functions, also used by x2apic_cluster */
+int x2apic_apic_id_valid(int apicid)
+{
+ return 1;
+}
+
+int x2apic_apic_id_registered(void)
+{
+ return 1;
+}
+
+void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
+{
+ unsigned long cfg = __prepare_ICR(0, vector, dest);
+ native_x2apic_icr_write(cfg, apicid);
+}
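
In x2APIC mode the IPI above is a single 64-bit MSR write: the prepared ICR low word plus the destination APIC id in the upper half. A sketch of that composition; the MSR number and the logical-destination bit position are assumptions used only for illustration:

#include <stdint.h>
#include <stdio.h>

#define X2APIC_ICR_MSR	0x830	/* assumed MSR number, illustrative only */

/*
 * Model of the single-MSR IPI: the 32-bit ICR low word (vector plus
 * delivery/destination mode bits) and the destination APIC id are
 * combined into one 64-bit value written with a single wrmsr.
 */
static uint64_t build_icr(uint32_t icr_low, uint32_t dest_apicid)
{
	return ((uint64_t)dest_apicid << 32) | icr_low;
}

int main(void)
{
	/* vector 0xec; logical destination mode bit assumed to be bit 11 */
	uint32_t icr_low = 0xec | (1U << 11);

	printf("wrmsr(0x%x, 0x%016llx)\n", X2APIC_ICR_MSR,
	       (unsigned long long)build_icr(icr_low, 0x00020008));
	return 0;
}
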
+
+unsigned int x2apic_get_apic_id(unsigned long id)
+{
+ return id;
+}
+
+u32 x2apic_set_apic_id(unsigned int id)
+{
+ return id;
+}
+
+int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
+{
+ return initial_apicid >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
static struct apic apic_x2apic_phys __ro_after_init = {
.name = "physical x2apic",
@@ -110,12 +148,10 @@ static struct apic apic_x2apic_phys __ro_after_init = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -128,7 +164,7 @@ static struct apic apic_x2apic_phys __ro_after_init = {
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = x2apic_set_apic_id,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_default_calc_apicid,
.send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0d57bb9079c9..5832df6d9c37 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -525,16 +525,9 @@ static void uv_init_apic_ldr(void)
{
}
-static int
-uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
- unsigned int *apicid)
+static u32 apic_uv_calc_apicid(unsigned int cpu)
{
- int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid);
-
- if (!ret)
- *apicid |= uv_apicid_hibits;
-
- return ret;
+ return apic_default_calc_apicid(cpu) | uv_apicid_hibits;
}
static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -547,7 +540,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
return id;
}
-static unsigned long set_apic_id(unsigned int id)
+static u32 set_apic_id(unsigned int id)
{
/* CHECKME: Do we need to mask out the xapic extra bits? */
return id;
@@ -584,12 +577,10 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* Physical */
- .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
- .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = uv_init_apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -602,7 +593,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = set_apic_id,
- .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_uv_calc_apicid,
.send_IPI = uv_send_IPI_one,
.send_IPI_mask = uv_send_IPI_mask,
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 8f5cb2c7060c..86c4439f9d74 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq)
io_apic_irqs &= ~(1<<irq);
irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
enable_irq(irq);
+ lapic_assign_legacy_vector(irq, true);
}
/*
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 6107ee1cb8d5..723fa9782186 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -225,7 +225,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy
idt_init_desc(&desc, t);
write_idt_entry(idt, t->vector, &desc);
if (sys)
- set_bit(t->vector, used_vectors);
+ set_bit(t->vector, system_vectors);
}
}
@@ -313,14 +313,14 @@ void __init idt_setup_apic_and_irq_gates(void)
idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
- for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) {
+ for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) {
entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
set_intr_gate(i, entry);
}
- for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+ for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
#ifdef CONFIG_X86_LOCAL_APIC
- set_bit(i, used_vectors);
+ set_bit(i, system_vectors);
set_intr_gate(i, spurious_interrupt);
#else
entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
@@ -358,7 +358,7 @@ void idt_invalidate(void *addr)
void __init update_intr_gate(unsigned int n, const void *addr)
{
- if (WARN_ON_ONCE(!test_bit(n, used_vectors)))
+ if (WARN_ON_ONCE(!test_bit(n, system_vectors)))
return;
set_intr_gate(n, addr);
}
@@ -366,6 +366,6 @@ void __init update_intr_gate(unsigned int n, const void *addr)
void alloc_intr_gate(unsigned int n, const void *addr)
{
BUG_ON(n < FIRST_SYSTEM_VECTOR);
- if (!test_and_set_bit(n, used_vectors))
+ if (!test_and_set_bit(n, system_vectors))
set_intr_gate(n, addr);
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 52089c043160..49cfd9fe7589 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Machine check polls\n");
#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
- if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
+ if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
@@ -333,105 +333,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
#ifdef CONFIG_HOTPLUG_CPU
-
-/* These two declarations are only used in check_irq_vectors_for_cpu_disable()
- * below, which is protected by stop_machine(). Putting them on the stack
- * results in a stack frame overflow. Dynamically allocating could result in a
- * failure so declare these two cpumasks as global.
- */
-static struct cpumask affinity_new, online_new;
-
-/*
- * This cpu is going to be removed and its vectors migrated to the remaining
- * online cpus. Check to see if there are enough vectors in the remaining cpus.
- * This function is protected by stop_machine().
- */
-int check_irq_vectors_for_cpu_disable(void)
-{
- unsigned int this_cpu, vector, this_count, count;
- struct irq_desc *desc;
- struct irq_data *data;
- int cpu;
-
- this_cpu = smp_processor_id();
- cpumask_copy(&online_new, cpu_online_mask);
- cpumask_clear_cpu(this_cpu, &online_new);
-
- this_count = 0;
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
- desc = __this_cpu_read(vector_irq[vector]);
- if (IS_ERR_OR_NULL(desc))
- continue;
- /*
- * Protect against concurrent action removal, affinity
- * changes etc.
- */
- raw_spin_lock(&desc->lock);
- data = irq_desc_get_irq_data(desc);
- cpumask_copy(&affinity_new,
- irq_data_get_affinity_mask(data));
- cpumask_clear_cpu(this_cpu, &affinity_new);
-
- /* Do not count inactive or per-cpu irqs. */
- if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) {
- raw_spin_unlock(&desc->lock);
- continue;
- }
-
- raw_spin_unlock(&desc->lock);
- /*
- * A single irq may be mapped to multiple cpu's
- * vector_irq[] (for example IOAPIC cluster mode). In
- * this case we have two possibilities:
- *
- * 1) the resulting affinity mask is empty; that is
- * this the down'd cpu is the last cpu in the irq's
- * affinity mask, or
- *
- * 2) the resulting affinity mask is no longer a
- * subset of the online cpus but the affinity mask is
- * not zero; that is the down'd cpu is the last online
- * cpu in a user set affinity mask.
- */
- if (cpumask_empty(&affinity_new) ||
- !cpumask_subset(&affinity_new, &online_new))
- this_count++;
- }
- /* No need to check any further. */
- if (!this_count)
- return 0;
-
- count = 0;
- for_each_online_cpu(cpu) {
- if (cpu == this_cpu)
- continue;
- /*
- * We scan from FIRST_EXTERNAL_VECTOR to first system
- * vector. If the vector is marked in the used vectors
- * bitmap or an irq is assigned to it, we don't count
- * it as available.
- *
- * As this is an inaccurate snapshot anyway, we can do
- * this w/o holding vector_lock.
- */
- for (vector = FIRST_EXTERNAL_VECTOR;
- vector < FIRST_SYSTEM_VECTOR; vector++) {
- if (!test_bit(vector, used_vectors) &&
- IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) {
- if (++count == this_count)
- return 0;
- }
- }
- }
-
- if (count < this_count) {
- pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n",
- this_cpu, this_count, count);
- return -ERANGE;
- }
- return 0;
-}
-
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 1e4094eba15e..8da3e909e967 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,9 +61,6 @@ void __init init_ISA_irqs(void)
struct irq_chip *chip = legacy_pic->chip;
int i;
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
- init_bsp_APIC();
-#endif
legacy_pic->init(0);
for (i = 0; i < nr_legacy_irqs(); i++)
@@ -94,6 +91,7 @@ void __init native_init_IRQ(void)
x86_init.irqs.pre_vector_init();
idt_setup_apic_and_irq_gates();
+ lapic_assign_system_vectors();
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0957dd73d127..82559867e0a9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -136,18 +136,6 @@ RESERVE_BRK(dmi_alloc, 65536);
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
-#ifdef CONFIG_X86_64
-int default_cpu_present_to_apicid(int mps_cpu)
-{
- return __default_cpu_present_to_apicid(mps_cpu);
-}
-
-int default_check_phys_apicid_present(int phys_apicid)
-{
- return __default_check_phys_apicid_present(phys_apicid);
-}
-#endif
-
struct boot_params boot_params;
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ad59edd84de7..92aadfa30d61 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -254,14 +254,14 @@ static void notrace start_secondary(void *unused)
check_tsc_sync_target();
/*
- * Lock vector_lock and initialize the vectors on this cpu
- * before setting the cpu online. We must set it online with
- * vector_lock held to prevent a concurrent setup/teardown
- * from seeing a half valid vector space.
+ * Lock vector_lock, set CPU online and bring the vector
+ * allocator online. Online must be set with vector_lock held
+ * to prevent a concurrent irq setup/teardown from seeing a
+ * half valid vector space.
*/
lock_vector_lock();
- setup_vector_irq(smp_processor_id());
set_cpu_online(smp_processor_id(), true);
+ lapic_online();
unlock_vector_lock();
cpu_set_state_online(smp_processor_id());
x86_platform.nmi_init();
@@ -1190,17 +1190,10 @@ static __init void disable_smp(void)
cpumask_set_cpu(0, topology_core_cpumask(0));
}
-enum {
- SMP_OK,
- SMP_NO_CONFIG,
- SMP_NO_APIC,
- SMP_FORCE_UP,
-};
-
/*
* Various sanity checks.
*/
-static int __init smp_sanity_check(unsigned max_cpus)
+static void __init smp_sanity_check(void)
{
preempt_disable();
@@ -1238,16 +1231,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
}
/*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config && !acpi_lapic) {
- preempt_enable();
- pr_notice("SMP motherboard not detected\n");
- return SMP_NO_CONFIG;
- }
-
- /*
* Should not be necessary because the MP table should list the boot
* CPU too, but we do it for the sake of robustness anyway.
*/
@@ -1257,29 +1240,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
physid_set(hard_smp_processor_id(), phys_cpu_present_map);
}
preempt_enable();
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(boot_cpu_apic_version) &&
- !boot_cpu_has(X86_FEATURE_APIC)) {
- if (!disable_apic) {
- pr_err("BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
- }
- return SMP_NO_APIC;
- }
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- pr_info("SMP mode deactivated\n");
- return SMP_FORCE_UP;
- }
-
- return SMP_OK;
}
static void __init smp_cpu_index_default(void)
@@ -1294,9 +1254,18 @@ static void __init smp_cpu_index_default(void)
}
}
+static void __init smp_get_logical_apicid(void)
+{
+ if (x2apic_mode)
+ cpu0_logical_apicid = apic_read(APIC_LDR);
+ else
+ cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+}
+
/*
- * Prepare for SMP bootup. The MP table or ACPI has been read
- * earlier. Just do some sanity checking here and enable APIC mode.
+ * Prepare for SMP bootup.
+ * @max_cpus: configured maximum number of CPUs. It is a legacy parameter
+ * for common interface support.
*/
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
@@ -1328,31 +1297,27 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_cpu_sibling_map(0);
- switch (smp_sanity_check(max_cpus)) {
- case SMP_NO_CONFIG:
- disable_smp();
- if (APIC_init_uniprocessor())
- pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
- return;
- case SMP_NO_APIC:
+ smp_sanity_check();
+
+ switch (apic_intr_mode) {
+ case APIC_PIC:
+ case APIC_VIRTUAL_WIRE_NO_CONFIG:
disable_smp();
return;
- case SMP_FORCE_UP:
+ case APIC_SYMMETRIC_IO_NO_ROUTING:
disable_smp();
- apic_bsp_setup(false);
+ /* Setup local timer */
+ x86_init.timers.setup_percpu_clockev();
return;
- case SMP_OK:
+ case APIC_VIRTUAL_WIRE:
+ case APIC_SYMMETRIC_IO:
break;
}
- if (read_apic_id() != boot_cpu_physical_apicid) {
- panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- read_apic_id(), boot_cpu_physical_apicid);
- /* Or can we switch back to PIC here? */
- }
+ /* Setup local timer */
+ x86_init.timers.setup_percpu_clockev();
- default_setup_apic_routing();
- cpu0_logical_apicid = apic_bsp_setup(false);
+ smp_get_logical_apicid();
pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
@@ -1395,7 +1360,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
nmi_selftest();
impress_friends();
- setup_ioapic_dest();
mtrr_aps_init();
}
@@ -1554,13 +1518,14 @@ void cpu_disable_common(void)
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs();
+ lapic_offline();
}
int native_cpu_disable(void)
{
int ret;
- ret = check_irq_vectors_for_cpu_disable();
+ ret = lapic_can_unplug_cpu();
if (ret)
return ret;
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 879af864d99a..749d189f8cd4 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -85,6 +85,11 @@ void __init hpet_time_init(void)
static __init void x86_late_time_init(void)
{
x86_init.timers.timer_init();
+ /*
+ * After PIT/HPET timers init, select and set up
+ * the final interrupt mode for delivering IRQs.
+ */
+ x86_init.irqs.intr_mode_init();
tsc_init();
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 67db4f43309e..a5791f3d3f84 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -71,7 +71,7 @@
#include <asm/proto.h>
#endif
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
+DECLARE_BITMAP(system_vectors, NR_VECTORS);
static inline void cond_local_irq_enable(struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index b034b1b14b9c..44685fb2a192 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,9 +26,6 @@
#define TOPOLOGY_REGISTER_OFFSET 0x10
-/* Flag below is initialized once during vSMP PCI initialization. */
-static int irq_routing_comply = 1;
-
#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
/*
* Interrupt control on vSMPowered systems:
@@ -105,9 +102,6 @@ static void __init set_vsmp_pv_ops(void)
if (cap & ctl & BIT(8)) {
ctl &= ~BIT(8);
- /* Interrupt routing set to ignore */
- irq_routing_comply = 0;
-
#ifdef CONFIG_PROC_FS
/* Don't let users change irq affinity via procfs */
no_irq_affinity = 1;
@@ -211,23 +205,10 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
return hard_smp_processor_id() >> index_msb;
}
-/*
- * In vSMP, all cpus should be capable of handling interrupts, regardless of
- * the APIC used.
- */
-static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask)
-{
- cpumask_setall(retmask);
-}
-
static void vsmp_apic_post_init(void)
{
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;
-
- if (!irq_routing_comply)
- apic->vector_allocation_domain = fill_vector_allocation_domain;
}
void __init vsmp_init(void)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a088b2c47f73..a7889b93e438 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -55,6 +55,7 @@ struct x86_init_ops x86_init __initdata = {
.pre_vector_init = init_ISA_irqs,
.intr_init = native_init_IRQ,
.trap_init = x86_init_noop,
+ .intr_mode_init = apic_intr_mode_init
},
.oem = {
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 03fc397335b7..5f6fd860820a 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -127,10 +127,11 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
-static void uv_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+static int uv_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
+ return 0;
}
/*
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 30434b8708f2..6b830d4cb4c8 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -31,7 +31,7 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
return 0xfd;
}
-static unsigned long xen_set_apic_id(unsigned int x)
+static u32 xen_set_apic_id(unsigned int x)
{
WARN_ON(1);
return x;
@@ -161,12 +161,10 @@ static struct apic xen_pv_apic = {
/* .irq_delivery_mode - used in native_compose_msi_msg only */
/* .irq_dest_mode - used in native_compose_msi_msg only */
- .target_cpus = default_target_cpus,
.disable_esr = 0,
/* .dest_logical - default_send_IPI_ use it but we use our own. */
.check_apicid_used = default_check_apicid_used, /* Used on 32-bit */
- .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = xen_noop, /* setup_local_APIC calls it */
.ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */
@@ -179,7 +177,7 @@ static struct apic xen_pv_apic = {
.get_apic_id = xen_get_apic_id,
.set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */
- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .calc_dest_apicid = apic_flat_calc_apicid,
#ifdef CONFIG_SMP
.send_IPI_mask = xen_send_IPI_mask,
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d4396e27b1fb..7b3b17f2ab50 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1231,6 +1231,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_platform.get_nmi_reason = xen_get_nmi_reason;
x86_init.resources.memory_setup = xen_memory_setup;
+ x86_init.irqs.intr_mode_init = x86_init_noop;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c
index 033258634b8c..b5843fe6a44d 100644
--- a/drivers/gpio/gpio-xgene-sb.c
+++ b/drivers/gpio/gpio-xgene-sb.c
@@ -140,8 +140,9 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio)
return irq_create_fwspec_mapping(&fwspec);
}
-static void xgene_gpio_sb_domain_activate(struct irq_domain *d,
- struct irq_data *irq_data)
+static int xgene_gpio_sb_domain_activate(struct irq_domain *d,
+ struct irq_data *irq_data,
+ bool early)
{
struct xgene_gpio_sb *priv = d->host_data;
u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq);
@@ -150,11 +151,12 @@ static void xgene_gpio_sb_domain_activate(struct irq_domain *d,
dev_err(priv->gc.parent,
"Unable to configure XGene GPIO standby pin %d as IRQ\n",
gpio);
- return;
+ return -ENOSPC;
}
xgene_gpio_set_bit(&priv->gc, priv->regs + MPA_GPIO_SEL_LO,
gpio * 2, 1);
+ return 0;
}
static void xgene_gpio_sb_domain_deactivate(struct irq_domain *d,
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 8e8874d23717..9c848e36f209 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4173,16 +4173,26 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_common(domain, virq, nr_irqs);
}
-static void irq_remapping_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
+ struct amd_ir_data *ir_data,
+ struct irq_2_irte *irte_info,
+ struct irq_cfg *cfg);
+
+static int irq_remapping_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
struct amd_ir_data *data = irq_data->chip_data;
struct irq_2_irte *irte_info = &data->irq_2_irte;
struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
+ struct irq_cfg *cfg = irqd_cfg(irq_data);
- if (iommu)
- iommu->irte_ops->activate(data->entry, irte_info->devid,
- irte_info->index);
+ if (!iommu)
+ return 0;
+
+ iommu->irte_ops->activate(data->entry, irte_info->devid,
+ irte_info->index);
+ amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg);
+ return 0;
}
static void irq_remapping_deactivate(struct irq_domain *domain,
@@ -4269,6 +4279,22 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
}
+
+static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
+ struct amd_ir_data *ir_data,
+ struct irq_2_irte *irte_info,
+ struct irq_cfg *cfg)
+{
+
+ /*
+ * Atomically updates the IRTE with the new destination, vector
+ * and flushes the interrupt entry cache.
+ */
+ iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
+ irte_info->index, cfg->vector,
+ cfg->dest_apicid);
+}
+
static int amd_ir_set_affinity(struct irq_data *data,
const struct cpumask *mask, bool force)
{
@@ -4286,13 +4312,7 @@ static int amd_ir_set_affinity(struct irq_data *data,
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
- /*
- * Atomically updates the IRTE with the new destination, vector
- * and flushes the interrupt entry cache.
- */
- iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
- irte_info->index, cfg->vector, cfg->dest_apicid);
-
+ amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg);
/*
* After this point, all the interrupts will start arriving
* at the new destination. So, time to cleanup the previous
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 25842b566c39..76a193c7fcfc 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1122,6 +1122,24 @@ struct irq_remap_ops intel_irq_remap_ops = {
.get_irq_domain = intel_get_irq_domain,
};
+static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
+{
+ struct intel_ir_data *ir_data = irqd->chip_data;
+ struct irte *irte = &ir_data->irte_entry;
+ struct irq_cfg *cfg = irqd_cfg(irqd);
+
+ /*
+ * Atomically updates the IRTE with the new destination, vector
+ * and flushes the interrupt entry cache.
+ */
+ irte->vector = cfg->vector;
+ irte->dest_id = IRTE_DEST(cfg->dest_apicid);
+
+ /* Update the hardware only if the interrupt is in remapped mode. */
+ if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
+ modify_irte(&ir_data->irq_2_iommu, irte);
+}
+
/*
* Migrate the IO-APIC irq in the presence of intr-remapping.
*
@@ -1140,27 +1158,15 @@ static int
intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
- struct intel_ir_data *ir_data = data->chip_data;
- struct irte *irte = &ir_data->irte_entry;
- struct irq_cfg *cfg = irqd_cfg(data);
struct irq_data *parent = data->parent_data;
+ struct irq_cfg *cfg = irqd_cfg(data);
int ret;
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
- /*
- * Atomically updates the IRTE with the new destination, vector
- * and flushes the interrupt entry cache.
- */
- irte->vector = cfg->vector;
- irte->dest_id = IRTE_DEST(cfg->dest_apicid);
-
- /* Update the hardware only if the interrupt is in remapped mode. */
- if (ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
- modify_irte(&ir_data->irq_2_iommu, irte);
-
+ intel_ir_reconfigure_irte(data, false);
/*
* After this point, all the interrupts will start arriving
* at the new destination. So, time to cleanup the previous
@@ -1390,12 +1396,11 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
irq_domain_free_irqs_common(domain, virq, nr_irqs);
}
-static void intel_irq_remapping_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+static int intel_irq_remapping_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
- struct intel_ir_data *data = irq_data->chip_data;
-
- modify_irte(&data->irq_2_iommu, &data->irte_entry);
+ intel_ir_reconfigure_irte(irq_data, true);
+ return 0;
}
static void intel_irq_remapping_deactivate(struct irq_domain *domain,
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index e88395605e32..e2339af8054c 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2209,8 +2209,8 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
return 0;
}
-static void its_irq_domain_activate(struct irq_domain *domain,
- struct irq_data *d)
+static int its_irq_domain_activate(struct irq_domain *domain,
+ struct irq_data *d, bool early)
{
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
u32 event = its_get_event_id(d);
@@ -2228,6 +2228,7 @@ static void its_irq_domain_activate(struct irq_domain *domain,
/* Map the GIC IRQ and event to the device */
its_send_mapti(its_dev, d->hwirq, event);
+ return 0;
}
static void its_irq_domain_deactivate(struct irq_domain *domain,
@@ -2701,8 +2702,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
return err;
}
-static void its_vpe_irq_domain_activate(struct irq_domain *domain,
- struct irq_data *d)
+static int its_vpe_irq_domain_activate(struct irq_domain *domain,
+ struct irq_data *d, bool early)
{
struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
@@ -2710,6 +2711,7 @@ static void its_vpe_irq_domain_activate(struct irq_domain *domain,
vpe->col_idx = cpumask_first(cpu_online_mask);
its_send_vmapp(vpe, true);
its_send_vinvall(vpe);
+ return 0;
}
static void its_vpe_irq_domain_deactivate(struct irq_domain *domain,
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 496ed9130600..e06607167858 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1441,6 +1441,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
pci_msi_domain_update_chip_ops(info);
info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
+ info->flags |= MSI_FLAG_MUST_REACTIVATE;
domain = msi_create_irq_domain(fwnode, info, parent);
if (!domain)
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index 50299ad96659..02b66588cac6 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -289,13 +289,14 @@ static int stm32_gpio_domain_translate(struct irq_domain *d,
return 0;
}
-static void stm32_gpio_domain_activate(struct irq_domain *d,
- struct irq_data *irq_data)
+static int stm32_gpio_domain_activate(struct irq_domain *d,
+ struct irq_data *irq_data, bool early)
{
struct stm32_gpio_bank *bank = d->host_data;
struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
regmap_field_write(pctl->irqmux[irq_data->hwirq], bank->bank_nr);
+ return 0;
}
static int stm32_gpio_domain_alloc(struct irq_domain *d,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4536286cc4d2..b01d06db9101 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1114,6 +1114,28 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc,
return readl(gc->reg_base + reg_offset);
}
+struct irq_matrix;
+struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits,
+ unsigned int alloc_start,
+ unsigned int alloc_end);
+void irq_matrix_online(struct irq_matrix *m);
+void irq_matrix_offline(struct irq_matrix *m);
+void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace);
+int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk);
+void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk);
+int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu);
+void irq_matrix_reserve(struct irq_matrix *m);
+void irq_matrix_remove_reserved(struct irq_matrix *m);
+int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
+ bool reserved, unsigned int *mapped_cpu);
+void irq_matrix_free(struct irq_matrix *m, unsigned int cpu,
+ unsigned int bit, bool managed);
+void irq_matrix_assign(struct irq_matrix *m, unsigned int bit);
+unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown);
+unsigned int irq_matrix_allocated(struct irq_matrix *m);
+unsigned int irq_matrix_reserved(struct irq_matrix *m);
+void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind);
+
/* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */
#define INVALID_HWIRQ (~0UL)
irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu);
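The prototypes added above form the complete public interface of the new matrix allocator. As a rough orientation only, here is a hedged sketch of how an architecture could drive that interface; the vector range, the system vector and every example_* identifier are invented for the illustration and are not part of this patch:

#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/irq.h>

/* Illustrative only: example vector range and matrix handle. */
#define EXAMPLE_FIRST_VECTOR	0x20
#define EXAMPLE_NR_VECTORS	256

static struct irq_matrix *example_matrix;

static int __init example_matrix_setup(void)
{
	example_matrix = irq_alloc_matrix(EXAMPLE_NR_VECTORS,
					  EXAMPLE_FIRST_VECTOR,
					  EXAMPLE_NR_VECTORS);
	if (!example_matrix)
		return -ENOMEM;
	/* Carve a system-wide vector out of the allocatable range */
	irq_matrix_assign_system(example_matrix, 0xf0, false);
	/* Bring the boot CPU's per-CPU map online */
	irq_matrix_online(example_matrix);
	return 0;
}

static int example_alloc_vector(const struct cpumask *msk, unsigned int *cpu)
{
	/* Returns the allocated bit (vector number) or -ENOSPC */
	return irq_matrix_alloc(example_matrix, msk, false, cpu);
}

static void example_free_vector(unsigned int cpu, unsigned int vector)
{
	irq_matrix_free(example_matrix, cpu, vector, false);
}

The real consumer of this API is the x86 vector management rework elsewhere in this series; the sketch only mirrors the call order: allocate the matrix, carve out system vectors, bring the per-CPU map online, then allocate and free regular vectors.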
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index b6084898d330..60e3100b0809 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -94,6 +94,7 @@ struct irq_desc {
#endif
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
struct dentry *debugfs_file;
+ const char *dev_name;
#endif
#ifdef CONFIG_SPARSE_IRQ
struct rcu_head rcu;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index b1037dfc47e4..0d6f05cab0fe 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -41,6 +41,7 @@ struct of_device_id;
struct irq_chip;
struct irq_data;
struct cpumask;
+struct seq_file;
/* Number of irqs reserved for a legacy isa controller */
#define NUM_ISA_INTERRUPTS 16
@@ -105,18 +106,21 @@ struct irq_domain_ops {
int (*xlate)(struct irq_domain *d, struct device_node *node,
const u32 *intspec, unsigned int intsize,
unsigned long *out_hwirq, unsigned int *out_type);
-
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
/* extended V2 interfaces to support hierarchy irq_domains */
int (*alloc)(struct irq_domain *d, unsigned int virq,
unsigned int nr_irqs, void *arg);
void (*free)(struct irq_domain *d, unsigned int virq,
unsigned int nr_irqs);
- void (*activate)(struct irq_domain *d, struct irq_data *irq_data);
+ int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early);
void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
unsigned long *out_hwirq, unsigned int *out_type);
#endif
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+ void (*debug_show)(struct seq_file *m, struct irq_domain *d,
+ struct irq_data *irqd, int ind);
+#endif
};
extern struct irq_domain_ops irq_generic_chip_ops;
@@ -438,7 +442,7 @@ extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
unsigned int nr_irqs, int node, void *arg,
bool realloc, const struct cpumask *affinity);
extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
-extern void irq_domain_activate_irq(struct irq_data *irq_data);
+extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
extern void irq_domain_deactivate_irq(struct irq_data *irq_data);
static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
@@ -508,8 +512,6 @@ static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
-static inline void irq_domain_activate_irq(struct irq_data *data) { }
-static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
unsigned int nr_irqs, int node, void *arg)
{
@@ -558,8 +560,6 @@ irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
#else /* CONFIG_IRQ_DOMAIN */
static inline void irq_dispose_mapping(unsigned int virq) { }
-static inline void irq_domain_activate_irq(struct irq_data *data) { }
-static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
static inline struct irq_domain *irq_find_matching_fwnode(
struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token)
{
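With ->activate() now returning an int and taking an additional bool, hierarchical domains can report vector or resource exhaustion instead of failing silently. A minimal, hedged sketch of a callback written against the new signature follows; all identifiers (example_chip_data, example_assign_resources) are invented for the illustration:

static int example_domain_activate(struct irq_domain *d,
				   struct irq_data *irqd, bool early)
{
	struct example_chip_data *data = irqd->chip_data;

	/*
	 * Early activation happens at allocation time (e.g. for
	 * MSI_FLAG_ACTIVATE_EARLY): only reserve, and defer the real
	 * resource assignment until the interrupt is started up.
	 */
	if (early)
		return 0;

	/* Return 0 on success or a negative errno such as -ENOSPC */
	return example_assign_resources(data, irqd);
}

A domain wires this in via the .activate member of its irq_domain_ops; returning an error lets __irq_domain_activate_irq() roll back any parent activation, as the kernel/irq/irqdomain.c hunk further below implements.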
diff --git a/include/linux/msi.h b/include/linux/msi.h
index cdd069cf9ed8..1f1bbb5b4679 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -284,6 +284,11 @@ enum {
MSI_FLAG_PCI_MSIX = (1 << 3),
/* Needs early activate, required for PCI */
MSI_FLAG_ACTIVATE_EARLY = (1 << 4),
+ /*
+ * Must reactivate when irq is started even when
+ * MSI_FLAG_ACTIVATE_EARLY has been set.
+ */
+ MSI_FLAG_MUST_REACTIVATE = (1 << 5),
};
int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
diff --git a/include/trace/events/irq_matrix.h b/include/trace/events/irq_matrix.h
new file mode 100644
index 000000000000..267d4cbbf360
--- /dev/null
+++ b/include/trace/events/irq_matrix.h
@@ -0,0 +1,201 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq_matrix
+
+#if !defined(_TRACE_IRQ_MATRIX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_MATRIX_H
+
+#include <linux/tracepoint.h>
+
+struct irq_matrix;
+struct cpumap;
+
+DECLARE_EVENT_CLASS(irq_matrix_global,
+
+ TP_PROTO(struct irq_matrix *matrix),
+
+ TP_ARGS(matrix),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, online_maps )
+ __field( unsigned int, global_available )
+ __field( unsigned int, global_reserved )
+ __field( unsigned int, total_allocated )
+ ),
+
+ TP_fast_assign(
+ __entry->online_maps = matrix->online_maps;
+ __entry->global_available = matrix->global_available;
+ __entry->global_reserved = matrix->global_reserved;
+ __entry->total_allocated = matrix->total_allocated;
+ ),
+
+ TP_printk("online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u",
+ __entry->online_maps, __entry->global_available,
+ __entry->global_reserved, __entry->total_allocated)
+);
+
+DECLARE_EVENT_CLASS(irq_matrix_global_update,
+
+ TP_PROTO(int bit, struct irq_matrix *matrix),
+
+ TP_ARGS(bit, matrix),
+
+ TP_STRUCT__entry(
+ __field( int, bit )
+ __field( unsigned int, online_maps )
+ __field( unsigned int, global_available )
+ __field( unsigned int, global_reserved )
+ __field( unsigned int, total_allocated )
+ ),
+
+ TP_fast_assign(
+ __entry->bit = bit;
+ __entry->online_maps = matrix->online_maps;
+ __entry->global_available = matrix->global_available;
+ __entry->global_reserved = matrix->global_reserved;
+ __entry->total_allocated = matrix->total_allocated;
+ ),
+
+ TP_printk("bit=%d online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u",
+ __entry->bit, __entry->online_maps,
+ __entry->global_available, __entry->global_reserved,
+ __entry->total_allocated)
+);
+
+DECLARE_EVENT_CLASS(irq_matrix_cpu,
+
+ TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix,
+ struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap),
+
+ TP_STRUCT__entry(
+ __field( int, bit )
+ __field( unsigned int, cpu )
+ __field( bool, online )
+ __field( unsigned int, available )
+ __field( unsigned int, allocated )
+ __field( unsigned int, managed )
+ __field( unsigned int, online_maps )
+ __field( unsigned int, global_available )
+ __field( unsigned int, global_reserved )
+ __field( unsigned int, total_allocated )
+ ),
+
+ TP_fast_assign(
+ __entry->bit = bit;
+ __entry->cpu = cpu;
+ __entry->online = cmap->online;
+ __entry->available = cmap->available;
+ __entry->allocated = cmap->allocated;
+ __entry->managed = cmap->managed;
+ __entry->online_maps = matrix->online_maps;
+ __entry->global_available = matrix->global_available;
+ __entry->global_reserved = matrix->global_reserved;
+ __entry->total_allocated = matrix->total_allocated;
+ ),
+
+ TP_printk("bit=%d cpu=%u online=%d avl=%u alloc=%u managed=%u online_maps=%u global_avl=%u, global_rsvd=%u, total_alloc=%u",
+ __entry->bit, __entry->cpu, __entry->online,
+ __entry->available, __entry->allocated,
+ __entry->managed, __entry->online_maps,
+ __entry->global_available, __entry->global_reserved,
+ __entry->total_allocated)
+);
+
+DEFINE_EVENT(irq_matrix_global, irq_matrix_online,
+
+ TP_PROTO(struct irq_matrix *matrix),
+
+ TP_ARGS(matrix)
+);
+
+DEFINE_EVENT(irq_matrix_global, irq_matrix_offline,
+
+ TP_PROTO(struct irq_matrix *matrix),
+
+ TP_ARGS(matrix)
+);
+
+DEFINE_EVENT(irq_matrix_global, irq_matrix_reserve,
+
+ TP_PROTO(struct irq_matrix *matrix),
+
+ TP_ARGS(matrix)
+);
+
+DEFINE_EVENT(irq_matrix_global, irq_matrix_remove_reserved,
+
+ TP_PROTO(struct irq_matrix *matrix),
+
+ TP_ARGS(matrix)
+);
+
+DEFINE_EVENT(irq_matrix_global_update, irq_matrix_assign_system,
+
+ TP_PROTO(int bit, struct irq_matrix *matrix),
+
+ TP_ARGS(bit, matrix)
+);
+
+DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_reserved,
+
+ TP_PROTO(int bit, unsigned int cpu,
+ struct irq_matrix *matrix, struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap)
+);
+
+DEFINE_EVENT(irq_matrix_cpu, irq_matrix_reserve_managed,
+
+ TP_PROTO(int bit, unsigned int cpu,
+ struct irq_matrix *matrix, struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap)
+);
+
+DEFINE_EVENT(irq_matrix_cpu, irq_matrix_remove_managed,
+
+ TP_PROTO(int bit, unsigned int cpu,
+ struct irq_matrix *matrix, struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap)
+);
+
+DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_managed,
+
+ TP_PROTO(int bit, unsigned int cpu,
+ struct irq_matrix *matrix, struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap)
+);
+
+DEFINE_EVENT(irq_matrix_cpu, irq_matrix_assign,
+
+ TP_PROTO(int bit, unsigned int cpu,
+ struct irq_matrix *matrix, struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap)
+);
+
+DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc,
+
+ TP_PROTO(int bit, unsigned int cpu,
+ struct irq_matrix *matrix, struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap)
+);
+
+DEFINE_EVENT(irq_matrix_cpu, irq_matrix_free,
+
+ TP_PROTO(int bit, unsigned int cpu,
+ struct irq_matrix *matrix, struct cpumap *cmap),
+
+ TP_ARGS(bit, cpu, matrix, cmap)
+);
+
+
+#endif /* _TRACE_IRQ_MATRIX_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/main.c b/init/main.c
index 0ee9c6866ada..2fb98a48b6ae 100644
--- a/init/main.c
+++ b/init/main.c
@@ -664,12 +664,12 @@ asmlinkage __visible void __init start_kernel(void)
debug_objects_mem_init();
setup_per_cpu_pageset();
numa_policy_init();
+ acpi_early_init();
if (late_time_init)
late_time_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
- acpi_early_init();
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index a117adf7084b..89e355866450 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -97,6 +97,12 @@ config HANDLE_DOMAIN_IRQ
config IRQ_TIMINGS
bool
+config GENERIC_IRQ_MATRIX_ALLOCATOR
+ bool
+
+config GENERIC_IRQ_RESERVATION_MODE
+ bool
+
config IRQ_DOMAIN_DEBUG
bool "Expose hardware/virtual IRQ mapping via debugfs"
depends on IRQ_DOMAIN && DEBUG_FS
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index ed15d142694b..ff6e352e3a6c 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
obj-$(CONFIG_SMP) += affinity.o
obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o
+obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index befa671fba64..4e8089b319ae 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -54,7 +54,7 @@ unsigned long probe_irq_on(void)
if (desc->irq_data.chip->irq_set_type)
desc->irq_data.chip->irq_set_type(&desc->irq_data,
IRQ_TYPE_PROBE);
- irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE);
+ irq_activate_and_startup(desc, IRQ_NORESEND);
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 5a2ef92c2782..043bfc35b353 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -207,20 +207,24 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
* Catch code which fiddles with enable_irq() on a managed
* and potentially shutdown IRQ. Chained interrupt
* installment or irq auto probing should not happen on
- * managed irqs either. Emit a warning, break the affinity
- * and start it up as a normal interrupt.
+ * managed irqs either.
*/
if (WARN_ON_ONCE(force))
- return IRQ_STARTUP_NORMAL;
+ return IRQ_STARTUP_ABORT;
/*
* The interrupt was requested, but there is no online CPU
* in its affinity mask. Put it into managed shutdown
* state and let the cpu hotplug mechanism start it up once
* a CPU in the mask becomes available.
*/
- irqd_set_managed_shutdown(d);
return IRQ_STARTUP_ABORT;
}
+ /*
+ * Managed interrupts have reserved resources, so this should not
+ * happen.
+ */
+ if (WARN_ON(irq_domain_activate_irq(d, false)))
+ return IRQ_STARTUP_ABORT;
return IRQ_STARTUP_MANAGED;
}
#else
@@ -236,7 +240,9 @@ static int __irq_startup(struct irq_desc *desc)
struct irq_data *d = irq_desc_get_irq_data(desc);
int ret = 0;
- irq_domain_activate_irq(d);
+ /* Warn if this interrupt is not activated but try nevertheless */
+ WARN_ON_ONCE(!irqd_is_activated(d));
+
if (d->chip->irq_startup) {
ret = d->chip->irq_startup(d);
irq_state_clr_disabled(desc);
@@ -269,6 +275,7 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force)
ret = __irq_startup(desc);
break;
case IRQ_STARTUP_ABORT:
+ irqd_set_managed_shutdown(d);
return 0;
}
}
@@ -278,6 +285,22 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force)
return ret;
}
+int irq_activate(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+
+ if (!irqd_affinity_is_managed(d))
+ return irq_domain_activate_irq(d, false);
+ return 0;
+}
+
+void irq_activate_and_startup(struct irq_desc *desc, bool resend)
+{
+ if (WARN_ON(irq_activate(desc)))
+ return;
+ irq_startup(desc, resend, IRQ_START_FORCE);
+}
+
static void __irq_disable(struct irq_desc *desc, bool mask);
void irq_shutdown(struct irq_desc *desc)
@@ -953,7 +976,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
irq_settings_set_norequest(desc);
irq_settings_set_nothread(desc);
desc->action = &chained_action;
- irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE);
+ irq_activate_and_startup(desc, IRQ_RESEND);
}
}
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index c3fdb36dec30..7f608ac39653 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -81,6 +81,8 @@ irq_debug_show_data(struct seq_file *m, struct irq_data *data, int ind)
data->domain ? data->domain->name : "");
seq_printf(m, "%*shwirq: 0x%lx\n", ind + 1, "", data->hwirq);
irq_debug_show_chip(m, data, ind + 1);
+ if (data->domain && data->domain->ops && data->domain->ops->debug_show)
+ data->domain->ops->debug_show(m, NULL, data, ind + 1);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
if (!data->parent_data)
return;
@@ -149,6 +151,7 @@ static int irq_debug_show(struct seq_file *m, void *p)
raw_spin_lock_irq(&desc->lock);
data = irq_desc_get_irq_data(desc);
seq_printf(m, "handler: %pf\n", desc->handle_irq);
+ seq_printf(m, "device: %s\n", desc->dev_name);
seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors);
irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states,
ARRAY_SIZE(irqdesc_states));
@@ -226,6 +229,15 @@ static const struct file_operations dfs_irq_ops = {
.release = single_release,
};
+void irq_debugfs_copy_devname(int irq, struct device *dev)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ const char *name = dev_name(dev);
+
+ if (name)
+ desc->dev_name = kstrdup(name, GFP_KERNEL);
+}
+
void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc)
{
char name [10];
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 44ed5f8c8759..07d08ca701ec 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -75,6 +75,8 @@ extern void __enable_irq(struct irq_desc *desc);
#define IRQ_START_FORCE true
#define IRQ_START_COND false
+extern int irq_activate(struct irq_desc *desc);
+extern void irq_activate_and_startup(struct irq_desc *desc, bool resend);
extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
extern void irq_shutdown(struct irq_desc *desc);
@@ -437,6 +439,18 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
}
#endif /* !CONFIG_GENERIC_PENDING_IRQ */
+#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
+static inline int irq_domain_activate_irq(struct irq_data *data, bool early)
+{
+ irqd_set_activated(data);
+ return 0;
+}
+static inline void irq_domain_deactivate_irq(struct irq_data *data)
+{
+ irqd_clr_activated(data);
+}
+#endif
+
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
#include <linux/debugfs.h>
@@ -444,7 +458,9 @@ void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
{
debugfs_remove(desc->debugfs_file);
+ kfree(desc->dev_name);
}
+void irq_debugfs_copy_devname(int irq, struct device *dev);
# ifdef CONFIG_IRQ_DOMAIN
void irq_domain_debugfs_init(struct dentry *root);
# else
@@ -459,4 +475,7 @@ static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
static inline void irq_remove_debugfs_entry(struct irq_desc *d)
{
}
+static inline void irq_debugfs_copy_devname(int irq, struct device *dev)
+{
+}
#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 82afb7ed369f..982a3576fb01 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -448,7 +448,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
}
}
- flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+ flags = affinity ? IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN : 0;
mask = NULL;
for (i = 0; i < cnt; i++) {
@@ -462,6 +462,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
goto err;
irq_insert_desc(start + i, desc);
irq_sysfs_add(start + i, desc);
+ irq_add_debugfs_entry(start + i, desc);
}
bitmap_set(allocated_irqs, start, cnt);
return start;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index ac4644e92b49..8de94508251f 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1682,28 +1682,36 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain,
}
EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
-static void __irq_domain_activate_irq(struct irq_data *irq_data)
+static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
{
if (irq_data && irq_data->domain) {
struct irq_domain *domain = irq_data->domain;
+ if (domain->ops->deactivate)
+ domain->ops->deactivate(domain, irq_data);
if (irq_data->parent_data)
- __irq_domain_activate_irq(irq_data->parent_data);
- if (domain->ops->activate)
- domain->ops->activate(domain, irq_data);
+ __irq_domain_deactivate_irq(irq_data->parent_data);
}
}
-static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
+static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
{
- if (irq_data && irq_data->domain) {
- struct irq_domain *domain = irq_data->domain;
+ int ret = 0;
- if (domain->ops->deactivate)
- domain->ops->deactivate(domain, irq_data);
- if (irq_data->parent_data)
- __irq_domain_deactivate_irq(irq_data->parent_data);
+ if (irqd && irqd->domain) {
+ struct irq_domain *domain = irqd->domain;
+
+ if (irqd->parent_data)
+ ret = __irq_domain_activate_irq(irqd->parent_data,
+ early);
+ if (!ret && domain->ops->activate) {
+ ret = domain->ops->activate(domain, irqd, early);
+ /* Rollback in case of error */
+ if (ret && irqd->parent_data)
+ __irq_domain_deactivate_irq(irqd->parent_data);
+ }
}
+ return ret;
}
/**
@@ -1714,12 +1722,15 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
* This is the second step to call domain_ops->activate to program interrupt
* controllers, so the interrupt could actually get delivered.
*/
-void irq_domain_activate_irq(struct irq_data *irq_data)
+int irq_domain_activate_irq(struct irq_data *irq_data, bool early)
{
- if (!irqd_is_activated(irq_data)) {
- __irq_domain_activate_irq(irq_data);
+ int ret = 0;
+
+ if (!irqd_is_activated(irq_data))
+ ret = __irq_domain_activate_irq(irq_data, early);
+ if (!ret)
irqd_set_activated(irq_data);
- }
+ return ret;
}
/**
@@ -1810,6 +1821,8 @@ irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
d->revmap_size + d->revmap_direct_max_irq);
seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags);
+ if (d->ops && d->ops->debug_show)
+ d->ops->debug_show(m, d, NULL, ind + 1);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
if (!d->parent)
return;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4bff6a10ae8e..24758ffd065d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -536,7 +536,7 @@ void __enable_irq(struct irq_desc *desc)
* time. If it was already started up, then irq_startup()
* will invoke irq_enable() under the hood.
*/
- irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE);
break;
}
default:
@@ -1342,6 +1342,21 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
goto out_unlock;
}
+ /*
+ * Activate the interrupt. That activation must happen
+ * independently of IRQ_NOAUTOEN. request_irq() can fail
+ * and the callers are supposed to handle that.
+ * enable_irq() of an interrupt requested with IRQ_NOAUTOEN
+ * is not supposed to fail. The activation keeps the
+ * interrupt in shutdown mode; it merely associates
+ * resources if necessary and fails if that is not
+ * possible. Interrupts which are in managed shutdown
+ * mode will simply ignore that activation request.
+ */
+ ret = irq_activate(desc);
+ if (ret)
+ goto out_unlock;
+
desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
IRQS_ONESHOT | IRQS_WAITING);
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
@@ -1417,7 +1432,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
wake_up_process(new->secondary->thread);
register_irq_proc(irq, desc);
- irq_add_debugfs_entry(irq, desc);
new->dir = NULL;
register_handler_proc(irq, new);
return 0;
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
new file mode 100644
index 000000000000..a3cbbc8191c5
--- /dev/null
+++ b/kernel/irq/matrix.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2017 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+#include <linux/spinlock.h>
+#include <linux/seq_file.h>
+#include <linux/bitmap.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+
+#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS))
+
+struct cpumap {
+ unsigned int available;
+ unsigned int allocated;
+ unsigned int managed;
+ bool online;
+ unsigned long alloc_map[IRQ_MATRIX_SIZE];
+ unsigned long managed_map[IRQ_MATRIX_SIZE];
+};
+
+struct irq_matrix {
+ unsigned int matrix_bits;
+ unsigned int alloc_start;
+ unsigned int alloc_end;
+ unsigned int alloc_size;
+ unsigned int global_available;
+ unsigned int global_reserved;
+ unsigned int systembits_inalloc;
+ unsigned int total_allocated;
+ unsigned int online_maps;
+ struct cpumap __percpu *maps;
+ unsigned long scratch_map[IRQ_MATRIX_SIZE];
+ unsigned long system_map[IRQ_MATRIX_SIZE];
+};
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/irq_matrix.h>
+
+/**
+ * irq_alloc_matrix - Allocate an irq_matrix structure and initialize it
+ * @matrix_bits: Number of matrix bits must be <= IRQ_MATRIX_BITS
+ * @alloc_start: From which bit the allocation search starts
+ * @alloc_end: At which bit the allocation search ends, i.e. the first
+ * invalid bit
+ */
+__init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits,
+ unsigned int alloc_start,
+ unsigned int alloc_end)
+{
+ struct irq_matrix *m;
+
+ if (matrix_bits > IRQ_MATRIX_BITS)
+ return NULL;
+
+ m = kzalloc(sizeof(*m), GFP_KERNEL);
+ if (!m)
+ return NULL;
+
+ m->matrix_bits = matrix_bits;
+ m->alloc_start = alloc_start;
+ m->alloc_end = alloc_end;
+ m->alloc_size = alloc_end - alloc_start;
+ m->maps = alloc_percpu(*m->maps);
+ if (!m->maps) {
+ kfree(m);
+ return NULL;
+ }
+ return m;
+}
+
+/**
+ * irq_matrix_online - Bring the local CPU matrix online
+ * @m: Matrix pointer
+ */
+void irq_matrix_online(struct irq_matrix *m)
+{
+ struct cpumap *cm = this_cpu_ptr(m->maps);
+
+ BUG_ON(cm->online);
+
+ bitmap_zero(cm->alloc_map, m->matrix_bits);
+ cm->available = m->alloc_size - (cm->managed + m->systembits_inalloc);
+ cm->allocated = 0;
+ m->global_available += cm->available;
+ cm->online = true;
+ m->online_maps++;
+ trace_irq_matrix_online(m);
+}
+
+/**
+ * irq_matrix_offline - Bring the local CPU matrix offline
+ * @m: Matrix pointer
+ */
+void irq_matrix_offline(struct irq_matrix *m)
+{
+ struct cpumap *cm = this_cpu_ptr(m->maps);
+
+ /* Update the global available size */
+ m->global_available -= cm->available;
+ cm->online = false;
+ m->online_maps--;
+ trace_irq_matrix_offline(m);
+}
+
+static unsigned int matrix_alloc_area(struct irq_matrix *m, struct cpumap *cm,
+ unsigned int num, bool managed)
+{
+ unsigned int area, start = m->alloc_start;
+ unsigned int end = m->alloc_end;
+
+ bitmap_or(m->scratch_map, cm->managed_map, m->system_map, end);
+ bitmap_or(m->scratch_map, m->scratch_map, cm->alloc_map, end);
+ area = bitmap_find_next_zero_area(m->scratch_map, end, start, num, 0);
+ if (area >= end)
+ return area;
+ if (managed)
+ bitmap_set(cm->managed_map, area, num);
+ else
+ bitmap_set(cm->alloc_map, area, num);
+ return area;
+}
+
+/**
+ * irq_matrix_assign_system - Assign system wide entry in the matrix
+ * @m: Matrix pointer
+ * @bit: Which bit to reserve
+ * @replace: Replace an already allocated vector with a system
+ * vector at the same bit position.
+ *
+ * The BUG_ON()s below are on purpose. If this goes wrong in the
+ * early boot process, then the chance to survive is about zero.
+ * If this happens when the system is live, it's not much better.
+ */
+void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit,
+ bool replace)
+{
+ struct cpumap *cm = this_cpu_ptr(m->maps);
+
+ BUG_ON(bit > m->matrix_bits);
+ BUG_ON(m->online_maps > 1 || (m->online_maps && !replace));
+
+ set_bit(bit, m->system_map);
+ if (replace) {
+ BUG_ON(!test_and_clear_bit(bit, cm->alloc_map));
+ cm->allocated--;
+ m->total_allocated--;
+ }
+ if (bit >= m->alloc_start && bit < m->alloc_end)
+ m->systembits_inalloc++;
+
+ trace_irq_matrix_assign_system(bit, m);
+}
+
+/**
+ * irq_matrix_reserve_managed - Reserve a managed interrupt in a CPU map
+ * @m: Matrix pointer
+ * @msk: On which CPUs the bits should be reserved.
+ *
+ * Can be called for offline CPUs. Note, this will only reserve one bit
+ * on all CPUs in @msk, but it's not guaranteed that the bits are at the
+ * same offset on all CPUs
+ */
+int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk)
+{
+ unsigned int cpu, failed_cpu;
+
+ for_each_cpu(cpu, msk) {
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+ unsigned int bit;
+
+ bit = matrix_alloc_area(m, cm, 1, true);
+ if (bit >= m->alloc_end)
+ goto cleanup;
+ cm->managed++;
+ if (cm->online) {
+ cm->available--;
+ m->global_available--;
+ }
+ trace_irq_matrix_reserve_managed(bit, cpu, m, cm);
+ }
+ return 0;
+cleanup:
+ failed_cpu = cpu;
+ for_each_cpu(cpu, msk) {
+ if (cpu == failed_cpu)
+ break;
+ irq_matrix_remove_managed(m, cpumask_of(cpu));
+ }
+ return -ENOSPC;
+}
+
+/**
+ * irq_matrix_remove_managed - Remove managed interrupts in a CPU map
+ * @m: Matrix pointer
+ * @msk: On which CPUs the bits should be removed
+ *
+ * Can be called for offline CPUs
+ *
+ * This removes unallocated managed interrupts from the map. It does
+ * not matter which one, because managed interrupts free their
+ * allocation when they shut down. If not, the accounting is wrong,
+ * but all that can be done at this point is warn about it.
+ */
+void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, msk) {
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+ unsigned int bit, end = m->alloc_end;
+
+ if (WARN_ON_ONCE(!cm->managed))
+ continue;
+
+ /* Get managed bits which are not allocated */
+ bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end);
+
+ bit = find_first_bit(m->scratch_map, end);
+ if (WARN_ON_ONCE(bit >= end))
+ continue;
+
+ clear_bit(bit, cm->managed_map);
+
+ cm->managed--;
+ if (cm->online) {
+ cm->available++;
+ m->global_available++;
+ }
+ trace_irq_matrix_remove_managed(bit, cpu, m, cm);
+ }
+}
+
+/**
+ * irq_matrix_alloc_managed - Allocate a managed interrupt in a CPU map
+ * @m: Matrix pointer
+ * @cpu: On which CPU the interrupt should be allocated
+ */
+int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu)
+{
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+ unsigned int bit, end = m->alloc_end;
+
+ /* Get managed bits which are not allocated */
+ bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end);
+ bit = find_first_bit(m->scratch_map, end);
+ if (bit >= end)
+ return -ENOSPC;
+ set_bit(bit, cm->alloc_map);
+ cm->allocated++;
+ m->total_allocated++;
+ trace_irq_matrix_alloc_managed(bit, cpu, m, cm);
+ return bit;
+}
+
+/**
+ * irq_matrix_assign - Assign a preallocated interrupt in the local CPU map
+ * @m: Matrix pointer
+ * @bit: Which bit to mark
+ *
+ * This should only be used to mark preallocated vectors
+ */
+void irq_matrix_assign(struct irq_matrix *m, unsigned int bit)
+{
+ struct cpumap *cm = this_cpu_ptr(m->maps);
+
+ if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end))
+ return;
+ if (WARN_ON_ONCE(test_and_set_bit(bit, cm->alloc_map)))
+ return;
+ cm->allocated++;
+ m->total_allocated++;
+ cm->available--;
+ m->global_available--;
+ trace_irq_matrix_assign(bit, smp_processor_id(), m, cm);
+}
+
+/**
+ * irq_matrix_reserve - Reserve interrupts
+ * @m: Matrix pointer
+ *
+ * This is merely a bookkeeping call. It increments the number of globally
+ * reserved interrupt bits w/o actually allocating them. This allows
+ * interrupt descriptors to be set up w/o assigning low level resources to them.
+ * The actual allocation happens when the interrupt gets activated.
+ */
+void irq_matrix_reserve(struct irq_matrix *m)
+{
+ if (m->global_reserved <= m->global_available &&
+ m->global_reserved + 1 > m->global_available)
+ pr_warn("Interrupt reservation exceeds available resources\n");
+
+ m->global_reserved++;
+ trace_irq_matrix_reserve(m);
+}
+
+/**
+ * irq_matrix_remove_reserved - Remove interrupt reservation
+ * @m: Matrix pointer
+ *
+ * This is merely a bookkeeping call. It decrements the number of globally
+ * reserved interrupt bits. This is used to undo irq_matrix_reserve() when
+ * the interrupt was never in use and no real vector was ever allocated
+ * (allocating one would already have undone the reservation).
+ */
+void irq_matrix_remove_reserved(struct irq_matrix *m)
+{
+ m->global_reserved--;
+ trace_irq_matrix_remove_reserved(m);
+}
+
+/**
+ * irq_matrix_alloc - Allocate a regular interrupt in a CPU map
+ * @m: Matrix pointer
+ * @msk: Which CPUs to search in
+ * @reserved: Allocate previously reserved interrupts
+ * @mapped_cpu: Pointer to store the CPU for which the irq was allocated
+ */
+int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
+ bool reserved, unsigned int *mapped_cpu)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, msk) {
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+ unsigned int bit;
+
+ if (!cm->online)
+ continue;
+
+ bit = matrix_alloc_area(m, cm, 1, false);
+ if (bit < m->alloc_end) {
+ cm->allocated++;
+ cm->available--;
+ m->total_allocated++;
+ m->global_available--;
+ if (reserved)
+ m->global_reserved--;
+ *mapped_cpu = cpu;
+ trace_irq_matrix_alloc(bit, cpu, m, cm);
+ return bit;
+ }
+ }
+ return -ENOSPC;
+}
+
+/**
+ * irq_matrix_free - Free allocated interrupt in the matrix
+ * @m: Matrix pointer
+ * @cpu: Which CPU map needs to be updated
+ * @bit: The bit to remove
+ * @managed: If true, the interrupt is managed and not accounted
+ * as available.
+ */
+void irq_matrix_free(struct irq_matrix *m, unsigned int cpu,
+ unsigned int bit, bool managed)
+{
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+
+ if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end))
+ return;
+
+ if (cm->online) {
+ clear_bit(bit, cm->alloc_map);
+ cm->allocated--;
+ m->total_allocated--;
+ if (!managed) {
+ cm->available++;
+ m->global_available++;
+ }
+ }
+ trace_irq_matrix_free(bit, cpu, m, cm);
+}
+
+/**
+ * irq_matrix_available - Get the number of globally available irqs
+ * @m: Pointer to the matrix to query
+ * @cpudown: If true, the local CPU is about to go down, adjust
+ * the number of available irqs accordingly
+ */
+unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown)
+{
+ struct cpumap *cm = this_cpu_ptr(m->maps);
+
+ return m->global_available - (cpudown ? cm->available : 0);
+}
+
+/**
+ * irq_matrix_reserved - Get the number of globally reserved irqs
+ * @m: Pointer to the matrix to query
+ */
+unsigned int irq_matrix_reserved(struct irq_matrix *m)
+{
+ return m->global_reserved;
+}
+
+/**
+ * irq_matrix_allocated - Get the number of allocated irqs on the local cpu
+ * @m: Pointer to the matrix to search
+ *
+ * This returns the number of allocated irqs
+ */
+unsigned int irq_matrix_allocated(struct irq_matrix *m)
+{
+ struct cpumap *cm = this_cpu_ptr(m->maps);
+
+ return cm->allocated;
+}
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+/**
+ * irq_matrix_debug_show - Show detailed allocation information
+ * @sf: Pointer to the seq_file to print to
+ * @m: Pointer to the matrix allocator
+ * @ind: Indentation for the print format
+ *
+ * Note, this is a lockless snapshot.
+ */
+void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind)
+{
+ unsigned int nsys = bitmap_weight(m->system_map, m->matrix_bits);
+ int cpu;
+
+ seq_printf(sf, "Online bitmaps: %6u\n", m->online_maps);
+ seq_printf(sf, "Global available: %6u\n", m->global_available);
+ seq_printf(sf, "Global reserved: %6u\n", m->global_reserved);
+ seq_printf(sf, "Total allocated: %6u\n", m->total_allocated);
+ seq_printf(sf, "System: %u: %*pbl\n", nsys, m->matrix_bits,
+ m->system_map);
+ seq_printf(sf, "%*s| CPU | avl | man | act | vectors\n", ind, " ");
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+
+ seq_printf(sf, "%*s %4d %4u %4u %4u %*pbl\n", ind, " ",
+ cpu, cm->available, cm->managed, cm->allocated,
+ m->matrix_bits, cm->alloc_map);
+ }
+ cpus_read_unlock();
+}
+#endif
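For orientation, the CPU hotplug interplay that the x86 rework relies on maps onto three of the calls implemented above. The sketch below uses invented names and only mirrors what lapic_online()/lapic_offline()/lapic_can_unplug_cpu(), referenced in the smpboot.c hunk earlier, are expected to do; it is not the actual x86 implementation:

static struct irq_matrix *example_matrix;	/* illustrative handle */

/* Called on the upcoming CPU, under vector_lock */
static void example_cpu_online(void)
{
	irq_matrix_online(example_matrix);
}

/* Called on the outgoing CPU, under vector_lock */
static void example_cpu_offline(void)
{
	irq_matrix_offline(example_matrix);
}

/* Veto unplug when the remaining CPUs cannot absorb this CPU's vectors */
static int example_can_unplug_cpu(void)
{
	unsigned int tomove = irq_matrix_allocated(example_matrix);
	unsigned int avail  = irq_matrix_available(example_matrix, true);

	if (avail < tomove) {
		pr_warn("CPU unplug would leave %u vectors without a home\n",
			tomove - avail);
		return -ENOSPC;
	}
	return 0;
}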
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 3fa4bd59f569..edb987b2c58d 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -16,6 +16,8 @@
#include <linux/msi.h>
#include <linux/slab.h>
+#include "internals.h"
+
/**
* alloc_msi_entry - Allocate and initialize an msi_entry
* @dev: Pointer to the device for which this is allocated
@@ -100,13 +102,14 @@ int msi_domain_set_affinity(struct irq_data *irq_data,
return ret;
}
-static void msi_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+static int msi_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
struct msi_msg msg;
BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg));
irq_chip_write_msi_msg(irq_data, &msg);
+ return 0;
}
static void msi_domain_deactivate(struct irq_domain *domain,
@@ -373,8 +376,10 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
return ret;
}
- for (i = 0; i < desc->nvec_used; i++)
+ for (i = 0; i < desc->nvec_used; i++) {
irq_set_msi_desc_off(virq, i, desc);
+ irq_debugfs_copy_devname(virq + i, dev);
+ }
}
if (ops->msi_finish)
@@ -396,11 +401,28 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
struct irq_data *irq_data;
irq_data = irq_domain_get_irq_data(domain, desc->irq);
- irq_domain_activate_irq(irq_data);
+ ret = irq_domain_activate_irq(irq_data, true);
+ if (ret)
+ goto cleanup;
+ if (info->flags & MSI_FLAG_MUST_REACTIVATE)
+ irqd_clr_activated(irq_data);
}
}
-
return 0;
+
+cleanup:
+ for_each_msi_entry(desc, dev) {
+ struct irq_data *irqd;
+
+ if (desc->irq == virq)
+ break;
+
+ irqd = irq_domain_get_irq_data(domain, desc->irq);
+ if (irqd_is_activated(irqd))
+ irq_domain_deactivate_irq(irqd);
+ }
+ msi_domain_free_irqs(domain, dev);
+ return ret;
}
/**