diff options
Diffstat (limited to 'arch/x86/kernel/apic')
| -rw-r--r-- | arch/x86/kernel/apic/Makefile | 7 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic.c | 806 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_common.c | 32 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_flat_64.c | 203 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_noop.c | 93 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_numachip.c | 88 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/bigsmp_32.c | 189 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/hw_nmi.c | 8 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/init.c | 110 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 911 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/ipi.c | 214 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/local.h | 19 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/msi.c | 215 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/probe_32.c | 128 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/probe_64.c | 18 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/vector.c | 359 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 158 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/x2apic_phys.c | 85 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 453 | 
19 files changed, 1572 insertions, 2524 deletions
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index a6fcaf16cdbf..52d1808ee360 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -4,10 +4,10 @@  #  # Leads to non-deterministic coverage that is not a function of syscall inputs. -# In particualr, smp_apic_timer_interrupt() is called in random places. +# In particular, smp_apic_timer_interrupt() is called in random places.  KCOV_INSTRUMENT		:= n -obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_common.o apic_noop.o ipi.o vector.o +obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_common.o apic_noop.o ipi.o vector.o init.o  obj-y				+= hw_nmi.o  obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o @@ -23,8 +23,5 @@ obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o  obj-y				+= apic_flat_64.o  endif -# APIC probe will depend on the listing order here -obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o -  # For 32bit, probe_32 need to be listed last  obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 189d3a5e471a..d73ba5a7b623 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -19,6 +19,7 @@  #include <linux/kernel_stat.h>  #include <linux/mc146818rtc.h>  #include <linux/acpi_pmtmr.h> +#include <linux/bitmap.h>  #include <linux/clockchips.h>  #include <linux/interrupt.h>  #include <linux/memblock.h> @@ -36,6 +37,8 @@  #include <linux/smp.h>  #include <linux/mm.h> +#include <xen/xen.h> +  #include <asm/trace/irq_vectors.h>  #include <asm/irq_remapping.h>  #include <asm/pc-conf-reg.h> @@ -56,40 +59,23 @@  #include <asm/time.h>  #include <asm/smp.h>  #include <asm/mce.h> +#include <asm/msr.h>  #include <asm/tsc.h>  #include <asm/hypervisor.h>  #include <asm/cpu_device_id.h>  #include <asm/intel-family.h>  #include <asm/irq_regs.h> +#include <asm/cpu.h> -unsigned int num_processors; - -unsigned disabled_cpus; +#include "local.h"  /* Processor that is doing the boot up */ -unsigned int 
boot_cpu_physical_apicid __ro_after_init = -1U; +u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID;  EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);  u8 boot_cpu_apic_version __ro_after_init;  /* - * The highest APIC ID seen during enumeration. - */ -static unsigned int max_physical_apicid; - -/* - * Bitmask of physically existing CPUs: - */ -physid_mask_t phys_cpu_present_map; - -/* - * Processor to be disabled specified by kernel parameter - * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to - * avoid undefined behaviour caused by sending INIT from AP to BSP. - */ -static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID; - -/*   * This variable controls which CPUs receive external NMIs.  By default,   * external NMIs are delivered only to the BSP.   */ @@ -100,26 +86,15 @@ static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;   */  static bool virt_ext_dest_id __ro_after_init; -/* - * Map cpu index to physical APIC ID - */ -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); - -#ifdef CONFIG_X86_32 +/* For parallel bootup. */ +unsigned long apic_mmio_base __ro_after_init; -/* - * On x86_32, the mapping between cpu and logical apicid may vary - * depending on apic in use.  The following early percpu variable is - * used for the mapping.  This is where the behaviors of x86_64 and 32 - * actually diverge.  Let's keep it ugly for now. 
- */ -DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); +static inline bool apic_accessible(void) +{ +	return x2apic_mode || apic_mmio_base; +} +#ifdef CONFIG_X86_32  /* Local APIC was disabled by the BIOS and enabled by the kernel */  static int enabled_via_apicbase __ro_after_init; @@ -175,8 +150,8 @@ static __init int setup_apicpmtimer(char *s)  __setup("apicpmtimer", setup_apicpmtimer);  #endif -unsigned long mp_lapic_addr __ro_after_init; -int disable_apic __ro_after_init; +static unsigned long mp_lapic_addr __ro_after_init; +bool apic_is_disabled __ro_after_init;  /* Disable local APIC timer from the kernel commandline or via dmi quirk */  static int disable_apic_timer __initdata;  /* Local APIC timer works in C2 */ @@ -202,8 +177,6 @@ unsigned int lapic_timer_period = 0;  static void apic_pm_activate(void); -static unsigned long apic_phys __ro_after_init; -  /*   * Get the LAPIC version   */ @@ -243,31 +216,7 @@ static int modern_apic(void)   */  static void __init apic_disable(void)  { -	pr_info("APIC: switched to apic NOOP\n"); -	apic = &apic_noop; -} - -void native_apic_wait_icr_idle(void) -{ -	while (apic_read(APIC_ICR) & APIC_ICR_BUSY) -		cpu_relax(); -} - -u32 native_safe_apic_wait_icr_idle(void) -{ -	u32 send_status; -	int timeout; - -	timeout = 0; -	do { -		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; -		if (!send_status) -			break; -		inc_irq_stat(icr_read_retry_count); -		udelay(100); -	} while (timeout++ < 1000); - -	return send_status; +	apic_install_driver(&apic_noop);  }  void native_apic_icr_write(u32 low, u32 id) @@ -275,7 +224,7 @@ void native_apic_icr_write(u32 low, u32 id)  	unsigned long flags;  	local_irq_save(flags); -	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); +	apic_write(APIC_ICR2, SET_XAPIC_DEST_FIELD(id));  	apic_write(APIC_ICR, low);  	local_irq_restore(flags);  } @@ -290,16 +239,6 @@ u64 native_apic_icr_read(void)  	return icr1 | ((u64)icr2 << 32);  } -#ifdef CONFIG_X86_32 -/** - * 
get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ -	return modern_apic() ? 0xff : 0xf; -} -#endif -  /**   * lapic_get_maxlvt - get the maximum number of local vector table entries   */ @@ -421,10 +360,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)  		if (vector && !eilvt_entry_is_changeable(vector, new))  			/* may not change if vectors are different */  			return rsvd; -		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); -	} while (rsvd != new); +	} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new)); -	rsvd &= ~APIC_EILVT_MASKED; +	rsvd = new & ~APIC_EILVT_MASKED;  	if (rsvd && rsvd != vector)  		pr_info("LVT offset %d assigned for vector 0x%02x\n",  			offset, rsvd); @@ -488,7 +426,7 @@ static int lapic_next_deadline(unsigned long delta,  	weak_wrmsr_fence();  	tsc = rdtsc(); -	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); +	wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));  	return 0;  } @@ -503,7 +441,19 @@ static int lapic_timer_shutdown(struct clock_event_device *evt)  	v = apic_read(APIC_LVTT);  	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);  	apic_write(APIC_LVTT, v); -	apic_write(APIC_TMICT, 0); + +	/* +	 * Setting APIC_LVT_MASKED (above) should be enough to tell +	 * the hardware that this timer will never fire. But AMD +	 * erratum 411 and some Intel CPU behavior circa 2024 say +	 * otherwise.  
Time for belt and suspenders programming: mask +	 * the timer _and_ zero the counter registers: +	 */ +	if (v & APIC_LVT_TIMER_TSCDEADLINE) +		wrmsrq(MSR_IA32_TSC_DEADLINE, 0); +	else +		apic_write(APIC_TMICT, 0); +  	return 0;  } @@ -534,7 +484,7 @@ static int lapic_timer_set_oneshot(struct clock_event_device *evt)  static void lapic_timer_broadcast(const struct cpumask *mask)  {  #ifdef CONFIG_SMP -	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +	__apic_send_IPI_mask(mask, LOCAL_TIMER_VECTOR);  #endif  } @@ -560,32 +510,32 @@ static struct clock_event_device lapic_clockevent = {  static DEFINE_PER_CPU(struct clock_event_device, lapic_events);  static const struct x86_cpu_id deadline_match[] __initconst = { -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */ -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */ +	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X,   0x2, 0x2, 0x3a), /* EP */ +	X86_MATCH_VFM_STEPS(INTEL_HASWELL_X,   0x4, 0x4, 0x0f), /* EX */ -	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,	0x0b000020), +	X86_MATCH_VFM(INTEL_BROADWELL_X,	0x0b000020), -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011), -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e), -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c), -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003), +	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x2, 0x2, 0x00000011), +	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x3, 0x3, 0x0700000e), +	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x4, 0x4, 0x0f00000c), +	X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x5, 0x5, 0x0e000003), -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136), -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014), -	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 
0xf), 0), +	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,   0x3, 0x3, 0x01000136), +	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,   0x4, 0x4, 0x02000014), +	X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X,   0x5, 0xf, 0), -	X86_MATCH_INTEL_FAM6_MODEL( HASWELL,		0x22), -	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,		0x20), -	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,		0x17), +	X86_MATCH_VFM(INTEL_HASWELL,		0x22), +	X86_MATCH_VFM(INTEL_HASWELL_L,		0x20), +	X86_MATCH_VFM(INTEL_HASWELL_G,		0x17), -	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,		0x25), -	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,	0x17), +	X86_MATCH_VFM(INTEL_BROADWELL,		0x25), +	X86_MATCH_VFM(INTEL_BROADWELL_G,	0x17), -	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,		0xb2), -	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,		0xb2), +	X86_MATCH_VFM(INTEL_SKYLAKE_L,		0xb2), +	X86_MATCH_VFM(INTEL_SKYLAKE,		0xb2), -	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,		0x52), -	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,		0x52), +	X86_MATCH_VFM(INTEL_KABYLAKE_L,		0x52), +	X86_MATCH_VFM(INTEL_KABYLAKE,		0x52),  	{},  }; @@ -694,7 +644,7 @@ void lapic_update_tsc_freq(void)  static __initdata int lapic_cal_loops = -1;  static __initdata long lapic_cal_t1, lapic_cal_t2;  static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +static __initdata u32 lapic_cal_pm1, lapic_cal_pm2;  static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;  /* @@ -704,7 +654,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)  {  	unsigned long long tsc = 0;  	long tapic = apic_read(APIC_TMCCT); -	unsigned long pm = acpi_pm_read_early(); +	u32 pm = acpi_pm_read_early();  	if (boot_cpu_has(X86_FEATURE_TSC))  		tsc = rdtsc(); @@ -729,7 +679,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)  }  static int __init -calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) +calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)  {  	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;  	const 
long pm_thresh = pm_100ms / 100; @@ -740,7 +690,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)  	return -1;  #endif -	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); +	apic_pr_verbose("... PM-Timer delta = %u\n", deltapm);  	/* Check, if the PM timer is available */  	if (!deltapm) @@ -750,14 +700,14 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)  	if (deltapm > (pm_100ms - pm_thresh) &&  	    deltapm < (pm_100ms + pm_thresh)) { -		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); +		apic_pr_verbose("... PM-Timer result ok\n");  		return 0;  	}  	res = (((u64)deltapm) *  mult) >> 22;  	do_div(res, 1000000); -	pr_warn("APIC calibration not consistent " -		"with PM-Timer: %ldms instead of 100ms\n", (long)res); +	pr_warn("APIC calibration not consistent with PM-Timer: %ldms instead of 100ms\n", +		(long)res);  	/* Correct the lapic counter value */  	res = (((u64)(*delta)) * pm_100ms); @@ -770,9 +720,8 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)  	if (boot_cpu_has(X86_FEATURE_TSC)) {  		res = (((u64)(*deltatsc)) * pm_100ms);  		do_div(res, deltapm); -		apic_printk(APIC_VERBOSE, "TSC delta adjusted to " -					  "PM-Timer: %lu (%ld)\n", -					(unsigned long)res, *deltatsc); +		apic_pr_verbose("TSC delta adjusted to PM-Timer: %lu (%ld)\n", +				(unsigned long)res, *deltatsc);  		*deltatsc = (long)res;  	} @@ -807,12 +756,12 @@ bool __init apic_needs_pit(void)  		return true;  	/* Is there an APIC at all or is it disabled? */ -	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic) +	if (!boot_cpu_has(X86_FEATURE_APIC) || apic_is_disabled)  		return true;  	/*  	 * If interrupt delivery mode is legacy PIC or virtual wire without -	 * configuration, the local APIC timer wont be set up. Make sure +	 * configuration, the local APIC timer won't be set up. Make sure  	 * that the PIT is initialized.  	 
*/  	if (apic_intr_mode == APIC_PIC || @@ -855,8 +804,7 @@ static int __init calibrate_APIC_clock(void)  	 * in the clockevent structure and return.  	 */  	if (!lapic_init_clockevent()) { -		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", -			    lapic_timer_period); +		apic_pr_verbose("lapic timer already calibrated %d\n", lapic_timer_period);  		/*  		 * Direct calibration methods must have an always running  		 * local APIC timer, no need for broadcast timer. @@ -865,8 +813,7 @@ static int __init calibrate_APIC_clock(void)  		return 0;  	} -	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" -		    "calibrating APIC timer ...\n"); +	apic_pr_verbose("Using local APIC timer interrupts. Calibrating APIC timer ...\n");  	/*  	 * There are platforms w/o global clockevent devices. Instead of @@ -929,7 +876,7 @@ static int __init calibrate_APIC_clock(void)  	/* Build delta t1-t2 as apic timer counts down */  	delta = lapic_cal_t1 - lapic_cal_t2; -	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); +	apic_pr_verbose("... lapic delta = %ld\n", delta);  	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); @@ -940,22 +887,19 @@ static int __init calibrate_APIC_clock(void)  	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;  	lapic_init_clockevent(); -	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); -	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); -	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", -		    lapic_timer_period); +	apic_pr_verbose("..... delta %ld\n", delta); +	apic_pr_verbose("..... mult: %u\n", lapic_clockevent.mult); +	apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period);  	if (boot_cpu_has(X86_FEATURE_TSC)) { -		apic_printk(APIC_VERBOSE, "..... CPU clock speed is " -			    "%ld.%04ld MHz.\n", -			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), -			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); +		apic_pr_verbose("..... 
CPU clock speed is %ld.%04ld MHz.\n", +				(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), +				(deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));  	} -	apic_printk(APIC_VERBOSE, "..... host bus clock speed is " -		    "%u.%04u MHz.\n", -		    lapic_timer_period / (1000000 / HZ), -		    lapic_timer_period % (1000000 / HZ)); +	apic_pr_verbose("..... host bus clock speed is %u.%04u MHz.\n", +			lapic_timer_period / (1000000 / HZ), +			lapic_timer_period % (1000000 / HZ));  	/*  	 * Do a sanity check on the APIC calibration result @@ -974,7 +918,7 @@ static int __init calibrate_APIC_clock(void)  	 * available.  	 */  	if (!pm_referenced && global_clock_event) { -		apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); +		apic_pr_verbose("... verify APIC timer\n");  		/*  		 * Setup the apic timer manually @@ -995,11 +939,11 @@ static int __init calibrate_APIC_clock(void)  		/* Jiffies delta */  		deltaj = lapic_cal_j2 - lapic_cal_j1; -		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); +		apic_pr_verbose("... jiffies delta = %lu\n", deltaj);  		/* Check, if the jiffies result is consistent */  		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) -			apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); +			apic_pr_verbose("... 
jiffies result ok\n");  		else  			levt->features |= CLOCK_EVT_FEAT_DUMMY;  	} @@ -1107,7 +1051,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)  {  	struct pt_regs *old_regs = set_irq_regs(regs); -	ack_APIC_irq(); +	apic_eoi();  	trace_local_timer_entry(LOCAL_TIMER_VECTOR);  	local_apic_timer_interrupt();  	trace_local_timer_exit(LOCAL_TIMER_VECTOR); @@ -1115,11 +1059,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)  	set_irq_regs(old_regs);  } -int setup_profiling_timer(unsigned int multiplier) -{ -	return -EINVAL; -} -  /*   * Local APIC start and shutdown   */ @@ -1136,8 +1075,7 @@ void clear_local_APIC(void)  	int maxlvt;  	u32 v; -	/* APIC hasn't been mapped yet */ -	if (!x2apic_mode && !apic_phys) +	if (!apic_accessible())  		return;  	maxlvt = lapic_get_maxlvt(); @@ -1227,8 +1165,7 @@ void apic_soft_disable(void)   */  void disable_local_APIC(void)  { -	/* APIC hasn't been mapped yet */ -	if (!x2apic_mode && !apic_phys) +	if (!apic_accessible())  		return;  	apic_soft_disable(); @@ -1291,9 +1228,8 @@ void __init sync_Arb_IDs(void)  	 */  	apic_wait_icr_idle(); -	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); -	apic_write(APIC_ICR, APIC_DEST_ALLINC | -			APIC_INT_LEVELTRIG | APIC_DM_INIT); +	apic_pr_debug("Synchronizing Arb IDs.\n"); +	apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);  }  enum apic_intr_mode_id apic_intr_mode __ro_after_init; @@ -1301,7 +1237,7 @@ enum apic_intr_mode_id apic_intr_mode __ro_after_init;  static int __init __apic_intr_mode_select(void)  {  	/* Check kernel option */ -	if (disable_apic) { +	if (apic_is_disabled) {  		pr_info("APIC disabled via kernel command line\n");  		return APIC_PIC;  	} @@ -1310,7 +1246,7 @@ static int __init __apic_intr_mode_select(void)  #ifdef CONFIG_X86_64  	/* On 64-bit, the APIC must be integrated, Check local APIC only */  	if (!boot_cpu_has(X86_FEATURE_APIC)) { -		disable_apic = 1; +		apic_is_disabled = true;  		pr_info("APIC disabled by BIOS\n");  		
return APIC_PIC;  	} @@ -1319,16 +1255,15 @@ static int __init __apic_intr_mode_select(void)  	/* Neither 82489DX nor integrated APIC ? */  	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { -		disable_apic = 1; +		apic_is_disabled = true;  		return APIC_PIC;  	}  	/* If the BIOS pretends there is an integrated APIC ? */  	if (!boot_cpu_has(X86_FEATURE_APIC) &&  		APIC_INTEGRATED(boot_cpu_apic_version)) { -		disable_apic = 1; -		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", -				       boot_cpu_physical_apicid); +		apic_is_disabled = true; +		pr_err(FW_BUG "Local APIC not detected, force emulation\n");  		return APIC_PIC;  	}  #endif @@ -1349,12 +1284,6 @@ static int __init __apic_intr_mode_select(void)  		pr_info("APIC: SMP mode deactivated\n");  		return APIC_SYMMETRIC_IO_NO_ROUTING;  	} - -	if (read_apic_id() != boot_cpu_physical_apicid) { -		panic("Boot APIC ID in local APIC unexpected (%d vs %d)", -		     read_apic_id(), boot_cpu_physical_apicid); -		/* Or can we switch back to PIC here? */ -	}  #endif  	return APIC_SYMMETRIC_IO; @@ -1441,7 +1370,7 @@ void __init apic_intr_mode_init(void)  		break;  	} -	default_setup_apic_routing(); +	x86_64_probe_apic();  	if (x86_platform.apic_post_init)  		x86_platform.apic_post_init(); @@ -1484,10 +1413,10 @@ static void lapic_setup_esr(void)  	if (maxlvt > 3)  		apic_write(APIC_ESR, 0);  	value = apic_read(APIC_ESR); -	if (value != oldvalue) -		apic_printk(APIC_VERBOSE, "ESR value before enabling " -			"vector: 0x%08x  after: 0x%08x\n", -			oldvalue, value); +	if (value != oldvalue) { +		apic_pr_verbose("ESR value before enabling vector: 0x%08x  after: 0x%08x\n", +				oldvalue, value); +	}  }  #define APIC_IR_REGS		APIC_ISR_NR @@ -1523,7 +1452,7 @@ static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)  		 * per set bit.  		 
*/  		for_each_set_bit(bit, isr->map, APIC_IR_BITS) -			ack_APIC_irq(); +			apic_eoi();  		return true;  	} @@ -1535,7 +1464,7 @@ static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)   * interrupt from previous kernel might still have ISR bit set.   *   * Most probably by now the CPU has serviced that pending interrupt and it - * might not have done the ack_APIC_irq() because it thought, interrupt + * might not have done the apic_eoi() because it thought, interrupt   * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear   * the ISR bit and cpu thinks it has already serviced the interrupt. Hence   * a vector might get locked. It was noticed for timer irq (vector @@ -1569,7 +1498,7 @@ static void setup_local_APIC(void)  	int cpu = smp_processor_id();  	unsigned int value; -	if (disable_apic) { +	if (apic_is_disabled) {  		disable_ioapic_support();  		return;  	} @@ -1592,35 +1521,14 @@ static void setup_local_APIC(void)  	}  #endif  	/* -	 * Double-check whether this APIC is really registered. -	 * This is meaningless in clustered apic mode, so we skip it. -	 */ -	BUG_ON(!apic->apic_id_registered()); - -	/*  	 * Intel recommends to set DFR, LDR and TPR before enabling  	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel -	 * document number 292116).  So here it goes... +	 * document number 292116). +	 * +	 * Except for APICs which operate in physical destination mode.  	 */ -	apic->init_apic_ldr(); - -#ifdef CONFIG_X86_32 -	if (apic->dest_mode_logical) { -		int logical_apicid, ldr_apicid; - -		/* -		 * APIC LDR is initialized.  If logical_apicid mapping was -		 * initialized during get_smp_config(), make sure it matches -		 * the actual value. -		 */ -		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); -		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -		if (logical_apicid != BAD_APICID) -			WARN_ON(logical_apicid != ldr_apicid); -		/* Always use the value from LDR. 
*/ -		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; -	} -#endif +	if (apic->init_apic_ldr) +		apic->init_apic_ldr();  	/*  	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC @@ -1693,12 +1601,12 @@ static void setup_local_APIC(void)  	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro  	 */  	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; -	if (!cpu && (pic_mode || !value || skip_ioapic_setup)) { +	if (!cpu && (pic_mode || !value || ioapic_is_disabled)) {  		value = APIC_DM_EXTINT; -		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); +		apic_pr_verbose("Enabled ExtINT on CPU#%d\n", cpu);  	} else {  		value = APIC_DM_EXTINT | APIC_LVT_MASKED; -		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu); +		apic_pr_verbose("Masked ExtINT on CPU#%d\n", cpu);  	}  	apic_write(APIC_LVT0, value); @@ -1750,17 +1658,49 @@ void apic_ap_setup(void)  	end_local_APIC_setup();  } +static __init void apic_read_boot_cpu_id(bool x2apic) +{ +	/* +	 * This can be invoked from check_x2apic() before the APIC has been +	 * selected. But that code knows for sure that the BIOS enabled +	 * X2APIC. 
+	 */ +	if (x2apic) { +		boot_cpu_physical_apicid = native_apic_msr_read(APIC_ID); +		boot_cpu_apic_version = GET_APIC_VERSION(native_apic_msr_read(APIC_LVR)); +	} else { +		boot_cpu_physical_apicid = read_apic_id(); +		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); +	} +	topology_register_boot_apic(boot_cpu_physical_apicid); +} +  #ifdef CONFIG_X86_X2APIC  int x2apic_mode;  EXPORT_SYMBOL_GPL(x2apic_mode);  enum {  	X2APIC_OFF, -	X2APIC_ON,  	X2APIC_DISABLED, +	/* All states below here have X2APIC enabled */ +	X2APIC_ON, +	X2APIC_ON_LOCKED  };  static int x2apic_state; +static bool x2apic_hw_locked(void) +{ +	u64 x86_arch_cap_msr; +	u64 msr; + +	x86_arch_cap_msr = x86_read_arch_cap_msr(); +	if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) { +		rdmsrq(MSR_IA32_XAPIC_DISABLE_STATUS, msr); +		return (msr & LEGACY_XAPIC_DISABLED); +	} +	return false; +} +  static void __x2apic_disable(void)  {  	u64 msr; @@ -1768,12 +1708,12 @@ static void __x2apic_disable(void)  	if (!boot_cpu_has(X86_FEATURE_APIC))  		return; -	rdmsrl(MSR_IA32_APICBASE, msr); +	rdmsrq(MSR_IA32_APICBASE, msr);  	if (!(msr & X2APIC_ENABLE))  		return;  	/* Disable xapic and x2apic first and then reenable xapic mode */ -	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); -	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); +	wrmsrq(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); +	wrmsrq(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);  	printk_once(KERN_INFO "x2apic disabled\n");  } @@ -1781,23 +1721,27 @@ static void __x2apic_enable(void)  {  	u64 msr; -	rdmsrl(MSR_IA32_APICBASE, msr); +	rdmsrq(MSR_IA32_APICBASE, msr);  	if (msr & X2APIC_ENABLE)  		return; -	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); +	wrmsrq(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);  	printk_once(KERN_INFO "x2apic enabled\n");  }  static int __init setup_nox2apic(char *str)  {  	if (x2apic_enabled()) { -		int apicid = native_apic_msr_read(APIC_ID); +		u32 apicid = native_apic_msr_read(APIC_ID);  		
if (apicid >= 255) {  			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",  				apicid);  			return 0;  		} +		if (x2apic_hw_locked()) { +			pr_warn("APIC locked in x2apic mode, can't disable\n"); +			return 0; +		}  		pr_warn("x2apic already enabled.\n");  		__x2apic_disable();  	} @@ -1812,32 +1756,53 @@ early_param("nox2apic", setup_nox2apic);  void x2apic_setup(void)  {  	/* -	 * If x2apic is not in ON state, disable it if already enabled +	 * Try to make the AP's APIC state match that of the BSP,  but if the +	 * BSP is unlocked and the AP is locked then there is a state mismatch. +	 * Warn about the mismatch in case a GP fault occurs due to a locked AP +	 * trying to be turned off. +	 */ +	if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked()) +		pr_warn("x2apic lock mismatch between BSP and AP.\n"); +	/* +	 * If x2apic is not in ON or LOCKED state, disable it if already enabled  	 * from BIOS.  	 */ -	if (x2apic_state != X2APIC_ON) { +	if (x2apic_state < X2APIC_ON) {  		__x2apic_disable();  		return;  	}  	__x2apic_enable();  } +static __init void apic_set_fixmap(bool read_apic); +  static __init void x2apic_disable(void)  { -	u32 x2apic_id, state = x2apic_state; - -	x2apic_mode = 0; -	x2apic_state = X2APIC_DISABLED; +	u32 x2apic_id; -	if (state != X2APIC_ON) +	if (x2apic_state < X2APIC_ON)  		return;  	x2apic_id = read_apic_id();  	if (x2apic_id >= 255)  		panic("Cannot disable x2apic, id: %08x\n", x2apic_id); +	if (x2apic_hw_locked()) { +		pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id); +		return; +	} +  	__x2apic_disable(); -	register_lapic_address(mp_lapic_addr); + +	x2apic_mode = 0; +	x2apic_state = X2APIC_DISABLED; + +	/* +	 * Don't reread the APIC ID as it was already done from +	 * check_x2apic() and the APIC driver still is a x2APIC variant, +	 * which fails to do the read after x2APIC was disabled. 
+	 */ +	apic_set_fixmap(false);  }  static __init void x2apic_enable(void) @@ -1894,22 +1859,29 @@ void __init check_x2apic(void)  	if (x2apic_enabled()) {  		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");  		x2apic_mode = 1; -		x2apic_state = X2APIC_ON; +		if (x2apic_hw_locked()) +			x2apic_state = X2APIC_ON_LOCKED; +		else +			x2apic_state = X2APIC_ON; +		apic_read_boot_cpu_id(true);  	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {  		x2apic_state = X2APIC_DISABLED;  	}  }  #else /* CONFIG_X86_X2APIC */ -static int __init validate_x2apic(void) +void __init check_x2apic(void)  {  	if (!apic_is_x2apic_enabled()) -		return 0; +		return;  	/* -	 * Checkme: Can we simply turn off x2apic here instead of panic? +	 * Checkme: Can we simply turn off x2APIC here instead of disabling the APIC?  	 */ -	panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n"); +	pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n"); +	pr_err("Disabling APIC, expect reduced performance and functionality.\n"); + +	apic_is_disabled = true; +	setup_clear_cpu_cap(X86_FEATURE_APIC);  } -early_initcall(validate_x2apic);  static inline void try_to_enable_x2apic(int remap_mode) { }  static inline void __x2apic_enable(void) { } @@ -1920,7 +1892,7 @@ void __init enable_IR_x2apic(void)  	unsigned long flags;  	int ret, ir_stat; -	if (skip_ioapic_setup) { +	if (ioapic_is_disabled) {  		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");  		return;  	} @@ -1958,19 +1930,19 @@ void __init enable_IR_x2apic(void)   * On AMD64 we trust the BIOS - if it says no APIC it is likely   * not correctly set up (usually the APIC timer won't work etc.)   
*/ -static int __init detect_init_APIC(void) +static bool __init detect_init_APIC(void)  {  	if (!boot_cpu_has(X86_FEATURE_APIC)) {  		pr_info("No local APIC present\n"); -		return -1; +		return false;  	} -	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -	return 0; +	register_lapic_address(APIC_DEFAULT_PHYS_BASE); +	return true;  }  #else -static int __init apic_verify(void) +static bool __init apic_verify(unsigned long addr)  {  	u32 features, h, l; @@ -1981,28 +1953,28 @@ static int __init apic_verify(void)  	features = cpuid_edx(1);  	if (!(features & (1 << X86_FEATURE_APIC))) {  		pr_warn("Could not enable APIC!\n"); -		return -1; +		return false;  	}  	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); -	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;  	/* The BIOS may have set up the APIC at some other address */  	if (boot_cpu_data.x86 >= 6) {  		rdmsr(MSR_IA32_APICBASE, l, h);  		if (l & MSR_IA32_APICBASE_ENABLE) -			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; +			addr = l & MSR_IA32_APICBASE_BASE;  	} +	register_lapic_address(addr);  	pr_info("Found and enabled local APIC!\n"); -	return 0; +	return true;  } -int __init apic_force_enable(unsigned long addr) +bool __init apic_force_enable(unsigned long addr)  {  	u32 h, l; -	if (disable_apic) -		return -1; +	if (apic_is_disabled) +		return false;  	/*  	 * Some BIOSes disable the local APIC in the APIC_BASE @@ -2019,17 +1991,17 @@ int __init apic_force_enable(unsigned long addr)  			enabled_via_apicbase = 1;  		}  	} -	return apic_verify(); +	return apic_verify(addr);  }  /*   * Detect and initialize APIC   */ -static int __init detect_init_APIC(void) +static bool __init detect_init_APIC(void)  {  	/* Disabled by kernel option? 
*/ -	if (disable_apic) -		return -1; +	if (apic_is_disabled) +		return false;  	switch (boot_cpu_data.x86_vendor) {  	case X86_VENDOR_AMD: @@ -2040,8 +2012,8 @@ static int __init detect_init_APIC(void)  	case X86_VENDOR_HYGON:  		break;  	case X86_VENDOR_INTEL: -		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || -		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) +		if ((boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)) || +		    boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO)  			break;  		goto no_apic;  	default: @@ -2056,22 +2028,22 @@ static int __init detect_init_APIC(void)  		if (!force_enable_local_apic) {  			pr_info("Local APIC disabled by BIOS -- "  				"you can enable it with \"lapic\"\n"); -			return -1; +			return false;  		} -		if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) -			return -1; +		if (!apic_force_enable(APIC_DEFAULT_PHYS_BASE)) +			return false;  	} else { -		if (apic_verify()) -			return -1; +		if (!apic_verify(APIC_DEFAULT_PHYS_BASE)) +			return false;  	}  	apic_pm_activate(); -	return 0; +	return true;  no_apic:  	pr_info("No local APIC present or hardware disabled\n"); -	return -1; +	return false;  }  #endif @@ -2080,63 +2052,37 @@ no_apic:   */  void __init init_apic_mappings(void)  { -	unsigned int new_apicid; -  	if (apic_validate_deadline_timer())  		pr_info("TSC deadline timer available\n"); -	if (x2apic_mode) { -		boot_cpu_physical_apicid = read_apic_id(); +	if (x2apic_mode)  		return; -	} - -	/* If no local APIC can be found return early */ -	if (!smp_found_config && detect_init_APIC()) { -		/* lets NOP'ify apic operations */ -		pr_info("APIC: disable apic facility\n"); -		apic_disable(); -	} else { -		apic_phys = mp_lapic_addr; -		/* -		 * If the system has ACPI MADT tables or MP info, the LAPIC -		 * address is already registered. 
-		 */ -		if (!acpi_lapic && !smp_found_config) -			register_lapic_address(apic_phys); +	if (!smp_found_config) { +		if (!detect_init_APIC()) { +			pr_info("APIC: disable apic facility\n"); +			apic_disable(); +		}  	} +} -	/* -	 * Fetch the APIC ID of the BSP in case we have a -	 * default configuration (or the MP table is broken). -	 */ -	new_apicid = read_apic_id(); -	if (boot_cpu_physical_apicid != new_apicid) { -		boot_cpu_physical_apicid = new_apicid; -		/* -		 * yeah -- we lie about apic_version -		 * in case if apic was disabled via boot option -		 * but it's not a problem for SMP compiled kernel -		 * since apic_intr_mode_select is prepared for such -		 * a case and disable smp mode -		 */ -		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); -	} +static __init void apic_set_fixmap(bool read_apic) +{ +	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); +	apic_mmio_base = APIC_BASE; +	apic_pr_verbose("Mapped APIC to %16lx (%16lx)\n", apic_mmio_base, mp_lapic_addr); +	if (read_apic) +		apic_read_boot_cpu_id(false);  }  void __init register_lapic_address(unsigned long address)  { +	/* This should only happen once */ +	WARN_ON_ONCE(mp_lapic_addr);  	mp_lapic_addr = address; -	if (!x2apic_mode) { -		set_fixmap_nocache(FIX_APIC_BASE, address); -		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", -			    APIC_BASE, address); -	} -	if (boot_cpu_physical_apicid == -1U) { -		boot_cpu_physical_apicid  = read_apic_id(); -		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); -	} +	if (!x2apic_mode) +		apic_set_fixmap(true);  }  /* @@ -2173,7 +2119,7 @@ static noinline void handle_spurious_interrupt(u8 vector)  	if (v & (1 << (vector & 0x1f))) {  		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",  			vector, smp_processor_id()); -		ack_APIC_irq(); +		apic_eoi();  	} else {  		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. 
Not pending!\n",  			vector, smp_processor_id()); @@ -2224,21 +2170,20 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)  	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */  		apic_write(APIC_ESR, 0);  	v = apic_read(APIC_ESR); -	ack_APIC_irq(); +	apic_eoi();  	atomic_inc(&irq_err_count); -	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", -		    smp_processor_id(), v); +	apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v);  	v &= 0xff;  	while (v) {  		if (v & 0x1) -			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); +			apic_pr_debug_cont(" : %s", error_interrupt_reason[i]);  		i++;  		v >>= 1;  	} -	apic_printk(APIC_DEBUG, KERN_CONT "\n"); +	apic_pr_debug_cont("\n");  	trace_error_apic_exit(ERROR_APIC_VECTOR);  } @@ -2258,8 +2203,7 @@ static void __init connect_bsp_APIC(void)  		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's  		 * local APIC to INT and NMI lines.  		 */ -		apic_printk(APIC_VERBOSE, "leaving PIC mode, " -				"enabling APIC mode.\n"); +		apic_pr_verbose("Leaving PIC mode, enabling APIC mode.\n");  		imcr_pic_to_apic();  	}  #endif @@ -2284,8 +2228,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)  		 * IPIs, won't work beyond this point!  The only exception are  		 * INIT IPIs.  		 */ -		apic_printk(APIC_VERBOSE, "disabling APIC mode, " -				"entering PIC mode.\n"); +		apic_pr_verbose("Disabling APIC mode, entering PIC mode.\n");  		imcr_apic_to_pic();  		return;  	} @@ -2330,195 +2273,6 @@ void disconnect_bsp_APIC(int virt_wire_setup)  	apic_write(APIC_LVT1, value);  } -/* - * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated - * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, - * so the maximum of nr_logical_cpuids is nr_cpu_ids. - * - * NOTE: Reserve 0 for BSP. 
- */ -static int nr_logical_cpuids = 1; - -/* - * Used to store mapping between logical CPU IDs and APIC IDs. - */ -static int cpuid_to_apicid[] = { -	[0 ... NR_CPUS - 1] = -1, -}; - -bool arch_match_cpu_phys_id(int cpu, u64 phys_id) -{ -	return phys_id == cpuid_to_apicid[cpu]; -} - -#ifdef CONFIG_SMP -/** - * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread - * @apicid: APIC ID to check - */ -bool apic_id_is_primary_thread(unsigned int apicid) -{ -	u32 mask; - -	if (smp_num_siblings == 1) -		return true; -	/* Isolate the SMT bit(s) in the APICID and check for 0 */ -	mask = (1U << (fls(smp_num_siblings) - 1)) - 1; -	return !(apicid & mask); -} -#endif - -/* - * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids - * and cpuid_to_apicid[] synchronized. - */ -static int allocate_logical_cpuid(int apicid) -{ -	int i; - -	/* -	 * cpuid <-> apicid mapping is persistent, so when a cpu is up, -	 * check if the kernel has allocated a cpuid for it. -	 */ -	for (i = 0; i < nr_logical_cpuids; i++) { -		if (cpuid_to_apicid[i] == apicid) -			return i; -	} - -	/* Allocate a new cpuid. */ -	if (nr_logical_cpuids >= nr_cpu_ids) { -		WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " -			     "Processor %d/0x%x and the rest are ignored.\n", -			     nr_cpu_ids, nr_logical_cpuids, apicid); -		return -EINVAL; -	} - -	cpuid_to_apicid[nr_logical_cpuids] = apicid; -	return nr_logical_cpuids++; -} - -int generic_processor_info(int apicid, int version) -{ -	int cpu, max = nr_cpu_ids; -	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, -				phys_cpu_present_map); - -	/* -	 * boot_cpu_physical_apicid is designed to have the apicid -	 * returned by read_apic_id(), i.e, the apicid of the -	 * currently booting-up processor. However, on some platforms, -	 * it is temporarily modified by the apicid reported as BSP -	 * through MP table. 
Concretely: -	 * -	 * - arch/x86/kernel/mpparse.c: MP_processor_info() -	 * - arch/x86/mm/amdtopology.c: amd_numa_init() -	 * -	 * This function is executed with the modified -	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel -	 * parameter doesn't work to disable APs on kdump 2nd kernel. -	 * -	 * Since fixing handling of boot_cpu_physical_apicid requires -	 * another discussion and tests on each platform, we leave it -	 * for now and here we use read_apic_id() directly in this -	 * function, generic_processor_info(). -	 */ -	if (disabled_cpu_apicid != BAD_APICID && -	    disabled_cpu_apicid != read_apic_id() && -	    disabled_cpu_apicid == apicid) { -		int thiscpu = num_processors + disabled_cpus; - -		pr_warn("APIC: Disabling requested cpu." -			" Processor %d/0x%x ignored.\n", thiscpu, apicid); - -		disabled_cpus++; -		return -ENODEV; -	} - -	/* -	 * If boot cpu has not been detected yet, then only allow upto -	 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu -	 */ -	if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && -	    apicid != boot_cpu_physical_apicid) { -		int thiscpu = max + disabled_cpus - 1; - -		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost" -			" reached. Keeping one slot for boot cpu." -			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - -		disabled_cpus++; -		return -ENODEV; -	} - -	if (num_processors >= nr_cpu_ids) { -		int thiscpu = max + disabled_cpus; - -		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " -			"Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - -		disabled_cpus++; -		return -EINVAL; -	} - -	if (apicid == boot_cpu_physical_apicid) { -		/* -		 * x86_bios_cpu_apicid is required to have processors listed -		 * in same order as logical cpu numbers. Hence the first -		 * entry is BSP, and so on. -		 * boot_cpu_init() already hold bit 0 in cpu_present_mask -		 * for BSP. -		 */ -		cpu = 0; - -		/* Logical cpuid 0 is reserved for BSP. 
*/ -		cpuid_to_apicid[0] = apicid; -	} else { -		cpu = allocate_logical_cpuid(apicid); -		if (cpu < 0) { -			disabled_cpus++; -			return -EINVAL; -		} -	} - -	/* -	 * Validate version -	 */ -	if (version == 0x0) { -		pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", -			cpu, apicid); -		version = 0x10; -	} - -	if (version != boot_cpu_apic_version) { -		pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", -			boot_cpu_apic_version, cpu, version); -	} - -	if (apicid > max_physical_apicid) -		max_physical_apicid = apicid; - -#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) -	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; -	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; -#endif -#ifdef CONFIG_X86_32 -	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = -		apic->x86_32_early_logical_apicid(cpu); -#endif -	set_cpu_possible(cpu, true); -	physid_set(apicid, phys_cpu_present_map); -	set_cpu_present(cpu, true); -	num_processors++; - -	return cpu; -} - -int hard_smp_processor_id(void) -{ -	return read_apic_id(); -} -  void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,  			   bool dmar)  { @@ -2559,49 +2313,9 @@ u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)  }  EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); -#ifdef CONFIG_X86_64 -void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler) -{ -	struct apic **drv; - -	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) -		(*drv)->wakeup_secondary_cpu_64 = handler; -} -#endif - -/* - * Override the generic EOI implementation with an optimized version. - * Only called during early boot when only one CPU is active and with - * interrupts disabled, so we know this does not race with actual APIC driver - * use. 
- */ -void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) -{ -	struct apic **drv; - -	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { -		/* Should happen once for each apic */ -		WARN_ON((*drv)->eoi_write == eoi_write); -		(*drv)->native_eoi_write = (*drv)->eoi_write; -		(*drv)->eoi_write = eoi_write; -	} -} -  static void __init apic_bsp_up_setup(void)  { -#ifdef CONFIG_X86_64 -	apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); -#else -	/* -	 * Hack: In case of kdump, after a crash, kernel might be booting -	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid -	 * might be zero if read from MP tables. Get it from LAPIC. -	 */ -# ifdef CONFIG_CRASH_DUMP -	boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif -	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); +	reset_phys_cpu_present_map(boot_cpu_physical_apicid);  }  /** @@ -2646,7 +2360,7 @@ static struct {  	 */  	int active;  	/* r/w apic fields */ -	unsigned int apic_id; +	u32 apic_id;  	unsigned int apic_taskpri;  	unsigned int apic_ldr;  	unsigned int apic_dfr; @@ -2866,19 +2580,12 @@ int apic_is_clustered_box(void)  /*   * APIC command line parameters   */ -static int __init setup_disableapic(char *arg) +static int __init setup_nolapic(char *arg)  { -	disable_apic = 1; +	apic_is_disabled = true;  	setup_clear_cpu_cap(X86_FEATURE_APIC);  	return 0;  } -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ -	return setup_disableapic(arg); -}  early_param("nolapic", setup_nolapic);  static int __init parse_lapic_timer_c2_ok(char *arg) @@ -2905,11 +2612,11 @@ early_param("nolapic_timer", parse_nolapic_timer);  static int __init apic_set_verbosity(char *arg)  {  	if (!arg)  { -#ifdef CONFIG_X86_64 -		skip_ioapic_setup = 0; +		if (IS_ENABLED(CONFIG_X86_32)) +			return -EINVAL; + +		ioapic_is_disabled = false;  		return 0; -#endif -		return 
-EINVAL;  	}  	if (strcmp("debug", arg) == 0) @@ -2930,11 +2637,11 @@ early_param("apic", apic_set_verbosity);  static int __init lapic_insert_resource(void)  { -	if (!apic_phys) +	if (!apic_mmio_base)  		return -1;  	/* Put local APIC into the resource map. */ -	lapic_resource.start = apic_phys; +	lapic_resource.start = apic_mmio_base;  	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;  	insert_resource(&iomem_resource, &lapic_resource); @@ -2947,15 +2654,6 @@ static int __init lapic_insert_resource(void)   */  late_initcall(lapic_insert_resource); -static int __init apic_set_disabled_cpu_apicid(char *arg) -{ -	if (!arg || !get_option(&arg, &disabled_cpu_apicid)) -		return -EINVAL; - -	return 0; -} -early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); -  static int __init apic_set_extnmi(char *arg)  {  	if (!arg) diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 02b4839478b1..9ef3be866832 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -6,6 +6,8 @@  #include <linux/irq.h>  #include <asm/apic.h> +#include "local.h" +  u32 apic_default_calc_apicid(unsigned int cpu)  {  	return per_cpu(x86_cpu_to_apicid, cpu); @@ -16,31 +18,25 @@ u32 apic_flat_calc_apicid(unsigned int cpu)  	return 1U << cpu;  } -bool default_check_apicid_used(physid_mask_t *map, int apicid) -{ -	return physid_isset(apicid, *map); -} - -void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ -	*retmap = *phys_map; -} - -int default_cpu_present_to_apicid(int mps_cpu) +u32 default_cpu_present_to_apicid(int mps_cpu)  {  	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) -		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); +		return (int)per_cpu(x86_cpu_to_apicid, mps_cpu);  	else  		return BAD_APICID;  }  EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); -int default_check_phys_apicid_present(int phys_apicid) +/* + * Set up the logical destination ID when the APIC 
operates in logical + * destination mode. + */ +void default_init_apic_ldr(void)  { -	return physid_isset(phys_apicid, phys_cpu_present_map); -} +	unsigned long val; -int default_apic_id_valid(u32 apicid) -{ -	return (apicid < 255); +	apic_write(APIC_DFR, APIC_DFR_FLAT); +	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; +	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); +	apic_write(APIC_LDR, val);  } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 8f72b4351c9f..e0308d8c4e6c 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -8,192 +8,25 @@   * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and   * James Cleverdon.   */ -#include <linux/cpumask.h>  #include <linux/export.h> -#include <linux/acpi.h> -#include <asm/jailhouse_para.h>  #include <asm/apic.h>  #include "local.h" -static struct apic apic_physflat; -static struct apic apic_flat; - -struct apic *apic __ro_after_init = &apic_flat; -EXPORT_SYMBOL_GPL(apic); - -static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -	return 1; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116).  So here it goes... 
- */ -void flat_init_apic_ldr(void) -{ -	unsigned long val; -	unsigned long num, id; - -	num = smp_processor_id(); -	id = 1UL << num; -	apic_write(APIC_DFR, APIC_DFR_FLAT); -	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; -	val |= SET_APIC_LOGICAL_ID(id); -	apic_write(APIC_LDR, val); -} - -static void _flat_send_IPI_mask(unsigned long mask, int vector) -{ -	unsigned long flags; - -	local_irq_save(flags); -	__default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); -	local_irq_restore(flags); -} - -static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ -	unsigned long mask = cpumask_bits(cpumask)[0]; - -	_flat_send_IPI_mask(mask, vector); -} - -static void -flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) -{ -	unsigned long mask = cpumask_bits(cpumask)[0]; -	int cpu = smp_processor_id(); - -	if (cpu < BITS_PER_LONG) -		__clear_bit(cpu, &mask); - -	_flat_send_IPI_mask(mask, vector); -} - -static unsigned int flat_get_apic_id(unsigned long x) +static u32 physflat_get_apic_id(u32 x)  {  	return (x >> 24) & 0xFF;  } -static u32 set_apic_id(unsigned int id) -{ -	return (id & 0xFF) << 24; -} - -static unsigned int read_xapic_id(void) -{ -	return flat_get_apic_id(apic_read(APIC_ID)); -} - -static int flat_apic_id_registered(void) -{ -	return physid_isset(read_xapic_id(), phys_cpu_present_map); -} - -static int flat_phys_pkg_id(int initial_apic_id, int index_msb) -{ -	return initial_apic_id >> index_msb; -} - -static int flat_probe(void) +static int physflat_probe(void)  {  	return 1;  } -static struct apic apic_flat __ro_after_init = { -	.name				= "flat", -	.probe				= flat_probe, -	.acpi_madt_oem_check		= flat_acpi_madt_oem_check, -	.apic_id_valid			= default_apic_id_valid, -	.apic_id_registered		= flat_apic_id_registered, - -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED, -	.dest_mode_logical		= true, - -	.disable_esr			= 0, - -	.check_apicid_used		= NULL, -	.init_apic_ldr			= flat_init_apic_ldr, -	.ioapic_phys_id_map		= 
NULL, -	.setup_apic_routing		= NULL, -	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= NULL, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= flat_phys_pkg_id, - -	.get_apic_id			= flat_get_apic_id, -	.set_apic_id			= set_apic_id, - -	.calc_dest_apicid		= apic_flat_calc_apicid, - -	.send_IPI			= default_send_IPI_single, -	.send_IPI_mask			= flat_send_IPI_mask, -	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself, -	.send_IPI_allbutself		= default_send_IPI_allbutself, -	.send_IPI_all			= default_send_IPI_all, -	.send_IPI_self			= default_send_IPI_self, - -	.inquire_remote_apic		= default_inquire_remote_apic, - -	.read				= native_apic_mem_read, -	.write				= native_apic_mem_write, -	.eoi_write			= native_apic_mem_write, -	.icr_read			= native_apic_icr_read, -	.icr_write			= native_apic_icr_write, -	.wait_icr_idle			= native_apic_wait_icr_idle, -	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle, -}; - -/* - * Physflat mode is used when there are more than 8 CPUs on a system. - * We cannot use logical delivery in this case because the mask - * overflows, so use physical mode. - */  static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  { -#ifdef CONFIG_ACPI -	/* -	 * Quirk: some x86_64 machines can only use physical APIC mode -	 * regardless of how many processors are present (x86_64 ES7000 -	 * is an example). 
-	 */ -	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && -		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { -		printk(KERN_DEBUG "system APIC only can use physical flat"); -		return 1; -	} - -	if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { -		printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); -		return 1; -	} -#endif - -	return 0; -} - -static void physflat_init_apic_ldr(void) -{ -	/* -	 * LDR and DFR are not involved in physflat mode, rather: -	 * "In physical destination mode, the destination processor is -	 * specified by its local APIC ID [...]." (Intel SDM, 10.6.2.1) -	 */ -} - -static int physflat_probe(void) -{ -	if (apic == &apic_physflat || num_possible_cpus() > 8 || -	    jailhouse_paravirt()) -		return 1; - -	return 0; +	return 1;  }  static struct apic apic_physflat __ro_after_init = { @@ -201,25 +34,15 @@ static struct apic apic_physflat __ro_after_init = {  	.name				= "physical flat",  	.probe				= physflat_probe,  	.acpi_madt_oem_check		= physflat_acpi_madt_oem_check, -	.apic_id_valid			= default_apic_id_valid, -	.apic_id_registered		= flat_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= false,  	.disable_esr			= 0, -	.check_apicid_used		= NULL, -	.init_apic_ldr			= physflat_init_apic_ldr, -	.ioapic_phys_id_map		= NULL, -	.setup_apic_routing		= NULL,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= NULL, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= flat_phys_pkg_id, -	.get_apic_id			= flat_get_apic_id, -	.set_apic_id			= set_apic_id, +	.max_apic_id			= 0xFE, +	.get_apic_id			= physflat_get_apic_id,  	.calc_dest_apicid		= apic_default_calc_apicid, @@ -229,19 +52,17 @@ static struct apic apic_physflat __ro_after_init = {  	.send_IPI_allbutself		= default_send_IPI_allbutself,  	.send_IPI_all			= default_send_IPI_all,  	.send_IPI_self			= default_send_IPI_self, - -	.inquire_remote_apic	
	= default_inquire_remote_apic, +	.nmi_to_offline_cpu		= true,  	.read				= native_apic_mem_read,  	.write				= native_apic_mem_write, -	.eoi_write			= native_apic_mem_write, +	.eoi				= native_apic_mem_eoi,  	.icr_read			= native_apic_icr_read,  	.icr_write			= native_apic_icr_write, -	.wait_icr_idle			= native_apic_wait_icr_idle, -	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle, +	.wait_icr_idle			= apic_mem_wait_icr_idle, +	.safe_wait_icr_idle		= apic_mem_wait_icr_idle_timeout,  }; +apic_driver(apic_physflat); -/* - * We need to check for physflat first, so this order is important. - */ -apic_drivers(apic_physflat, apic_flat); +struct apic *apic __ro_after_init = &apic_physflat; +EXPORT_SYMBOL_GPL(apic); diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index fe78319e0f7a..58abb941c45b 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -8,111 +8,58 @@   * Though in case if apic is disabled (for some reason) we try   * to not uglify the caller's code and allow to call (some) apic routines   * like self-ipi, etc... + * + * FIXME: Remove this gunk. The above argument which was intentionally left + * in place is silly to begin with because none of the callbacks except for + * APIC::read/write() have a WARN_ON_ONCE() in them. Sigh...   
*/  #include <linux/cpumask.h>  #include <linux/thread_info.h>  #include <asm/apic.h> -static void noop_init_apic_ldr(void) { } +#include "local.h" +  static void noop_send_IPI(int cpu, int vector) { }  static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }  static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }  static void noop_send_IPI_allbutself(int vector) { }  static void noop_send_IPI_all(int vector) { }  static void noop_send_IPI_self(int vector) { } -static void noop_apic_wait_icr_idle(void) { }  static void noop_apic_icr_write(u32 low, u32 id) { } -static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) +static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip, +	unsigned int cpu)  {  	return -1;  } -static u32 noop_safe_apic_wait_icr_idle(void) -{ -	return 0; -} - -static u64 noop_apic_icr_read(void) -{ -	return 0; -} - -static int noop_phys_pkg_id(int cpuid_apic, int index_msb) -{ -	return 0; -} - -static unsigned int noop_get_apic_id(unsigned long x) -{ -	return 0; -} - -static int noop_probe(void) -{ -	/* -	 * NOOP apic should not ever be -	 * enabled via probe routine -	 */ -	return 0; -} - -static int noop_apic_id_registered(void) -{ -	/* -	 * if we would be really "pedantic" -	 * we should pass read_apic_id() here -	 * but since NOOP suppose APIC ID = 0 -	 * lets save a few cycles -	 */ -	return physid_isset(0, phys_cpu_present_map); -} +static u64 noop_apic_icr_read(void) { return 0; } +static u32 noop_get_apic_id(u32 apicid) { return 0; } +static void noop_apic_eoi(void) { }  static u32 noop_apic_read(u32 reg)  { -	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); +	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !apic_is_disabled);  	return 0;  } -static void noop_apic_write(u32 reg, u32 v) +static void noop_apic_write(u32 reg, u32 val)  { -	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); +	
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !apic_is_disabled);  } -#ifdef CONFIG_X86_32 -static int noop_x86_32_early_logical_apicid(int cpu) -{ -	return BAD_APICID; -} -#endif -  struct apic apic_noop __ro_after_init = {  	.name				= "noop", -	.probe				= noop_probe, -	.acpi_madt_oem_check		= NULL, - -	.apic_id_valid			= default_apic_id_valid, -	.apic_id_registered		= noop_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= true,  	.disable_esr			= 0, -	.check_apicid_used		= default_check_apicid_used, -	.init_apic_ldr			= noop_init_apic_ldr, -	.ioapic_phys_id_map		= default_ioapic_phys_id_map, -	.setup_apic_routing		= NULL,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= physid_set_mask_of_physid, - -	.check_phys_apicid_present	= default_check_phys_apicid_present, - -	.phys_pkg_id			= noop_phys_pkg_id, +	.max_apic_id			= 0xFE,  	.get_apic_id			= noop_get_apic_id, -	.set_apic_id			= NULL,  	.calc_dest_apicid		= apic_flat_calc_apicid, @@ -125,17 +72,9 @@ struct apic apic_noop __ro_after_init = {  	.wakeup_secondary_cpu		= noop_wakeup_secondary_cpu, -	.inquire_remote_apic		= NULL, -  	.read				= noop_apic_read,  	.write				= noop_apic_write, -	.eoi_write			= noop_apic_write, +	.eoi				= noop_apic_eoi,  	.icr_read			= noop_apic_icr_read,  	.icr_write			= noop_apic_icr_write, -	.wait_icr_idle			= noop_apic_wait_icr_idle, -	.safe_wait_icr_idle		= noop_safe_apic_wait_icr_idle, - -#ifdef CONFIG_X86_32 -	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid, -#endif  }; diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a54d817eb4b6..5c5be2d58242 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -14,6 +14,7 @@  #include <linux/init.h>  #include <linux/pgtable.h> +#include <asm/msr.h>  #include <asm/numachip/numachip.h>  #include <asm/numachip/numachip_csr.h> @@ -25,53 +26,27 @@ static const 
struct apic apic_numachip1;  static const struct apic apic_numachip2;  static void (*numachip_apic_icr_write)(int apicid, unsigned int val) __read_mostly; -static unsigned int numachip1_get_apic_id(unsigned long x) +static u32 numachip1_get_apic_id(u32 x)  {  	unsigned long value;  	unsigned int id = (x >> 24) & 0xff;  	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { -		rdmsrl(MSR_FAM10H_NODE_ID, value); +		rdmsrq(MSR_FAM10H_NODE_ID, value);  		id |= (value << 2) & 0xff00;  	}  	return id;  } -static u32 numachip1_set_apic_id(unsigned int id) -{ -	return (id & 0xff) << 24; -} - -static unsigned int numachip2_get_apic_id(unsigned long x) +static u32 numachip2_get_apic_id(u32 x)  {  	u64 mcfg; -	rdmsrl(MSR_FAM10H_MMIO_CONF_BASE, mcfg); +	rdmsrq(MSR_FAM10H_MMIO_CONF_BASE, mcfg);  	return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);  } -static u32 numachip2_set_apic_id(unsigned int id) -{ -	return id << 24; -} - -static int numachip_apic_id_valid(u32 apicid) -{ -	/* Trust what bootloader passes in MADT */ -	return 1; -} - -static int numachip_apic_id_registered(void) -{ -	return 1; -} - -static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) -{ -	return initial_apic_id >> index_msb; -} -  static void numachip1_apic_icr_write(int apicid, unsigned int val)  {  	write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val); @@ -82,7 +57,7 @@ static void numachip2_apic_icr_write(int apicid, unsigned int val)  	numachip2_write32_lcsr(NUMACHIP2_APIC_ICR, (apicid << 12) | val);  } -static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int numachip_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu)  {  	numachip_apic_icr_write(phys_apicid, APIC_DM_INIT);  	numachip_apic_icr_write(phys_apicid, APIC_DM_STARTUP | @@ -172,15 +147,15 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)  	u64 val;  	u32 nodes = 1; -	this_cpu_write(cpu_llc_id, node); +	c->topo.llc_id = node;  	/* Account for nodes per socket in 
multi-core-module processors */  	if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { -		rdmsrl(MSR_FAM10H_NODE_ID, val); +		rdmsrq(MSR_FAM10H_NODE_ID, val);  		nodes = ((val >> 3) & 7) + 1;  	} -	c->phys_proc_id = node / nodes; +	c->topo.pkg_id = node / nodes;  }  static int __init numachip_system_init(void) @@ -228,40 +203,19 @@ static int numachip2_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  	return 1;  } -/* APIC IPIs are queued */ -static void numachip_apic_wait_icr_idle(void) -{ -} - -/* APIC NMI IPIs are queued */ -static u32 numachip_safe_apic_wait_icr_idle(void) -{ -	return 0; -} -  static const struct apic apic_numachip1 __refconst = {  	.name				= "NumaConnect system",  	.probe				= numachip1_probe,  	.acpi_madt_oem_check		= numachip1_acpi_madt_oem_check, -	.apic_id_valid			= numachip_apic_id_valid, -	.apic_id_registered		= numachip_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= false,  	.disable_esr			= 0, -	.check_apicid_used		= NULL, -	.init_apic_ldr			= flat_init_apic_ldr, -	.ioapic_phys_id_map		= NULL, -	.setup_apic_routing		= NULL,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= NULL, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= numachip_phys_pkg_id, +	.max_apic_id			= UINT_MAX,  	.get_apic_id			= numachip1_get_apic_id, -	.set_apic_id			= numachip1_set_apic_id,  	.calc_dest_apicid		= apic_default_calc_apicid, @@ -273,15 +227,12 @@ static const struct apic apic_numachip1 __refconst = {  	.send_IPI_self			= numachip_send_IPI_self,  	.wakeup_secondary_cpu		= numachip_wakeup_secondary, -	.inquire_remote_apic		= NULL, /* REMRD not supported */  	.read				= native_apic_mem_read,  	.write				= native_apic_mem_write, -	.eoi_write			= native_apic_mem_write, +	.eoi				= native_apic_mem_eoi,  	.icr_read			= native_apic_icr_read,  	.icr_write			= native_apic_icr_write, -	.wait_icr_idle			= numachip_apic_wait_icr_idle, -	
.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,  };  apic_driver(apic_numachip1); @@ -290,25 +241,15 @@ static const struct apic apic_numachip2 __refconst = {  	.name				= "NumaConnect2 system",  	.probe				= numachip2_probe,  	.acpi_madt_oem_check		= numachip2_acpi_madt_oem_check, -	.apic_id_valid			= numachip_apic_id_valid, -	.apic_id_registered		= numachip_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= false,  	.disable_esr			= 0, -	.check_apicid_used		= NULL, -	.init_apic_ldr			= flat_init_apic_ldr, -	.ioapic_phys_id_map		= NULL, -	.setup_apic_routing		= NULL,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= NULL, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= numachip_phys_pkg_id, +	.max_apic_id			= UINT_MAX,  	.get_apic_id			= numachip2_get_apic_id, -	.set_apic_id			= numachip2_set_apic_id,  	.calc_dest_apicid		= apic_default_calc_apicid, @@ -320,15 +261,12 @@ static const struct apic apic_numachip2 __refconst = {  	.send_IPI_self			= numachip_send_IPI_self,  	.wakeup_secondary_cpu		= numachip_wakeup_secondary, -	.inquire_remote_apic		= NULL, /* REMRD not supported */  	.read				= native_apic_mem_read,  	.write				= native_apic_mem_write, -	.eoi_write			= native_apic_mem_write, +	.eoi				= native_apic_mem_eoi,  	.icr_read			= native_apic_icr_read,  	.icr_write			= native_apic_icr_write, -	.wait_icr_idle			= numachip_apic_wait_icr_idle, -	.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,  };  apic_driver(apic_numachip2); diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c deleted file mode 100644 index 77555f66c14d..000000000000 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ /dev/null @@ -1,189 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs. - * - * Drives the local APIC in "clustered mode". 
- */ -#include <linux/cpumask.h> -#include <linux/dmi.h> -#include <linux/smp.h> - -#include <asm/apic.h> -#include <asm/io_apic.h> - -#include "local.h" - -static unsigned bigsmp_get_apic_id(unsigned long x) -{ -	return (x >> 24) & 0xFF; -} - -static int bigsmp_apic_id_registered(void) -{ -	return 1; -} - -static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid) -{ -	return false; -} - -static int bigsmp_early_logical_apicid(int cpu) -{ -	/* on bigsmp, logical apicid is the same as physical */ -	return early_per_cpu(x86_cpu_to_apicid, cpu); -} - -/* - * bigsmp enables physical destination mode - * and doesn't use LDR and DFR - */ -static void bigsmp_init_apic_ldr(void) -{ -} - -static void bigsmp_setup_apic_routing(void) -{ -	printk(KERN_INFO -		"Enabling APIC mode:  Physflat.  Using %d I/O APICs\n", -		nr_ioapics); -} - -static int bigsmp_cpu_present_to_apicid(int mps_cpu) -{ -	if (mps_cpu < nr_cpu_ids) -		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - -	return BAD_APICID; -} - -static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ -	/* For clustered we don't have a good way to do this yet - hack */ -	physids_promote(0xFFL, retmap); -} - -static int bigsmp_check_phys_apicid_present(int phys_apicid) -{ -	return 1; -} - -static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) -{ -	return cpuid_apic >> index_msb; -} - -static void bigsmp_send_IPI_allbutself(int vector) -{ -	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void bigsmp_send_IPI_all(int vector) -{ -	default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); -} - -static int dmi_bigsmp; /* can be set by dmi scanners */ - -static int hp_ht_bigsmp(const struct dmi_system_id *d) -{ -	printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); -	dmi_bigsmp = 1; - -	return 0; -} - - -static const struct dmi_system_id bigsmp_dmi_table[] = { -	{ hp_ht_bigsmp, "HP ProLiant DL760 G2", -		{	
DMI_MATCH(DMI_BIOS_VENDOR, "HP"), -			DMI_MATCH(DMI_BIOS_VERSION, "P44-"), -		} -	}, - -	{ hp_ht_bigsmp, "HP ProLiant DL740", -		{	DMI_MATCH(DMI_BIOS_VENDOR, "HP"), -			DMI_MATCH(DMI_BIOS_VERSION, "P47-"), -		} -	}, -	{ } /* NULL entry stops DMI scanning */ -}; - -static int probe_bigsmp(void) -{ -	if (def_to_bigsmp) -		dmi_bigsmp = 1; -	else -		dmi_check_system(bigsmp_dmi_table); - -	return dmi_bigsmp; -} - -static struct apic apic_bigsmp __ro_after_init = { - -	.name				= "bigsmp", -	.probe				= probe_bigsmp, -	.acpi_madt_oem_check		= NULL, -	.apic_id_valid			= default_apic_id_valid, -	.apic_id_registered		= bigsmp_apic_id_registered, - -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED, -	.dest_mode_logical		= false, - -	.disable_esr			= 1, - -	.check_apicid_used		= bigsmp_check_apicid_used, -	.init_apic_ldr			= bigsmp_init_apic_ldr, -	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map, -	.setup_apic_routing		= bigsmp_setup_apic_routing, -	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid, -	.apicid_to_cpu_present		= physid_set_mask_of_physid, -	.check_phys_apicid_present	= bigsmp_check_phys_apicid_present, -	.phys_pkg_id			= bigsmp_phys_pkg_id, - -	.get_apic_id			= bigsmp_get_apic_id, -	.set_apic_id			= NULL, - -	.calc_dest_apicid		= apic_default_calc_apicid, - -	.send_IPI			= default_send_IPI_single_phys, -	.send_IPI_mask			= default_send_IPI_mask_sequence_phys, -	.send_IPI_mask_allbutself	= NULL, -	.send_IPI_allbutself		= bigsmp_send_IPI_allbutself, -	.send_IPI_all			= bigsmp_send_IPI_all, -	.send_IPI_self			= default_send_IPI_self, - -	.inquire_remote_apic		= default_inquire_remote_apic, - -	.read				= native_apic_mem_read, -	.write				= native_apic_mem_write, -	.eoi_write			= native_apic_mem_write, -	.icr_read			= native_apic_icr_read, -	.icr_write			= native_apic_icr_write, -	.wait_icr_idle			= native_apic_wait_icr_idle, -	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle, - -	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid, -}; - -void __init 
generic_bigsmp_probe(void) -{ -	unsigned int cpu; - -	if (!probe_bigsmp()) -		return; - -	apic = &apic_bigsmp; - -	for_each_possible_cpu(cpu) { -		if (early_per_cpu(x86_cpu_to_logical_apicid, -				  cpu) == BAD_APICID) -			continue; -		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = -			bigsmp_early_logical_apicid(cpu); -	} - -	pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name); -} - -apic_driver(apic_bigsmp); diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 34a992e275ef..45af535c44a0 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -21,6 +21,8 @@  #include <linux/init.h>  #include <linux/delay.h> +#include "local.h" +  #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF  u64 hw_nmi_get_sample_period(int watchdog_thresh)  { @@ -31,12 +33,12 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)  #ifdef arch_trigger_cpumask_backtrace  static void nmi_raise_cpu_backtrace(cpumask_t *mask)  { -	apic->send_IPI_mask(mask, NMI_VECTOR); +	__apic_send_IPI_mask(mask, NMI_VECTOR);  } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)  { -	nmi_trigger_cpumask_backtrace(mask, exclude_self, +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu,  				      nmi_raise_cpu_backtrace);  } diff --git a/arch/x86/kernel/apic/init.c b/arch/x86/kernel/apic/init.c new file mode 100644 index 000000000000..821e2e536f19 --- /dev/null +++ b/arch/x86/kernel/apic/init.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) "APIC: " fmt + +#include <asm/apic.h> + +#include "local.h" + +/* + * Use DEFINE_STATIC_CALL_NULL() to avoid having to provide stub functions + * for each callback. The callbacks are setup during boot and all except + * wait_icr_idle() must be initialized before usage. The IPI wrappers + * use static_call() and not static_call_cond() to catch any fails. 
+ */ +#define DEFINE_APIC_CALL(__cb)						\ +	DEFINE_STATIC_CALL_NULL(apic_call_##__cb, *apic->__cb) + +DEFINE_APIC_CALL(eoi); +DEFINE_APIC_CALL(native_eoi); +DEFINE_APIC_CALL(icr_read); +DEFINE_APIC_CALL(icr_write); +DEFINE_APIC_CALL(read); +DEFINE_APIC_CALL(send_IPI); +DEFINE_APIC_CALL(send_IPI_mask); +DEFINE_APIC_CALL(send_IPI_mask_allbutself); +DEFINE_APIC_CALL(send_IPI_allbutself); +DEFINE_APIC_CALL(send_IPI_all); +DEFINE_APIC_CALL(send_IPI_self); +DEFINE_APIC_CALL(wait_icr_idle); +DEFINE_APIC_CALL(wakeup_secondary_cpu); +DEFINE_APIC_CALL(wakeup_secondary_cpu_64); +DEFINE_APIC_CALL(write); + +EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_mask); +EXPORT_STATIC_CALL_TRAMP_GPL(apic_call_send_IPI_self); + +/* The container for function call overrides */ +struct apic_override __x86_apic_override __initdata; + +#define apply_override(__cb)					\ +	if (__x86_apic_override.__cb)				\ +		apic->__cb = __x86_apic_override.__cb + +static __init void restore_override_callbacks(void) +{ +	apply_override(eoi); +	apply_override(native_eoi); +	apply_override(write); +	apply_override(read); +	apply_override(send_IPI); +	apply_override(send_IPI_mask); +	apply_override(send_IPI_mask_allbutself); +	apply_override(send_IPI_allbutself); +	apply_override(send_IPI_all); +	apply_override(send_IPI_self); +	apply_override(icr_read); +	apply_override(icr_write); +	apply_override(wakeup_secondary_cpu); +	apply_override(wakeup_secondary_cpu_64); +} + +#define update_call(__cb)					\ +	static_call_update(apic_call_##__cb, *apic->__cb) + +static __init void update_static_calls(void) +{ +	update_call(eoi); +	update_call(native_eoi); +	update_call(write); +	update_call(read); +	update_call(send_IPI); +	update_call(send_IPI_mask); +	update_call(send_IPI_mask_allbutself); +	update_call(send_IPI_allbutself); +	update_call(send_IPI_all); +	update_call(send_IPI_self); +	update_call(icr_read); +	update_call(icr_write); +	update_call(wait_icr_idle); +	update_call(wakeup_secondary_cpu); +	
update_call(wakeup_secondary_cpu_64); +} + +void __init apic_setup_apic_calls(void) +{ +	/* Ensure that the default APIC has native_eoi populated */ +	apic->native_eoi = apic->eoi; +	update_static_calls(); +	pr_info("Static calls initialized\n"); +} + +void __init apic_install_driver(struct apic *driver) +{ +	if (apic == driver) +		return; + +	apic = driver; + +	if (IS_ENABLED(CONFIG_X86_X2APIC) && apic->x2apic_set_max_apicid) +		apic->max_apic_id = x2apic_max_apicid; + +	/* Copy the original eoi() callback as KVM/HyperV might overwrite it */ +	if (!apic->native_eoi) +		apic->native_eoi = apic->eoi; + +	/* Apply any already installed callback overrides */ +	restore_override_callbacks(); +	update_static_calls(); + +	pr_info("Switched APIC routing to: %s\n", driver->name); +} diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a868b76cd3d4..5ba2feb2c04c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,7 @@  #include <asm/hw_irq.h>  #include <asm/apic.h>  #include <asm/pgtable.h> +#include <asm/x86_init.h>  #define	for_each_ioapic(idx)		\  	for ((idx) = 0; (idx) < nr_ioapics; (idx)++) @@ -85,8 +86,8 @@ static unsigned int ioapic_dynirq_base;  static int ioapic_initialized;  struct irq_pin_list { -	struct list_head list; -	int apic, pin; +	struct list_head	list; +	int			apic, pin;  };  struct mp_chip_data { @@ -95,7 +96,7 @@ struct mp_chip_data {  	bool				is_level;  	bool				active_low;  	bool				isa_irq; -	u32 count; +	u32				count;  };  struct mp_ioapic_gsi { @@ -104,21 +105,17 @@ struct mp_ioapic_gsi {  };  static struct ioapic { -	/* -	 * # of IRQ routing registers -	 */ -	int nr_registers; -	/* -	 * Saved state during suspend/resume, or while enabling intr-remap. -	 */ -	struct IO_APIC_route_entry *saved_registers; +	/* # of IRQ routing registers */ +	int				nr_registers; +	/* Saved state during suspend/resume, or while enabling intr-remap. 
*/ +	struct IO_APIC_route_entry	*saved_registers;  	/* I/O APIC config */ -	struct mpc_ioapic mp_config; +	struct mpc_ioapic		mp_config;  	/* IO APIC gsi routing info */ -	struct mp_ioapic_gsi  gsi_config; -	struct ioapic_domain_cfg irqdomain_cfg; -	struct irq_domain *irqdomain; -	struct resource *iomem_res; +	struct mp_ioapic_gsi		gsi_config; +	struct ioapic_domain_cfg	irqdomain_cfg; +	struct irq_domain		*irqdomain; +	struct resource			*iomem_res;  } ioapics[MAX_IO_APICS];  #define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver @@ -177,7 +174,7 @@ int mp_bus_id_to_type[MAX_MP_BUSSES];  DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int skip_ioapic_setup; +bool ioapic_is_disabled __ro_after_init;  /**   * disable_ioapic_support() - disables ioapic support at runtime @@ -188,7 +185,7 @@ void disable_ioapic_support(void)  	noioapicquirk = 1;  	noioapicreroute = -1;  #endif -	skip_ioapic_setup = 1; +	ioapic_is_disabled = true;  }  static int __init parse_noapic(char *str) @@ -204,10 +201,9 @@ void mp_save_irq(struct mpc_intsrc *m)  {  	int i; -	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," -		" IRQ %02x, APIC ID %x, APIC INT %02x\n", -		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, -		m->srcbusirq, m->dstapic, m->dstirq); +	apic_pr_verbose("Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n", +			m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, +			m->srcbusirq, m->dstapic, m->dstirq);  	for (i = 0; i < mp_irq_entries; i++) {  		if (!memcmp(&mp_irqs[i], m, sizeof(*m))) @@ -268,12 +264,14 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)  static inline void io_apic_eoi(unsigned int apic, unsigned int vector)  {  	struct io_apic __iomem *io_apic = io_apic_base(apic); +  	writel(vector, &io_apic->eoi);  }  unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)  {  	struct io_apic __iomem *io_apic = io_apic_base(apic); +  	writel(reg, 
&io_apic->index);  	return readl(&io_apic->data);  } @@ -299,14 +297,8 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)  static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)  { -	struct IO_APIC_route_entry entry; -	unsigned long flags; - -	raw_spin_lock_irqsave(&ioapic_lock, flags); -	entry = __ioapic_read_entry(apic, pin); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); - -	return entry; +	guard(raw_spinlock_irqsave)(&ioapic_lock); +	return __ioapic_read_entry(apic, pin);  }  /* @@ -323,11 +315,8 @@ static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e  static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)  { -	unsigned long flags; - -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	__ioapic_write_entry(apic, pin, e); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  }  /* @@ -338,12 +327,10 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)  static void ioapic_mask_entry(int apic, int pin)  {  	struct IO_APIC_route_entry e = { .masked = true }; -	unsigned long flags; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	io_apic_write(apic, 0x10 + 2*pin, e.w1);  	io_apic_write(apic, 0x11 + 2*pin, e.w2); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  }  /* @@ -351,68 +338,39 @@ static void ioapic_mask_entry(int apic, int pin)   * shared ISA-space IRQs, so we have to support them. We are super   * fast in the common case, and fast for shared ISA-space IRQs.   
*/ -static int __add_pin_to_irq_node(struct mp_chip_data *data, -				 int node, int apic, int pin) +static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)  {  	struct irq_pin_list *entry; -	/* don't allow duplicates */ -	for_each_irq_pin(entry, data->irq_2_pin) +	/* Don't allow duplicates */ +	for_each_irq_pin(entry, data->irq_2_pin) {  		if (entry->apic == apic && entry->pin == pin) -			return 0; +			return true; +	}  	entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);  	if (!entry) { -		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", -		       node, apic, pin); -		return -ENOMEM; +		pr_err("Cannot allocate irq_pin_list (%d,%d,%d)\n", node, apic, pin); +		return false;  	} +  	entry->apic = apic;  	entry->pin = pin;  	list_add_tail(&entry->list, &data->irq_2_pin); - -	return 0; +	return true;  }  static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)  {  	struct irq_pin_list *tmp, *entry; -	list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) +	list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) {  		if (entry->apic == apic && entry->pin == pin) {  			list_del(&entry->list);  			kfree(entry);  			return;  		} -} - -static void add_pin_to_irq_node(struct mp_chip_data *data, -				int node, int apic, int pin) -{ -	if (__add_pin_to_irq_node(data, node, apic, pin)) -		panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, -					   int oldapic, int oldpin, -					   int newapic, int newpin) -{ -	struct irq_pin_list *entry; - -	for_each_irq_pin(entry, data->irq_2_pin) { -		if (entry->apic == oldapic && entry->pin == oldpin) { -			entry->apic = newapic; -			entry->pin = newpin; -			/* every one is different, right? 
*/ -			return; -		}  	} - -	/* old apic/pin didn't exist, so just add new ones */ -	add_pin_to_irq_node(data, node, newapic, newpin);  }  static void io_apic_modify_irq(struct mp_chip_data *data, bool masked, @@ -429,12 +387,12 @@ static void io_apic_modify_irq(struct mp_chip_data *data, bool masked,  	}  } +/* + * Synchronize the IO-APIC and the CPU by doing a dummy read from the + * IO-APIC + */  static void io_apic_sync(struct irq_pin_list *entry)  { -	/* -	 * Synchronize the IO-APIC and the CPU by doing -	 * a dummy read from the IO-APIC -	 */  	struct io_apic __iomem *io_apic;  	io_apic = io_apic_base(entry->apic); @@ -444,11 +402,9 @@ static void io_apic_sync(struct irq_pin_list *entry)  static void mask_ioapic_irq(struct irq_data *irq_data)  {  	struct mp_chip_data *data = irq_data->chip_data; -	unsigned long flags; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	io_apic_modify_irq(data, true, &io_apic_sync); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  }  static void __unmask_ioapic(struct mp_chip_data *data) @@ -459,11 +415,9 @@ static void __unmask_ioapic(struct mp_chip_data *data)  static void unmask_ioapic_irq(struct irq_data *irq_data)  {  	struct mp_chip_data *data = irq_data->chip_data; -	unsigned long flags; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	__unmask_ioapic(data); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  }  /* @@ -491,30 +445,24 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector)  		entry = entry1 = __ioapic_read_entry(apic, pin); -		/* -		 * Mask the entry and change the trigger mode to edge. -		 */ +		/* Mask the entry and change the trigger mode to edge. */  		entry1.masked = true;  		entry1.is_level = false;  		__ioapic_write_entry(apic, pin, entry1); -		/* -		 * Restore the previous level triggered entry. -		 */ +		/* Restore the previous level triggered entry. 
*/  		__ioapic_write_entry(apic, pin, entry);  	}  }  static void eoi_ioapic_pin(int vector, struct mp_chip_data *data)  { -	unsigned long flags;  	struct irq_pin_list *entry; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	for_each_irq_pin(entry, data->irq_2_pin)  		__eoi_ioapic_pin(entry->apic, entry->pin, vector); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  }  static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) @@ -537,8 +485,6 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)  	}  	if (entry.irr) { -		unsigned long flags; -  		/*  		 * Make sure the trigger mode is set to level. Explicit EOI  		 * doesn't clear the remote-IRR if the trigger mode is not @@ -548,9 +494,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)  			entry.is_level = true;  			ioapic_write_entry(apic, pin, entry);  		} -		raw_spin_lock_irqsave(&ioapic_lock, flags); +		guard(raw_spinlock_irqsave)(&ioapic_lock);  		__eoi_ioapic_pin(apic, pin, entry.vector); -		raw_spin_unlock_irqrestore(&ioapic_lock, flags);  	}  	/* @@ -585,28 +530,23 @@ static int pirq_entries[MAX_PIRQS] = {  static int __init ioapic_pirq_setup(char *str)  { -	int i, max; -	int ints[MAX_PIRQS+1]; +	int i, max, ints[MAX_PIRQS+1];  	get_options(str, ARRAY_SIZE(ints), ints); -	apic_printk(APIC_VERBOSE, KERN_INFO -			"PIRQ redirection, working around broken MP-BIOS.\n"); +	apic_pr_verbose("PIRQ redirection, working around broken MP-BIOS.\n"); +  	max = MAX_PIRQS;  	if (ints[0] < MAX_PIRQS)  		max = ints[0];  	for (i = 0; i < max; i++) { -		apic_printk(APIC_VERBOSE, KERN_DEBUG -				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]); -		/* -		 * PIRQs are mapped upside down, usually. -		 */ +		apic_pr_verbose("... 
PIRQ%d -> IRQ %d\n", i, ints[i + 1]); +		/* PIRQs are mapped upside down, usually */  		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];  	}  	return 1;  } -  __setup("pirq=", ioapic_pirq_setup);  #endif /* CONFIG_X86_32 */ @@ -625,8 +565,7 @@ int save_ioapic_entries(void)  		}  		for_each_pin(apic, pin) -			ioapics[apic].saved_registers[pin] = -				ioapic_read_entry(apic, pin); +			ioapics[apic].saved_registers[pin] = ioapic_read_entry(apic, pin);  	}  	return err; @@ -667,8 +606,7 @@ int restore_ioapic_entries(void)  			continue;  		for_each_pin(apic, pin) -			ioapic_write_entry(apic, pin, -					   ioapics[apic].saved_registers[pin]); +			ioapic_write_entry(apic, pin, ioapics[apic].saved_registers[pin]);  	}  	return 0;  } @@ -680,12 +618,13 @@ static int find_irq_entry(int ioapic_idx, int pin, int type)  {  	int i; -	for (i = 0; i < mp_irq_entries; i++) +	for (i = 0; i < mp_irq_entries; i++) {  		if (mp_irqs[i].irqtype == type &&  		    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||  		     mp_irqs[i].dstapic == MP_APIC_ALL) &&  		    mp_irqs[i].dstirq == pin)  			return i; +	}  	return -1;  } @@ -700,10 +639,8 @@ static int __init find_isa_irq_pin(int irq, int type)  	for (i = 0; i < mp_irq_entries; i++) {  		int lbus = mp_irqs[i].srcbus; -		if (test_bit(lbus, mp_bus_not_pci) && -		    (mp_irqs[i].irqtype == type) && +		if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) &&  		    (mp_irqs[i].srcbusirq == irq)) -  			return mp_irqs[i].dstirq;  	}  	return -1; @@ -716,8 +653,7 @@ static int __init find_isa_irq_apic(int irq, int type)  	for (i = 0; i < mp_irq_entries; i++) {  		int lbus = mp_irqs[i].srcbus; -		if (test_bit(lbus, mp_bus_not_pci) && -		    (mp_irqs[i].irqtype == type) && +		if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) &&  		    (mp_irqs[i].srcbusirq == irq))  			break;  	} @@ -725,9 +661,10 @@ static int __init find_isa_irq_apic(int irq, int type)  	if (i < mp_irq_entries) {  		int ioapic_idx; -		
for_each_ioapic(ioapic_idx) +		for_each_ioapic(ioapic_idx) {  			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)  				return ioapic_idx; +		}  	}  	return -1; @@ -768,8 +705,7 @@ static bool EISA_ELCR(unsigned int irq)  		unsigned int port = PIC_ELCR1 + (irq >> 3);  		return (inb(port) >> (irq & 7)) & 1;  	} -	apic_printk(APIC_VERBOSE, KERN_INFO -			"Broken MPtable reports ISA irq %d\n", irq); +	apic_pr_verbose("Broken MPtable reports ISA irq %d\n", irq);  	return false;  } @@ -830,7 +766,7 @@ static int __acpi_get_override_irq(u32 gsi, bool *trigger, bool *polarity)  {  	int ioapic, pin, idx; -	if (skip_ioapic_setup) +	if (ioapic_is_disabled)  		return -1;  	ioapic = mp_find_ioapic(gsi); @@ -946,9 +882,9 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)  static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,  				 struct irq_alloc_info *info)  { +	int type = ioapics[ioapic].irqdomain_cfg.type;  	bool legacy = false;  	int irq = -1; -	int type = ioapics[ioapic].irqdomain_cfg.type;  	switch (type) {  	case IOAPIC_DOMAIN_LEGACY: @@ -970,8 +906,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,  		return -1;  	} -	return __irq_domain_alloc_irqs(domain, irq, 1, -				       ioapic_alloc_attr_node(info), +	return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info),  				       info, legacy, NULL);  } @@ -985,29 +920,26 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,   * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be   * multiple pins sharing the same legacy IRQ number when ACPI is disabled.   
*/ -static int alloc_isa_irq_from_domain(struct irq_domain *domain, -				     int irq, int ioapic, int pin, +static int alloc_isa_irq_from_domain(struct irq_domain *domain, int irq, int ioapic, int pin,  				     struct irq_alloc_info *info)  { -	struct mp_chip_data *data;  	struct irq_data *irq_data = irq_get_irq_data(irq);  	int node = ioapic_alloc_attr_node(info); +	struct mp_chip_data *data;  	/*  	 * Legacy ISA IRQ has already been allocated, just add pin to  	 * the pin list associated with this IRQ and program the IOAPIC -	 * entry. The IOAPIC entry +	 * entry.  	 */  	if (irq_data && irq_data->parent_data) {  		if (!mp_check_pin_attr(irq, info))  			return -EBUSY; -		if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic, -					  info->ioapic.pin)) +		if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin))  			return -ENOMEM;  	} else {  		info->flags |= X86_IRQ_ALLOC_LEGACY; -		irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, -					      NULL); +		irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL);  		if (irq >= 0) {  			irq_data = irq_domain_get_irq_data(domain, irq);  			data = irq_data->chip_data; @@ -1021,11 +953,11 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,  static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,  			     unsigned int flags, struct irq_alloc_info *info)  { -	int irq; -	bool legacy = false; +	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);  	struct irq_alloc_info tmp;  	struct mp_chip_data *data; -	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); +	bool legacy = false; +	int irq;  	if (!domain)  		return -ENOSYS; @@ -1045,7 +977,7 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,  			return -EINVAL;  	} -	mutex_lock(&ioapic_mutex); +	guard(mutex)(&ioapic_mutex);  	if (!(flags & IOAPIC_MAP_ALLOC)) {  		if (!legacy) {  			irq = irq_find_mapping(domain, pin); @@ -1066,8 +998,6 @@ static int 
mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,  			data->count++;  		}  	} -	mutex_unlock(&ioapic_mutex); -  	return irq;  } @@ -1075,26 +1005,20 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)  {  	u32 gsi = mp_pin_to_gsi(ioapic, pin); -	/* -	 * Debugging check, we are in big trouble if this message pops up! -	 */ +	/* Debugging check, we are in big trouble if this message pops up! */  	if (mp_irqs[idx].dstirq != pin)  		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");  #ifdef CONFIG_X86_32 -	/* -	 * PCI IRQ command line redirection. Yes, limits are hardcoded. -	 */ +	/* PCI IRQ command line redirection. Yes, limits are hardcoded. */  	if ((pin >= 16) && (pin <= 23)) { -		if (pirq_entries[pin-16] != -1) { -			if (!pirq_entries[pin-16]) { -				apic_printk(APIC_VERBOSE, KERN_DEBUG -						"disabling PIRQ%d\n", pin-16); +		if (pirq_entries[pin - 16] != -1) { +			if (!pirq_entries[pin - 16]) { +				apic_pr_verbose("Disabling PIRQ%d\n", pin - 16);  			} else {  				int irq = pirq_entries[pin-16]; -				apic_printk(APIC_VERBOSE, KERN_DEBUG -						"using PIRQ%d -> IRQ %d\n", -						pin-16, irq); + +				apic_pr_verbose("Using PIRQ%d -> IRQ %d\n", pin - 16, irq);  				return irq;  			}  		} @@ -1132,10 +1056,9 @@ void mp_unmap_irq(int irq)  	if (!data || data->isa_irq)  		return; -	mutex_lock(&ioapic_mutex); +	guard(mutex)(&ioapic_mutex);  	if (--data->count == 0)  		irq_domain_free_irqs(irq, 1); -	mutex_unlock(&ioapic_mutex);  }  /* @@ -1146,12 +1069,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)  {  	int irq, i, best_ioapic = -1, best_idx = -1; -	apic_printk(APIC_DEBUG, -		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", -		    bus, slot, pin); +	apic_pr_debug("Querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", +		      bus, slot, pin);  	if (test_bit(bus, mp_bus_not_pci)) { -		apic_printk(APIC_VERBOSE, -			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus); +		apic_pr_verbose("PCI BIOS passed 
nonexistent PCI bus %d!\n", bus);  		return -1;  	} @@ -1196,8 +1117,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)  		return -1;  out: -	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, -			 IOAPIC_MAP_ALLOC); +	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, IOAPIC_MAP_ALLOC);  }  EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); @@ -1208,17 +1128,16 @@ static void __init setup_IO_APIC_irqs(void)  	unsigned int ioapic, pin;  	int idx; -	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); +	apic_pr_verbose("Init IO_APIC IRQs\n");  	for_each_ioapic_pin(ioapic, pin) {  		idx = find_irq_entry(ioapic, pin, mp_INT); -		if (idx < 0) -			apic_printk(APIC_VERBOSE, -				    KERN_DEBUG " apic %d pin %d not connected\n", -				    mpc_ioapic_id(ioapic), pin); -		else -			pin_2_irq(idx, ioapic, pin, -				  ioapic ? 0 : IOAPIC_MAP_ALLOC); +		if (idx < 0) { +			apic_pr_verbose("apic %d pin %d not connected\n", +					mpc_ioapic_id(ioapic), pin); +		} else { +			pin_2_irq(idx, ioapic, pin, ioapic ? 0 : IOAPIC_MAP_ALLOC); +		}  	}  } @@ -1233,26 +1152,21 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)  	char buf[256];  	int i; -	printk(KERN_DEBUG "IOAPIC %d:\n", apic); +	apic_dbg("IOAPIC %d:\n", apic);  	for (i = 0; i <= nr_entries; i++) {  		entry = ioapic_read_entry(apic, i); -		snprintf(buf, sizeof(buf), -			 " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", -			 i, -			 entry.masked ? "disabled" : "enabled ", +		snprintf(buf, sizeof(buf), " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", +			 i, entry.masked ? "disabled" : "enabled ",  			 entry.is_level ? "level" : "edge ",  			 entry.active_low ? 
"low " : "high",  			 entry.vector, entry.irr, entry.delivery_status);  		if (entry.ir_format) { -			printk(KERN_DEBUG "%s, remapped, I(%04X),  Z(%X)\n", -			       buf, -			       (entry.ir_index_15 << 15) | entry.ir_index_0_14, -				entry.ir_zero); +			apic_dbg("%s, remapped, I(%04X),  Z(%X)\n", buf, +				 (entry.ir_index_15 << 15) | entry.ir_index_0_14, entry.ir_zero);  		} else { -			printk(KERN_DEBUG "%s, %s, D(%02X%02X), M(%1d)\n", buf, -			       entry.dest_mode_logical ? "logical " : "physical", -			       entry.virt_destid_8_14, entry.destid_0_7, -			       entry.delivery_mode); +			apic_dbg("%s, %s, D(%02X%02X), M(%1d)\n", buf, +				 entry.dest_mode_logical ? "logical " : "physical", +				 entry.virt_destid_8_14, entry.destid_0_7, entry.delivery_mode);  		}  	}  } @@ -1263,30 +1177,25 @@ static void __init print_IO_APIC(int ioapic_idx)  	union IO_APIC_reg_01 reg_01;  	union IO_APIC_reg_02 reg_02;  	union IO_APIC_reg_03 reg_03; -	unsigned long flags; -	raw_spin_lock_irqsave(&ioapic_lock, flags); -	reg_00.raw = io_apic_read(ioapic_idx, 0); -	reg_01.raw = io_apic_read(ioapic_idx, 1); -	if (reg_01.bits.version >= 0x10) -		reg_02.raw = io_apic_read(ioapic_idx, 2); -	if (reg_01.bits.version >= 0x20) -		reg_03.raw = io_apic_read(ioapic_idx, 3); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); - -	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); -	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); -	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID); -	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type); -	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS); - -	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); -	printk(KERN_DEBUG ".......     : max redirection entries: %02X\n", -		reg_01.bits.entries); - -	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ); -	printk(KERN_DEBUG ".......     
: IO APIC version: %02X\n", -		reg_01.bits.version); +	scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { +		reg_00.raw = io_apic_read(ioapic_idx, 0); +		reg_01.raw = io_apic_read(ioapic_idx, 1); +		if (reg_01.bits.version >= 0x10) +			reg_02.raw = io_apic_read(ioapic_idx, 2); +		if (reg_01.bits.version >= 0x20) +			reg_03.raw = io_apic_read(ioapic_idx, 3); +	} + +	apic_dbg("IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); +	apic_dbg(".... register #00: %08X\n", reg_00.raw); +	apic_dbg(".......    : physical APIC id: %02X\n", reg_00.bits.ID); +	apic_dbg(".......    : Delivery Type: %X\n", reg_00.bits.delivery_type); +	apic_dbg(".......    : LTS          : %X\n", reg_00.bits.LTS); +	apic_dbg(".... register #01: %08X\n", *(int *)&reg_01); +	apic_dbg(".......     : max redirection entries: %02X\n", reg_01.bits.entries); +	apic_dbg(".......     : PRQ implemented: %X\n", reg_01.bits.PRQ); +	apic_dbg(".......     : IO APIC version: %02X\n", reg_01.bits.version);  	/*  	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, @@ -1294,8 +1203,8 @@ static void __init print_IO_APIC(int ioapic_idx)  	 * value, so ignore it if reg_02 == reg_01.  	 */  	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { -		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); -		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration); +		apic_dbg(".... register #02: %08X\n", reg_02.raw); +		apic_dbg(".......     : arbitration: %02X\n", reg_02.bits.arbitration);  	}  	/* @@ -1305,11 +1214,11 @@ static void __init print_IO_APIC(int ioapic_idx)  	 */  	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&  	    reg_03.raw != reg_01.raw) { -		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); -		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT); +		apic_dbg(".... register #03: %08X\n", reg_03.raw); +		apic_dbg(".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);  	} -	printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); +	apic_dbg(".... IRQ redirection table:\n");  	io_apic_print_entries(ioapic_idx, reg_01.bits.entries);  } @@ -1318,11 +1227,11 @@ void __init print_IO_APICs(void)  	int ioapic_idx;  	unsigned int irq; -	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); -	for_each_ioapic(ioapic_idx) -		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", -		       mpc_ioapic_id(ioapic_idx), -		       ioapics[ioapic_idx].nr_registers); +	apic_dbg("number of MP IRQ sources: %d.\n", mp_irq_entries); +	for_each_ioapic(ioapic_idx) { +		apic_dbg("number of IO-APIC #%d registers: %d.\n", +			 mpc_ioapic_id(ioapic_idx), ioapics[ioapic_idx].nr_registers); +	}  	/*  	 * We are a bit conservative about what we expect.  We have to @@ -1333,7 +1242,7 @@ void __init print_IO_APICs(void)  	for_each_ioapic(ioapic_idx)  		print_IO_APIC(ioapic_idx); -	printk(KERN_DEBUG "IRQ to pin mappings:\n"); +	apic_dbg("IRQ to pin mappings:\n");  	for_each_active_irq(irq) {  		struct irq_pin_list *entry;  		struct irq_chip *chip; @@ -1348,7 +1257,7 @@ void __init print_IO_APICs(void)  		if (list_empty(&data->irq_2_pin))  			continue; -		printk(KERN_DEBUG "IRQ%d ", irq); +		apic_dbg("IRQ%d ", irq);  		for_each_irq_pin(entry, data->irq_2_pin)  			pr_cont("-> %d:%d", entry->apic, entry->pin);  		pr_cont("\n"); @@ -1362,10 +1271,9 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };  void __init enable_IO_APIC(void)  { -	int i8259_apic, i8259_pin; -	int apic, pin; +	int i8259_apic, i8259_pin, apic, pin; -	if (skip_ioapic_setup) +	if (ioapic_is_disabled)  		nr_ioapics = 0;  	if (!nr_legacy_irqs() || !nr_ioapics) @@ -1375,19 +1283,21 @@ void __init enable_IO_APIC(void)  		/* See if any of the pins is in ExtINT mode */  		struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin); -		/* If the interrupt line is enabled and in ExtInt mode -		 * I have found the pin where the i8259 is connected. 
+		/* +		 * If the interrupt line is enabled and in ExtInt mode I +		 * have found the pin where the i8259 is connected.  		 */ -		if (!entry.masked && -		    entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) { +		if (!entry.masked && entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) {  			ioapic_i8259.apic = apic;  			ioapic_i8259.pin  = pin; -			goto found_i8259; +			break;  		}  	} - found_i8259: -	/* Look to see what if the MP table has reported the ExtINT */ -	/* If we could not find the appropriate pin by looking at the ioapic + +	/* +	 * Look to see what if the MP table has reported the ExtINT +	 * +	 * If we could not find the appropriate pin by looking at the ioapic  	 * the i8259 probably is not connected the ioapic but give the  	 * mptable a chance anyway.  	 */ @@ -1395,29 +1305,24 @@ void __init enable_IO_APIC(void)  	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);  	/* Trust the MP table if nothing is setup in the hardware */  	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { -		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); +		pr_warn("ExtINT not setup in hardware but reported by MP table\n");  		ioapic_i8259.pin  = i8259_pin;  		ioapic_i8259.apic = i8259_apic;  	}  	/* Complain if the MP table and the hardware disagree */  	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && -		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) -	{ -		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); -	} +	    (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) +		pr_warn("ExtINT in hardware and MP table differ\n"); -	/* -	 * Do not trust the IO-APIC being empty at bootup -	 */ +	/* Do not trust the IO-APIC being empty at bootup */  	clear_IO_APIC();  }  void native_restore_boot_irq_mode(void)  {  	/* -	 * If the i8259 is routed through an IOAPIC -	 * Put that IOAPIC in virtual wire mode -	 * so legacy interrupts can be delivered. 
+	 * If the i8259 is routed through an IOAPIC Put that IOAPIC in +	 * virtual wire mode so legacy interrupts can be delivered.  	 */  	if (ioapic_i8259.pin != -1) {  		struct IO_APIC_route_entry entry; @@ -1432,9 +1337,7 @@ void native_restore_boot_irq_mode(void)  		entry.destid_0_7	= apic_id & 0xFF;  		entry.virt_destid_8_14	= apic_id >> 8; -		/* -		 * Add it to the IO-APIC irq-routing table: -		 */ +		/* Add it to the IO-APIC irq-routing table */  		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);  	} @@ -1457,37 +1360,34 @@ void restore_boot_irq_mode(void)   *   * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999   */ -void __init setup_ioapic_ids_from_mpc_nocheck(void) +static void __init setup_ioapic_ids_from_mpc_nocheck(void)  { +	DECLARE_BITMAP(phys_id_present_map, MAX_LOCAL_APIC); +	const u32 broadcast_id = 0xF;  	union IO_APIC_reg_00 reg_00; -	physid_mask_t phys_id_present_map; -	int ioapic_idx; -	int i;  	unsigned char old_id; -	unsigned long flags; +	int ioapic_idx, i;  	/*  	 * This is broken; anything with a real cpu count has to  	 * circumvent this idiocy regardless.  	 */ -	apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); +	copy_phys_cpu_present_map(phys_id_present_map);  	/*  	 * Set the IOAPIC ID to the value stored in the MPC table.  	 */  	for_each_ioapic(ioapic_idx) {  		/* Read the register 0 value */ -		raw_spin_lock_irqsave(&ioapic_lock, flags); -		reg_00.raw = io_apic_read(ioapic_idx, 0); -		raw_spin_unlock_irqrestore(&ioapic_lock, flags); +		scoped_guard (raw_spinlock_irqsave, &ioapic_lock) +			reg_00.raw = io_apic_read(ioapic_idx, 0);  		old_id = mpc_ioapic_id(ioapic_idx); -		if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { -			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", -				ioapic_idx, mpc_ioapic_id(ioapic_idx)); -			printk(KERN_ERR "... fixing up to %d. 
(tell your hw vendor)\n", -				reg_00.bits.ID); +		if (mpc_ioapic_id(ioapic_idx) >= broadcast_id) { +			pr_err(FW_BUG "IO-APIC#%d ID is %d in the MPC table!...\n", +			       ioapic_idx, mpc_ioapic_id(ioapic_idx)); +			pr_err("... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID);  			ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;  		} @@ -1496,65 +1396,54 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)  		 * system must have a unique ID or we get lots of nice  		 * 'stuck on smp_invalidate_needed IPI wait' messages.  		 */ -		if (apic->check_apicid_used(&phys_id_present_map, -					    mpc_ioapic_id(ioapic_idx))) { -			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", -				ioapic_idx, mpc_ioapic_id(ioapic_idx)); -			for (i = 0; i < get_physical_broadcast(); i++) -				if (!physid_isset(i, phys_id_present_map)) +		if (test_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { +			pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", +			       ioapic_idx, mpc_ioapic_id(ioapic_idx)); +			for (i = 0; i < broadcast_id; i++) +				if (!test_bit(i, phys_id_present_map))  					break; -			if (i >= get_physical_broadcast()) +			if (i >= broadcast_id)  				panic("Max APIC ID exceeded!\n"); -			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", -				i); -			physid_set(i, phys_id_present_map); +			pr_err("... fixing up to %d. 
(tell your hw vendor)\n", i); +			set_bit(i, phys_id_present_map);  			ioapics[ioapic_idx].mp_config.apicid = i;  		} else { -			physid_mask_t tmp; -			apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), -						    &tmp); -			apic_printk(APIC_VERBOSE, "Setting %d in the " -					"phys_id_present_map\n", +			apic_pr_verbose("Setting %d in the phys_id_present_map\n",  					mpc_ioapic_id(ioapic_idx)); -			physids_or(phys_id_present_map, phys_id_present_map, tmp); +			set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map);  		}  		/* -		 * We need to adjust the IRQ routing table -		 * if the ID changed. +		 * We need to adjust the IRQ routing table if the ID +		 * changed.  		 */ -		if (old_id != mpc_ioapic_id(ioapic_idx)) -			for (i = 0; i < mp_irq_entries; i++) +		if (old_id != mpc_ioapic_id(ioapic_idx)) { +			for (i = 0; i < mp_irq_entries; i++) {  				if (mp_irqs[i].dstapic == old_id) -					mp_irqs[i].dstapic -						= mpc_ioapic_id(ioapic_idx); +					mp_irqs[i].dstapic = mpc_ioapic_id(ioapic_idx); +			} +		}  		/* -		 * Update the ID register according to the right value -		 * from the MPC table if they are different. +		 * Update the ID register according to the right value from +		 * the MPC table if they are different.  		 
*/  		if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)  			continue; -		apic_printk(APIC_VERBOSE, KERN_INFO -			"...changing IO-APIC physical APIC ID to %d ...", -			mpc_ioapic_id(ioapic_idx)); +		apic_pr_verbose("...changing IO-APIC physical APIC ID to %d ...", +				mpc_ioapic_id(ioapic_idx));  		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); -		raw_spin_lock_irqsave(&ioapic_lock, flags); -		io_apic_write(ioapic_idx, 0, reg_00.raw); -		raw_spin_unlock_irqrestore(&ioapic_lock, flags); - -		/* -		 * Sanity check -		 */ -		raw_spin_lock_irqsave(&ioapic_lock, flags); -		reg_00.raw = io_apic_read(ioapic_idx, 0); -		raw_spin_unlock_irqrestore(&ioapic_lock, flags); +		scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { +			io_apic_write(ioapic_idx, 0, reg_00.raw); +			reg_00.raw = io_apic_read(ioapic_idx, 0); +		} +		/* Sanity check */  		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))  			pr_cont("could not set ID!\n");  		else -			apic_printk(APIC_VERBOSE, " ok.\n"); +			apic_pr_verbose(" ok.\n");  	}  } @@ -1597,10 +1486,9 @@ static void __init delay_with_tsc(void)  	 * 1 GHz == 40 jiffies  	 */  	do { -		rep_nop(); +		native_pause();  		now = rdtsc(); -	} while ((now - start) < 40000000000ULL / HZ && -		time_before_eq(jiffies, end)); +	} while ((now - start) < 40000000000ULL / HZ &&	time_before_eq(jiffies, end));  }  static void __init delay_without_tsc(void) @@ -1661,36 +1549,29 @@ static int __init timer_irq_works(void)   * so we 'resend' these IRQs via IPIs, to the same CPU. It's much   * better to do it this way as thus we do not have to be aware of   * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. 
  * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... + * + * Edge triggered needs to resend any interrupt that was delayed but this + * is now handled in the device independent code. + * + * Starting up a edge-triggered IO-APIC interrupt is nasty - we need to + * make sure that we get the edge.  If it is already asserted for some + * reason, we need return 1 to indicate that is was pending. + * + * This is not complete - we should be able to fake an edge even if it + * isn't on the 8259A...   */  static unsigned int startup_ioapic_irq(struct irq_data *data)  {  	int was_pending = 0, irq = data->irq; -	unsigned long flags; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	if (irq < nr_legacy_irqs()) {  		legacy_pic->mask(irq);  		if (legacy_pic->irq_pending(irq))  			was_pending = 1;  	}  	__unmask_ioapic(data->chip_data); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); -  	return was_pending;  } @@ -1700,9 +1581,8 @@ atomic_t irq_mis_count;  static bool io_apic_level_ack_pending(struct mp_chip_data *data)  {  	struct irq_pin_list *entry; -	unsigned long flags; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	for_each_irq_pin(entry, data->irq_2_pin) {  		struct IO_APIC_route_entry e;  		int pin; @@ -1710,13 +1590,9 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)  		pin = entry->pin;  		e.w1 = io_apic_read(entry->apic, 0x10 + pin*2);  		/* Is the remote IRR bit set? */ -		if (e.irr) { -			raw_spin_unlock_irqrestore(&ioapic_lock, flags); +		if (e.irr)  			return true; -		}  	} -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); -  	return false;  } @@ -1734,7 +1610,8 @@ static inline bool ioapic_prepare_move(struct irq_data *data)  static inline void ioapic_finish_move(struct irq_data *data, bool moveit)  {  	if (unlikely(moveit)) { -		/* Only migrate the irq if the ack has been received. 
+		/* +		 * Only migrate the irq if the ack has been received.  		 *  		 * On rare occasions the broadcast level triggered ack gets  		 * delayed going to ioapics, and if we reprogram the @@ -1826,7 +1703,7 @@ static void ioapic_ack_level(struct irq_data *irq_data)  	 * We must acknowledge the irq before we move it or the acknowledge will  	 * not propagate properly.  	 */ -	ack_APIC_irq(); +	apic_eoi();  	/*  	 * Tail end of clearing remote IRR bit (either by delivering the EOI @@ -1917,18 +1794,16 @@ static void ioapic_configure_entry(struct irq_data *irqd)  		__ioapic_write_entry(entry->apic, entry->pin, mpd->entry);  } -static int ioapic_set_affinity(struct irq_data *irq_data, -			       const struct cpumask *mask, bool force) +static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force)  {  	struct irq_data *parent = irq_data->parent_data; -	unsigned long flags;  	int ret;  	ret = parent->chip->irq_set_affinity(parent, mask, force); -	raw_spin_lock_irqsave(&ioapic_lock, flags); + +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)  		ioapic_configure_entry(irq_data); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  	return ret;  } @@ -1947,9 +1822,8 @@ static int ioapic_set_affinity(struct irq_data *irq_data,   *   * Verify that the corresponding Remote-IRR bits are clear.   
*/ -static int ioapic_irq_get_chip_state(struct irq_data *irqd, -				   enum irqchip_irq_state which, -				   bool *state) +static int ioapic_irq_get_chip_state(struct irq_data *irqd, enum irqchip_irq_state which, +				     bool *state)  {  	struct mp_chip_data *mcd = irqd->chip_data;  	struct IO_APIC_route_entry rentry; @@ -1959,7 +1833,8 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd,  		return -EINVAL;  	*state = false; -	raw_spin_lock(&ioapic_lock); + +	guard(raw_spinlock)(&ioapic_lock);  	for_each_irq_pin(p, mcd->irq_2_pin) {  		rentry = __ioapic_read_entry(p->apic, p->pin);  		/* @@ -1973,7 +1848,6 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd,  			break;  		}  	} -	raw_spin_unlock(&ioapic_lock);  	return 0;  } @@ -1987,7 +1861,7 @@ static struct irq_chip ioapic_chip __read_mostly = {  	.irq_set_affinity	= ioapic_set_affinity,  	.irq_retrigger		= irq_chip_retrigger_hierarchy,  	.irq_get_irqchip_state	= ioapic_irq_get_chip_state, -	.flags			= IRQCHIP_SKIP_SET_WAKE | +	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED |  				  IRQCHIP_AFFINITY_PRE_STARTUP,  }; @@ -2014,14 +1888,13 @@ static inline void init_IO_APIC_traps(void)  		cfg = irq_cfg(irq);  		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {  			/* -			 * Hmm.. We don't have an entry for this, -			 * so default to an old-fashioned 8259 -			 * interrupt if we can.. +			 * Hmm.. We don't have an entry for this, so +			 * default to an old-fashioned 8259 interrupt if we +			 * can. Otherwise set the dummy interrupt chip.  			 */  			if (irq < nr_legacy_irqs())  				legacy_pic->make_irq(irq);  			else -				/* Strange. Oh, well.. 
*/  				irq_set_chip(irq, &no_irq_chip);  		}  	} @@ -2030,26 +1903,23 @@ static inline void init_IO_APIC_traps(void)  /*   * The local APIC irq-chip implementation:   */ -  static void mask_lapic_irq(struct irq_data *data)  { -	unsigned long v; +	unsigned long v = apic_read(APIC_LVT0); -	v = apic_read(APIC_LVT0);  	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);  }  static void unmask_lapic_irq(struct irq_data *data)  { -	unsigned long v; +	unsigned long v = apic_read(APIC_LVT0); -	v = apic_read(APIC_LVT0);  	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);  }  static void ack_lapic_irq(struct irq_data *data)  { -	ack_APIC_irq(); +	apic_eoi();  }  static struct irq_chip lapic_chip __read_mostly = { @@ -2062,8 +1932,7 @@ static struct irq_chip lapic_chip __read_mostly = {  static void lapic_register_intr(int irq)  {  	irq_clear_status_flags(irq, IRQ_LEVEL); -	irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, -				      "edge"); +	irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge");  }  /* @@ -2075,9 +1944,9 @@ static void lapic_register_intr(int irq)   */  static inline void __init unlock_ExtINT_logic(void)  { -	int apic, pin, i; -	struct IO_APIC_route_entry entry0, entry1;  	unsigned char save_control, save_freq_select; +	struct IO_APIC_route_entry entry0, entry1; +	int apic, pin, i;  	u32 apic_id;  	pin  = find_isa_irq_pin(8, mp_INT); @@ -2094,7 +1963,7 @@ static inline void __init unlock_ExtINT_logic(void)  	entry0 = ioapic_read_entry(apic, pin);  	clear_IO_APIC_pin(apic, pin); -	apic_id = hard_smp_processor_id(); +	apic_id = read_apic_id();  	memset(&entry1, 0, sizeof(entry1));  	entry1.dest_mode_logical	= true; @@ -2137,10 +2006,10 @@ static int __init disable_timer_pin_setup(char *arg)  }  early_param("disable_timer_pin_1", disable_timer_pin_setup); -static int mp_alloc_timer_irq(int ioapic, int pin) +static int __init mp_alloc_timer_irq(int ioapic, int pin)  { -	int irq = -1;  	struct irq_domain *domain = 
mp_ioapic_irqdomain(ioapic); +	int irq = -1;  	if (domain) {  		struct irq_alloc_info info; @@ -2148,21 +2017,36 @@ static int mp_alloc_timer_irq(int ioapic, int pin)  		ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);  		info.devid = mpc_ioapic_id(ioapic);  		info.ioapic.pin = pin; -		mutex_lock(&ioapic_mutex); +		guard(mutex)(&ioapic_mutex);  		irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info); -		mutex_unlock(&ioapic_mutex);  	}  	return irq;  } +static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, +					   int oldapic, int oldpin, +					   int newapic, int newpin) +{ +	struct irq_pin_list *entry; + +	for_each_irq_pin(entry, data->irq_2_pin) { +		if (entry->apic == oldapic && entry->pin == oldpin) { +			entry->apic = newapic; +			entry->pin = newpin; +			return; +		} +	} + +	/* Old apic/pin didn't exist, so just add a new one */ +	add_pin_to_irq_node(data, node, newapic, newpin); +} +  /*   * This code may look a bit paranoid, but it's supposed to cooperate with   * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ   * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast   * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for all platforms.   */  static inline void __init check_timer(void)  { @@ -2200,9 +2084,8 @@ static inline void __init check_timer(void)  	pin2  = ioapic_i8259.pin;  	apic2 = ioapic_i8259.apic; -	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " -		    "apic1=%d pin1=%d apic2=%d pin2=%d\n", -		    cfg->vector, apic1, pin1, apic2, pin2); +	pr_info("..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", +		cfg->vector, apic1, pin1, apic2, pin2);  	/*  	 * Some BIOS writers are clueless and report the ExtINTA @@ -2212,7 +2095,7 @@ static inline void __init check_timer(void)  	 * 8259A.  	 
*/  	if (pin1 == -1) { -		panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); +		panic_if_irq_remap(FW_BUG "Timer not connected to IO-APIC");  		pin1 = pin2;  		apic1 = apic2;  		no_pin1 = 1; @@ -2246,13 +2129,10 @@ static inline void __init check_timer(void)  		panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");  		clear_IO_APIC_pin(apic1, pin1);  		if (!no_pin1) -			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " -				    "8254 timer not connected to IO-APIC\n"); +			pr_err("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); -		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " -			    "(IRQ0) through the 8259A ...\n"); -		apic_printk(APIC_QUIET, KERN_INFO -			    "..... (found apic %d pin %d) ...\n", apic2, pin2); +		pr_info("...trying to set up timer (IRQ0) through the 8259A ...\n"); +		pr_info("..... (found apic %d pin %d) ...\n", apic2, pin2);  		/*  		 * legacy devices should be connected to IO APIC #0  		 */ @@ -2261,7 +2141,7 @@ static inline void __init check_timer(void)  		irq_domain_activate_irq(irq_data, false);  		legacy_pic->unmask(0);  		if (timer_irq_works()) { -			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); +			pr_info("....... works.\n");  			goto out;  		}  		/* @@ -2269,26 +2149,24 @@ static inline void __init check_timer(void)  		 */  		legacy_pic->mask(0);  		clear_IO_APIC_pin(apic2, pin2); -		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); +		pr_info("....... failed.\n");  	} -	apic_printk(APIC_QUIET, KERN_INFO -		    "...trying to set up timer as Virtual Wire IRQ...\n"); +	pr_info("...trying to set up timer as Virtual Wire IRQ...\n");  	lapic_register_intr(0);  	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */  	legacy_pic->unmask(0);  	if (timer_irq_works()) { -		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); +		pr_info("..... 
works.\n");  		goto out;  	}  	legacy_pic->mask(0);  	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); -	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); +	pr_info("..... failed.\n"); -	apic_printk(APIC_QUIET, KERN_INFO -		    "...trying to set up timer as ExtINT IRQ...\n"); +	pr_info("...trying to set up timer as ExtINT IRQ...\n");  	legacy_pic->init(0);  	legacy_pic->make_irq(0); @@ -2298,14 +2176,15 @@ static inline void __init check_timer(void)  	unlock_ExtINT_logic();  	if (timer_irq_works()) { -		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); +		pr_info("..... works.\n");  		goto out;  	} -	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); -	if (apic_is_x2apic_enabled()) -		apic_printk(APIC_QUIET, KERN_INFO -			    "Perhaps problem with the pre-enabled x2apic mode\n" -			    "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); + +	pr_info("..... failed :\n"); +	if (apic_is_x2apic_enabled()) { +		pr_info("Perhaps problem with the pre-enabled x2apic mode\n" +			"Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); +	}  	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "  		"report.  
Then try booting with the 'noapic' option.\n");  out: @@ -2333,11 +2212,11 @@ out:  static int mp_irqdomain_create(int ioapic)  { -	struct irq_domain *parent; +	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);  	int hwirqs = mp_ioapic_pin_count(ioapic);  	struct ioapic *ip = &ioapics[ioapic];  	struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; -	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); +	struct irq_domain *parent;  	struct fwnode_handle *fn;  	struct irq_fwspec fwspec; @@ -2346,7 +2225,7 @@ static int mp_irqdomain_create(int ioapic)  	/* Handle device tree enumerated APICs proper */  	if (cfg->dev) { -		fn = of_node_to_fwnode(cfg->dev); +		fn = of_fwnode_handle(cfg->dev);  	} else {  		fn = irq_domain_alloc_named_id_fwnode("IO-APIC", mpc_ioapic_id(ioapic));  		if (!fn) @@ -2357,16 +2236,15 @@ static int mp_irqdomain_create(int ioapic)  	fwspec.param_count = 1;  	fwspec.param[0] = mpc_ioapic_id(ioapic); -	parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY); +	parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_GENERIC_MSI);  	if (!parent) {  		if (!cfg->dev)  			irq_domain_free_fwnode(fn);  		return -ENODEV;  	} -	ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, -						 (void *)(long)ioapic); - +	ip->irqdomain = irq_domain_create_hierarchy(parent, 0, hwirqs, fn, cfg->ops, +						    (void *)(long)ioapic);  	if (!ip->irqdomain) {  		/* Release fw handle if it was allocated above */  		if (!cfg->dev) @@ -2374,12 +2252,8 @@ static int mp_irqdomain_create(int ioapic)  		return -ENOMEM;  	} -	ip->irqdomain->parent = parent; - -	if (cfg->type == IOAPIC_DOMAIN_LEGACY || -	    cfg->type == IOAPIC_DOMAIN_STRICT) -		ioapic_dynirq_base = max(ioapic_dynirq_base, -					 gsi_cfg->gsi_end + 1); +	if (cfg->type == IOAPIC_DOMAIN_LEGACY || cfg->type == IOAPIC_DOMAIN_STRICT) +		ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1);  	return 0;  } @@ -2401,18 +2275,16 @@ void __init setup_IO_APIC(void)  {  	int 
ioapic; -	if (skip_ioapic_setup || !nr_ioapics) +	if (ioapic_is_disabled || !nr_ioapics)  		return;  	io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; -	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); +	apic_pr_verbose("ENABLING IO-APIC IRQs\n");  	for_each_ioapic(ioapic)  		BUG_ON(mp_irqdomain_create(ioapic)); -	/* -         * Set up IO-APIC IRQ routing. -         */ +	/* Set up IO-APIC IRQ routing. */  	x86_init.mpparse.setup_ioapic_ids();  	sync_Arb_IDs(); @@ -2426,16 +2298,14 @@ void __init setup_IO_APIC(void)  static void resume_ioapic_id(int ioapic_idx)  { -	unsigned long flags;  	union IO_APIC_reg_00 reg_00; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	reg_00.raw = io_apic_read(ioapic_idx, 0);  	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {  		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);  		io_apic_write(ioapic_idx, 0, reg_00.raw);  	} -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  }  static void ioapic_resume(void) @@ -2449,8 +2319,8 @@ static void ioapic_resume(void)  }  static struct syscore_ops ioapic_syscore_ops = { -	.suspend = save_ioapic_entries, -	.resume = ioapic_resume, +	.suspend	= save_ioapic_entries, +	.resume		= ioapic_resume,  };  static int __init ioapic_init_ops(void) @@ -2465,124 +2335,104 @@ device_initcall(ioapic_init_ops);  static int io_apic_get_redir_entries(int ioapic)  {  	union IO_APIC_reg_01	reg_01; -	unsigned long flags; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	reg_01.raw = io_apic_read(ioapic, 1); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); -	/* The register returns the maximum index redir index -	 * supported, which is one less than the total number of redir -	 * entries. +	/* +	 * The register returns the maximum index redir index supported, +	 * which is one less than the total number of redir entries.  	 
*/  	return reg_01.bits.entries + 1;  }  unsigned int arch_dynirq_lower_bound(unsigned int from)  { +	unsigned int ret; +  	/*  	 * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use  	 * gsi_top if ioapic_dynirq_base hasn't been initialized yet.  	 */ -	if (!ioapic_initialized) -		return gsi_top; +	ret = ioapic_dynirq_base ? : gsi_top; +  	/* -	 * For DT enabled machines ioapic_dynirq_base is irrelevant and not -	 * updated. So simply return @from if ioapic_dynirq_base == 0. +	 * For DT enabled machines ioapic_dynirq_base is irrelevant and +	 * always 0. gsi_top can be 0 if there is no IO/APIC registered. +	 * 0 is an invalid interrupt number for dynamic allocations. Return +	 * @from instead.  	 */ -	return ioapic_dynirq_base ? : from; +	return ret ? : from;  }  #ifdef CONFIG_X86_32  static int io_apic_get_unique_id(int ioapic, int apic_id)  { +	static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC); +	const u32 broadcast_id = 0xF;  	union IO_APIC_reg_00 reg_00; -	static physid_mask_t apic_id_map = PHYSID_MASK_NONE; -	physid_mask_t tmp; -	unsigned long flags;  	int i = 0; -	/* -	 * The P4 platform supports up to 256 APIC IDs on two separate APIC -	 * buses (one for LAPICs, one for IOAPICs), where predecessors only -	 * supports up to 16 on one shared APIC bus. -	 * -	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full -	 *      advantage of new APIC bus architecture. 
-	 */ +	/* Initialize the ID map */ +	if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC)) +		copy_phys_cpu_present_map(apic_id_map); -	if (physids_empty(apic_id_map)) -		apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); - -	raw_spin_lock_irqsave(&ioapic_lock, flags); -	reg_00.raw = io_apic_read(ioapic, 0); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); +	scoped_guard (raw_spinlock_irqsave, &ioapic_lock) +		reg_00.raw = io_apic_read(ioapic, 0); -	if (apic_id >= get_physical_broadcast()) { -		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " -			"%d\n", ioapic, apic_id, reg_00.bits.ID); +	if (apic_id >= broadcast_id) { +		pr_warn("IOAPIC[%d]: Invalid apic_id %d, trying %d\n", +			ioapic, apic_id, reg_00.bits.ID);  		apic_id = reg_00.bits.ID;  	} -	/* -	 * Every APIC in a system must have a unique ID or we get lots of nice -	 * 'stuck on smp_invalidate_needed IPI wait' messages. -	 */ -	if (apic->check_apicid_used(&apic_id_map, apic_id)) { - -		for (i = 0; i < get_physical_broadcast(); i++) { -			if (!apic->check_apicid_used(&apic_id_map, i)) +	/* Every APIC in a system must have a unique ID */ +	if (test_bit(apic_id, apic_id_map)) { +		for (i = 0; i < broadcast_id; i++) { +			if (!test_bit(i, apic_id_map))  				break;  		} -		if (i == get_physical_broadcast()) +		if (i == broadcast_id)  			panic("Max apic_id exceeded!\n"); -		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " -			"trying %d\n", ioapic, apic_id, i); - +		pr_warn("IOAPIC[%d]: apic_id %d already used, trying %d\n", ioapic, apic_id, i);  		apic_id = i;  	} -	apic->apicid_to_cpu_present(apic_id, &tmp); -	physids_or(apic_id_map, apic_id_map, tmp); +	set_bit(apic_id, apic_id_map);  	if (reg_00.bits.ID != apic_id) {  		reg_00.bits.ID = apic_id; -		raw_spin_lock_irqsave(&ioapic_lock, flags); -		io_apic_write(ioapic, 0, reg_00.raw); -		reg_00.raw = io_apic_read(ioapic, 0); -		raw_spin_unlock_irqrestore(&ioapic_lock, flags); +		scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { 
+			io_apic_write(ioapic, 0, reg_00.raw); +			reg_00.raw = io_apic_read(ioapic, 0); +		}  		/* Sanity check */  		if (reg_00.bits.ID != apic_id) { -			pr_err("IOAPIC[%d]: Unable to change apic_id!\n", -			       ioapic); +			pr_err("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);  			return -1;  		}  	} -	apic_printk(APIC_VERBOSE, KERN_INFO -			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); +	apic_pr_verbose("IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);  	return apic_id;  }  static u8 io_apic_unique_id(int idx, u8 id)  { -	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && -	    !APIC_XAPIC(boot_cpu_apic_version)) +	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(boot_cpu_apic_version))  		return io_apic_get_unique_id(idx, id); -	else -		return id; +	return id;  }  #else  static u8 io_apic_unique_id(int idx, u8 id)  {  	union IO_APIC_reg_00 reg_00;  	DECLARE_BITMAP(used, 256); -	unsigned long flags;  	u8 new_id;  	int i; @@ -2598,26 +2448,23 @@ static u8 io_apic_unique_id(int idx, u8 id)  	 * Read the current id from the ioapic and keep it if  	 * available.  	 */ -	raw_spin_lock_irqsave(&ioapic_lock, flags); -	reg_00.raw = io_apic_read(idx, 0); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); +	scoped_guard (raw_spinlock_irqsave, &ioapic_lock) +		reg_00.raw = io_apic_read(idx, 0); +  	new_id = reg_00.bits.ID;  	if (!test_bit(new_id, used)) { -		apic_printk(APIC_VERBOSE, KERN_INFO -			"IOAPIC[%d]: Using reg apic_id %d instead of %d\n", -			 idx, new_id, id); +		apic_pr_verbose("IOAPIC[%d]: Using reg apic_id %d instead of %d\n", +				idx, new_id, id);  		return new_id;  	} -	/* -	 * Get the next free id and write it to the ioapic. -	 */ +	/* Get the next free id and write it to the ioapic. 
*/  	new_id = find_first_zero_bit(used, 256);  	reg_00.bits.ID = new_id; -	raw_spin_lock_irqsave(&ioapic_lock, flags); -	io_apic_write(idx, 0, reg_00.raw); -	reg_00.raw = io_apic_read(idx, 0); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); +	scoped_guard (raw_spinlock_irqsave, &ioapic_lock) { +		io_apic_write(idx, 0, reg_00.raw); +		reg_00.raw = io_apic_read(idx, 0); +	}  	/* Sanity check */  	BUG_ON(reg_00.bits.ID != new_id); @@ -2627,12 +2474,10 @@ static u8 io_apic_unique_id(int idx, u8 id)  static int io_apic_get_version(int ioapic)  { -	union IO_APIC_reg_01	reg_01; -	unsigned long flags; +	union IO_APIC_reg_01 reg_01; -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	reg_01.raw = io_apic_read(ioapic, 1); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  	return reg_01.bits.version;  } @@ -2647,8 +2492,8 @@ static struct resource *ioapic_resources;  static struct resource * __init ioapic_setup_resources(void)  { -	unsigned long n;  	struct resource *res; +	unsigned long n;  	char *mem;  	int i; @@ -2658,9 +2503,7 @@ static struct resource * __init ioapic_setup_resources(void)  	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);  	n *= nr_ioapics; -	mem = memblock_alloc(n, SMP_CACHE_BYTES); -	if (!mem) -		panic("%s: Failed to allocate %lu bytes\n", __func__, n); +	mem = memblock_alloc_or_panic(n, SMP_CACHE_BYTES);  	res = (void *)mem;  	mem += sizeof(struct resource) * nr_ioapics; @@ -2683,10 +2526,15 @@ static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys)  	pgprot_t flags = FIXMAP_PAGE_NOCACHE;  	/* -	 * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot +	 * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot  	 * bits, just like normal ioremap():  	 */ -	flags = pgprot_decrypted(flags); +	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { +		if (x86_platform.hyper.is_private_mmio(phys)) +			flags = pgprot_encrypted(flags); +		else +			flags = 
pgprot_decrypted(flags); +	}  	__set_fixmap(idx, phys, flags);  } @@ -2703,12 +2551,10 @@ void __init io_apic_init_mappings(void)  			ioapic_phys = mpc_ioapic_addr(i);  #ifdef CONFIG_X86_32  			if (!ioapic_phys) { -				printk(KERN_ERR -				       "WARNING: bogus zero IO-APIC " -				       "address found in MPTABLE, " +				pr_err("WARNING: bogus zero IO-APIC address found in MPTABLE, "  				       "disabling IO/APIC support!\n");  				smp_found_config = 0; -				skip_ioapic_setup = 1; +				ioapic_is_disabled = true;  				goto fake_ioapic_page;  			}  #endif @@ -2716,17 +2562,13 @@ void __init io_apic_init_mappings(void)  #ifdef CONFIG_X86_32  fake_ioapic_page:  #endif -			ioapic_phys = (unsigned long)memblock_alloc(PAGE_SIZE, +			ioapic_phys = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE,  								    PAGE_SIZE); -			if (!ioapic_phys) -				panic("%s: Failed to allocate %lu bytes align=0x%lx\n", -				      __func__, PAGE_SIZE, PAGE_SIZE);  			ioapic_phys = __pa(ioapic_phys);  		}  		io_apic_set_fixmap(idx, ioapic_phys); -		apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", -			__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), -			ioapic_phys); +		apic_pr_verbose("mapped IOAPIC to %08lx (%08lx)\n", +				__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), ioapic_phys);  		idx++;  		ioapic_res->start = ioapic_phys; @@ -2737,13 +2579,12 @@ fake_ioapic_page:  void __init ioapic_insert_resources(void)  { -	int i;  	struct resource *r = ioapic_resources; +	int i;  	if (!r) {  		if (nr_ioapics > 0) -			printk(KERN_ERR -				"IO APIC resources couldn't be allocated.\n"); +			pr_err("IO APIC resources couldn't be allocated.\n");  		return;  	} @@ -2763,11 +2604,12 @@ int mp_find_ioapic(u32 gsi)  	/* Find the IOAPIC that manages this GSI. 
*/  	for_each_ioapic(i) {  		struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); +  		if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)  			return i;  	} -	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); +	pr_err("ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);  	return -1;  } @@ -2806,12 +2648,10 @@ static int bad_ioapic_register(int idx)  static int find_free_ioapic_entry(void)  { -	int idx; - -	for (idx = 0; idx < MAX_IO_APICS; idx++) +	for (int idx = 0; idx < MAX_IO_APICS; idx++) {  		if (ioapics[idx].nr_registers == 0)  			return idx; - +	}  	return MAX_IO_APICS;  } @@ -2822,8 +2662,7 @@ static int find_free_ioapic_entry(void)   * @gsi_base:	base of GSI associated with the IOAPIC   * @cfg:	configuration information for the IOAPIC   */ -int mp_register_ioapic(int id, u32 address, u32 gsi_base, -		       struct ioapic_domain_cfg *cfg) +int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg)  {  	bool hotplug = !!ioapic_initialized;  	struct mp_ioapic_gsi *gsi_cfg; @@ -2834,12 +2673,13 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,  		pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");  		return -EINVAL;  	} -	for_each_ioapic(ioapic) + +	for_each_ioapic(ioapic) {  		if (ioapics[ioapic].mp_config.apicaddr == address) { -			pr_warn("address 0x%x conflicts with IOAPIC%d\n", -				address, ioapic); +			pr_warn("address 0x%x conflicts with IOAPIC%d\n", address, ioapic);  			return -EEXIST;  		} +	}  	idx = find_free_ioapic_entry();  	if (idx >= MAX_IO_APICS) { @@ -2874,8 +2714,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,  		    (gsi_end >= gsi_cfg->gsi_base &&  		     gsi_end <= gsi_cfg->gsi_end)) {  			pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n", -				gsi_base, gsi_end, -				gsi_cfg->gsi_base, gsi_cfg->gsi_end); +				gsi_base, gsi_end, gsi_cfg->gsi_base, gsi_cfg->gsi_end);  			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);  			return 
-ENOSPC;  		} @@ -2909,8 +2748,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,  	ioapics[idx].nr_registers = entries;  	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", -		idx, mpc_ioapic_id(idx), -		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), +		idx, mpc_ioapic_id(idx), mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),  		gsi_cfg->gsi_base, gsi_cfg->gsi_end);  	return 0; @@ -2921,11 +2759,13 @@ int mp_unregister_ioapic(u32 gsi_base)  	int ioapic, pin;  	int found = 0; -	for_each_ioapic(ioapic) +	for_each_ioapic(ioapic) {  		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {  			found = 1;  			break;  		} +	} +  	if (!found) {  		pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);  		return -ENODEV; @@ -2939,8 +2779,7 @@ int mp_unregister_ioapic(u32 gsi_base)  		if (irq >= 0) {  			data = irq_get_chip_data(irq);  			if (data && data->count) { -				pr_warn("pin%d on IOAPIC%d is still in use.\n", -					pin, ioapic); +				pr_warn("pin%d on IOAPIC%d is still in use.\n",	pin, ioapic);  				return -EBUSY;  			}  		} @@ -2975,8 +2814,7 @@ static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data,  	if (info && info->ioapic.valid) {  		data->is_level = info->ioapic.is_level;  		data->active_low = info->ioapic.active_low; -	} else if (__acpi_get_override_irq(gsi, &data->is_level, -					   &data->active_low) < 0) { +	} else if (__acpi_get_override_irq(gsi, &data->is_level, &data->active_low) < 0) {  		/* PCI interrupts are always active low level triggered. 
*/  		data->is_level = true;  		data->active_low = true; @@ -3034,10 +2872,8 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,  		return -ENOMEM;  	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); -	if (ret < 0) { -		kfree(data); -		return ret; -	} +	if (ret < 0) +		goto free_data;  	INIT_LIST_HEAD(&data->irq_2_pin);  	irq_data->hwirq = info->ioapic.pin; @@ -3046,7 +2882,10 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,  	irq_data->chip_data = data;  	mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); -	add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); +	if (!add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin)) { +		ret = -ENOMEM; +		goto free_irqs; +	}  	mp_preconfigure_entry(data);  	mp_register_handler(virq, data->is_level); @@ -3056,11 +2895,15 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,  		legacy_pic->mask(virq);  	local_irq_restore(flags); -	apic_printk(APIC_VERBOSE, KERN_DEBUG -		    "IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n", -		    ioapic, mpc_ioapic_id(ioapic), pin, virq, -		    data->is_level, data->active_low); +	apic_pr_verbose("IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n", +			ioapic, mpc_ioapic_id(ioapic), pin, virq, data->is_level, data->active_low);  	return 0; + +free_irqs: +	irq_domain_free_irqs_parent(domain, virq, nr_irqs); +free_data: +	kfree(data); +	return ret;  }  void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, @@ -3073,22 +2916,17 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,  	irq_data = irq_domain_get_irq_data(domain, virq);  	if (irq_data && irq_data->chip_data) {  		data = irq_data->chip_data; -		__remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), -				      (int)irq_data->hwirq); +		__remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq); 
 		WARN_ON(!list_empty(&data->irq_2_pin));  		kfree(irq_data->chip_data);  	}  	irq_domain_free_irqs_top(domain, virq, nr_irqs);  } -int mp_irqdomain_activate(struct irq_domain *domain, -			  struct irq_data *irq_data, bool reserve) +int mp_irqdomain_activate(struct irq_domain *domain, struct irq_data *irq_data, bool reserve)  { -	unsigned long flags; - -	raw_spin_lock_irqsave(&ioapic_lock, flags); +	guard(raw_spinlock_irqsave)(&ioapic_lock);  	ioapic_configure_entry(irq_data); -	raw_spin_unlock_irqrestore(&ioapic_lock, flags);  	return 0;  } @@ -3096,8 +2934,7 @@ void mp_irqdomain_deactivate(struct irq_domain *domain,  			     struct irq_data *irq_data)  {  	/* It won't be called for IRQ with multiple IOAPIC pins associated */ -	ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), -			  (int)irq_data->hwirq); +	ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq);  }  int mp_irqdomain_ioapic_idx(struct irq_domain *domain) diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index d1fb874fbe64..98a57cb4aa86 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -1,7 +1,10 @@  // SPDX-License-Identifier: GPL-2.0  #include <linux/cpumask.h> +#include <linux/delay.h>  #include <linux/smp.h> +#include <linux/string_choices.h> +  #include <asm/io_apic.h>  #include "local.h" @@ -21,7 +24,7 @@ __setup("no_ipi_broadcast=", apic_ipi_shorthand);  static int __init print_ipi_mode(void)  {  	pr_info("IPI shorthand broadcast: %s\n", -		apic_ipi_shorthand_off ? 
"disabled" : "enabled"); +		str_disabled_enabled(apic_ipi_shorthand_off));  	return 0;  }  late_initcall(print_ipi_mode); @@ -52,9 +55,9 @@ void apic_send_IPI_allbutself(unsigned int vector)  		return;  	if (static_branch_likely(&apic_use_ipi_shorthand)) -		apic->send_IPI_allbutself(vector); +		__apic_send_IPI_allbutself(vector);  	else -		apic->send_IPI_mask_allbutself(cpu_online_mask, vector); +		__apic_send_IPI_mask_allbutself(cpu_online_mask, vector);  }  /* @@ -68,12 +71,12 @@ void native_smp_send_reschedule(int cpu)  		WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu);  		return;  	} -	apic->send_IPI(cpu, RESCHEDULE_VECTOR); +	__apic_send_IPI(cpu, RESCHEDULE_VECTOR);  }  void native_send_call_func_single_ipi(int cpu)  { -	apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); +	__apic_send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);  }  void native_send_call_func_ipi(const struct cpumask *mask) @@ -85,91 +88,102 @@ void native_send_call_func_ipi(const struct cpumask *mask)  			goto sendmask;  		if (cpumask_test_cpu(cpu, mask)) -			apic->send_IPI_all(CALL_FUNCTION_VECTOR); +			__apic_send_IPI_all(CALL_FUNCTION_VECTOR);  		else if (num_online_cpus() > 1) -			apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); +			__apic_send_IPI_allbutself(CALL_FUNCTION_VECTOR);  		return;  	}  sendmask: -	apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); +	__apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR);  } +void apic_send_nmi_to_offline_cpu(unsigned int cpu) +{ +	if (WARN_ON_ONCE(!apic->nmi_to_offline_cpu)) +		return; +	if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, &cpus_booted_once_mask))) +		return; +	apic->send_IPI(cpu, NMI_VECTOR); +}  #endif /* CONFIG_SMP */  static inline int __prepare_ICR2(unsigned int mask)  { -	return SET_APIC_DEST_FIELD(mask); +	return SET_XAPIC_DEST_FIELD(mask); +} + +u32 apic_mem_wait_icr_idle_timeout(void) +{ +	int cnt; + +	for (cnt = 0; cnt < 1000; cnt++) { +		if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY)) +			return 0; +		
inc_irq_stat(icr_read_retry_count); +		udelay(100); +	} +	return APIC_ICR_BUSY;  } -static inline void __xapic_wait_icr_idle(void) +void apic_mem_wait_icr_idle(void)  {  	while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)  		cpu_relax();  } -void __default_send_IPI_shortcut(unsigned int shortcut, int vector) +/* + * This is safe against interruption because it only writes the lower 32 + * bits of the APIC_ICR register. The destination field is ignored for + * short hand IPIs. + * + *  wait_icr_idle() + *  write(ICR2, dest) + *  NMI + *	wait_icr_idle() + *	write(ICR) + *	wait_icr_idle() + *  write(ICR) + * + * This function does not need to disable interrupts as there is no ICR2 + * interaction. The memory write is direct except when the machine is + * affected by the 11AP Pentium erratum, which turns the plain write into + * an XCHG operation. + */ +static void __default_send_IPI_shortcut(unsigned int shortcut, int vector)  {  	/* -	 * Subtle. In the case of the 'never do double writes' workaround -	 * we have to lock out interrupts to be safe.  As we don't care -	 * of the value read we use an atomic rmw access to avoid costly -	 * cli/sti.  Otherwise we use an even cheaper single atomic write -	 * to the APIC. -	 */ -	unsigned int cfg; - -	/* -	 * Wait for idle. +	 * Wait for the previous ICR command to complete.  Use +	 * safe_apic_wait_icr_idle() for the NMI vector as there have been +	 * issues where otherwise the system hangs when the panic CPU tries +	 * to stop the others before launching the kdump kernel.  	 */  	if (unlikely(vector == NMI_VECTOR)) -		safe_apic_wait_icr_idle(); +		apic_mem_wait_icr_idle_timeout();  	else -		__xapic_wait_icr_idle(); - -	/* -	 * No need to touch the target chip field. Also the destination -	 * mode is ignored when a shorthand is used. -	 */ -	cfg = __prepare_ICR(shortcut, vector, 0); +		apic_mem_wait_icr_idle(); -	/* -	 * Send the IPI. The write to APIC_ICR fires this off. 
-	 */ -	native_apic_mem_write(APIC_ICR, cfg); +	/* Destination field (ICR2) and the destination mode are ignored */ +	native_apic_mem_write(APIC_ICR, __prepare_ICR(shortcut, vector, 0));  }  /*   * This is used to send an IPI with no shorthand notation (the destination is   * specified in bits 56 to 63 of the ICR).   */ -void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) +void __default_send_IPI_dest_field(unsigned int dest_mask, int vector, +				   unsigned int dest_mode)  { -	unsigned long cfg; - -	/* -	 * Wait for idle. -	 */ +	/* See comment in __default_send_IPI_shortcut() */  	if (unlikely(vector == NMI_VECTOR)) -		safe_apic_wait_icr_idle(); +		apic_mem_wait_icr_idle_timeout();  	else -		__xapic_wait_icr_idle(); +		apic_mem_wait_icr_idle(); -	/* -	 * prepare target chip field -	 */ -	cfg = __prepare_ICR2(mask); -	native_apic_mem_write(APIC_ICR2, cfg); - -	/* -	 * program the ICR -	 */ -	cfg = __prepare_ICR(0, vector, dest); - -	/* -	 * Send the IPI. The write to APIC_ICR fires this off. -	 */ -	native_apic_mem_write(APIC_ICR, cfg); +	/* Set the IPI destination field in the ICR */ +	native_apic_mem_write(APIC_ICR2, __prepare_ICR2(dest_mask)); +	/* Send it with the proper destination mode */ +	native_apic_mem_write(APIC_ICR, __prepare_ICR(0, vector, dest_mode));  }  void default_send_IPI_single_phys(int cpu, int vector) @@ -184,18 +198,13 @@ void default_send_IPI_single_phys(int cpu, int vector)  void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)  { -	unsigned long query_cpu;  	unsigned long flags; +	unsigned long cpu; -	/* -	 * Hack. The clustered APIC addressing mode doesn't allow us to send -	 * to an arbitrary mask, so I do a unicast to each CPU instead. 
-	 * - mbligh -	 */  	local_irq_save(flags); -	for_each_cpu(query_cpu, mask) { +	for_each_cpu(cpu, mask) {  		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, -				query_cpu), vector, APIC_DEST_PHYSICAL); +				cpu), vector, APIC_DEST_PHYSICAL);  	}  	local_irq_restore(flags);  } @@ -203,18 +212,15 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)  void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,  						 int vector)  { -	unsigned int this_cpu = smp_processor_id(); -	unsigned int query_cpu; +	unsigned int cpu, this_cpu = smp_processor_id();  	unsigned long flags; -	/* See Hack comment above */ -  	local_irq_save(flags); -	for_each_cpu(query_cpu, mask) { -		if (query_cpu == this_cpu) +	for_each_cpu(cpu, mask) { +		if (cpu == this_cpu)  			continue;  		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, -				 query_cpu), vector, APIC_DEST_PHYSICAL); +				 cpu), vector, APIC_DEST_PHYSICAL);  	}  	local_irq_restore(flags);  } @@ -224,7 +230,7 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,   */  void default_send_IPI_single(int cpu, int vector)  { -	apic->send_IPI_mask(cpumask_of(cpu), vector); +	__apic_send_IPI_mask(cpumask_of(cpu), vector);  }  void default_send_IPI_allbutself(int vector) @@ -243,50 +249,32 @@ void default_send_IPI_self(int vector)  }  #ifdef CONFIG_X86_32 - -void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, -						 int vector) +void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector)  {  	unsigned long flags; -	unsigned int query_cpu; - -	/* -	 * Hack. The clustered APIC addressing mode doesn't allow us to send -	 * to an arbitrary mask, so I do a unicasts to each CPU instead. 
This -	 * should be modified to do 1 message per cluster ID - mbligh -	 */ +	unsigned int cpu;  	local_irq_save(flags); -	for_each_cpu(query_cpu, mask) -		__default_send_IPI_dest_field( -			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), -			vector, APIC_DEST_LOGICAL); +	for_each_cpu(cpu, mask) +		__default_send_IPI_dest_field(1U << cpu, vector, APIC_DEST_LOGICAL);  	local_irq_restore(flags);  }  void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,  						 int vector)  { +	unsigned int cpu, this_cpu = smp_processor_id();  	unsigned long flags; -	unsigned int query_cpu; -	unsigned int this_cpu = smp_processor_id(); - -	/* See Hack comment above */  	local_irq_save(flags); -	for_each_cpu(query_cpu, mask) { -		if (query_cpu == this_cpu) +	for_each_cpu(cpu, mask) { +		if (cpu == this_cpu)  			continue; -		__default_send_IPI_dest_field( -			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), -			vector, APIC_DEST_LOGICAL); -		} +		__default_send_IPI_dest_field(1U << cpu, vector, APIC_DEST_LOGICAL); +	}  	local_irq_restore(flags);  } -/* - * This is only used on smaller machines. - */  void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)  {  	unsigned long mask = cpumask_bits(cpumask)[0]; @@ -300,32 +288,4 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)  	__default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);  	local_irq_restore(flags);  } - -/* must come after the send_IPI functions above for inlining */ -static int convert_apicid_to_cpu(int apic_id) -{ -	int i; - -	for_each_possible_cpu(i) { -		if (per_cpu(x86_cpu_to_apicid, i) == apic_id) -			return i; -	} -	return -1; -} - -int safe_smp_processor_id(void) -{ -	int apicid, cpuid; - -	if (!boot_cpu_has(X86_FEATURE_APIC)) -		return 0; - -	apicid = hard_smp_processor_id(); -	if (apicid == BAD_APICID) -		return 0; - -	cpuid = convert_apicid_to_cpu(apicid); - -	return cpuid >= 0 ? 
cpuid : 0; -}  #endif diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index a997d849509a..bdcf609eb283 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -13,18 +13,14 @@  #include <asm/irq_vectors.h>  #include <asm/apic.h> -/* APIC flat 64 */ -void flat_init_apic_ldr(void); -  /* X2APIC */ -int x2apic_apic_id_valid(u32 apicid); -int x2apic_apic_id_registered(void);  void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); -unsigned int x2apic_get_apic_id(unsigned long id); -u32 x2apic_set_apic_id(unsigned int id); -int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +u32 x2apic_get_apic_id(u32 id); + +void x2apic_send_IPI_all(int vector); +void x2apic_send_IPI_allbutself(int vector);  void x2apic_send_IPI_self(int vector); -void __x2apic_send_IPI_shorthand(int vector, u32 which); +extern u32 x2apic_max_apicid;  /* IPI */ @@ -46,7 +42,10 @@ static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,  	return icr;  } -void __default_send_IPI_shortcut(unsigned int shortcut, int vector); +void default_init_apic_ldr(void); + +void apic_mem_wait_icr_idle(void); +u32 apic_mem_wait_icr_idle_timeout(void);  /*   * This is used to send an IPI with no shorthand notation (the destination is diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 7517eb05bdc1..66bc5d3e79db 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -55,14 +55,14 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)  	 * caused by the non-atomic update of the address/data pair.  	 *  	 * Direct update is possible when: -	 * - The MSI is maskable (remapped MSI does not use this code path)). -	 *   The quirk bit is not set in this case. +	 * - The MSI is maskable (remapped MSI does not use this code path). +	 *   The reservation mode bit is set in this case.  	 
* - The new vector is the same as the old vector  	 * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)  	 * - The interrupt is not yet started up  	 * - The new destination CPU is the same as the old destination CPU  	 */ -	if (!irqd_msi_nomask_quirk(irqd) || +	if (!irqd_can_reserve(irqd) ||  	    cfg->vector == old_cfg.vector ||  	    old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||  	    !irqd_is_started(irqd) || @@ -142,70 +142,133 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)  	return ret;  } -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. +/** + * pci_dev_has_default_msi_parent_domain - Check whether the device has the default + *					   MSI parent domain associated + * @dev:	Pointer to the PCI device   */ -static struct irq_chip pci_msi_controller = { -	.name			= "PCI-MSI", -	.irq_unmask		= pci_msi_unmask_irq, -	.irq_mask		= pci_msi_mask_irq, -	.irq_ack		= irq_chip_ack_parent, -	.irq_retrigger		= irq_chip_retrigger_hierarchy, -	.irq_set_affinity	= msi_set_affinity, -	.flags			= IRQCHIP_SKIP_SET_WAKE | -				  IRQCHIP_AFFINITY_PRE_STARTUP, -}; +bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev) +{ +	struct irq_domain *domain = dev_get_msi_domain(&dev->dev); -int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, -		    msi_alloc_info_t *arg) +	if (!domain) +		domain = dev_get_msi_domain(&dev->bus->dev); +	if (!domain) +		return false; + +	return domain == x86_vector_domain; +} + +/** + * x86_msi_prepare - Setup of msi_alloc_info_t for allocations + * @domain:	The domain for which this setup happens + * @dev:	The device for which interrupts are allocated + * @nvec:	The number of vectors to allocate + * @alloc:	The allocation info structure to initialize + * + * This function is to be used for all types of MSI domains above the x86 + * vector domain and any intermediates. 
It is always invoked from the + * top level interrupt domain. The domain specific allocation + * functionality is determined via the @domain's bus token which allows to + * map the X86 specific allocation type. + */ +static int x86_msi_prepare(struct irq_domain *domain, struct device *dev, +			   int nvec, msi_alloc_info_t *alloc)  { -	init_irq_alloc_info(arg, NULL); -	if (to_pci_dev(dev)->msix_enabled) { -		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; -	} else { -		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; -		arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; +	struct msi_domain_info *info = domain->host_data; + +	init_irq_alloc_info(alloc, NULL); + +	switch (info->bus_token) { +	case DOMAIN_BUS_PCI_DEVICE_MSI: +		alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; +		return 0; +	case DOMAIN_BUS_PCI_DEVICE_MSIX: +		alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; +		return 0; +	default: +		return -EINVAL;  	} - -	return 0;  } -EXPORT_SYMBOL_GPL(pci_msi_prepare); -static struct msi_domain_ops pci_msi_domain_ops = { -	.msi_prepare	= pci_msi_prepare, -}; +/** + * x86_init_dev_msi_info - Domain info setup for MSI domains + * @dev:		The device for which the domain should be created + * @domain:		The (root) domain providing this callback + * @real_parent:	The real parent domain of the to initialize domain + * @info:		The domain info for the to initialize domain + * + * This function is to be used for all types of MSI domains above the x86 + * vector domain and any intermediates. The domain specific functionality + * is determined via the @real_parent. 
+ */ +static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, +				  struct irq_domain *real_parent, struct msi_domain_info *info) +{ +	const struct msi_parent_ops *pops = real_parent->msi_parent_ops; + +	/* MSI parent domain specific settings */ +	switch (real_parent->bus_token) { +	case DOMAIN_BUS_ANY: +		/* Only the vector domain can have the ANY token */ +		if (WARN_ON_ONCE(domain != real_parent)) +			return false; +		info->chip->irq_set_affinity = msi_set_affinity; +		info->chip->flags |= IRQCHIP_MOVE_DEFERRED; +		break; +	case DOMAIN_BUS_DMAR: +	case DOMAIN_BUS_AMDVI: +		break; +	default: +		WARN_ON_ONCE(1); +		return false; +	} + +	/* Is the target supported? */ +	switch(info->bus_token) { +	case DOMAIN_BUS_PCI_DEVICE_MSI: +	case DOMAIN_BUS_PCI_DEVICE_MSIX: +		break; +	default: +		WARN_ON_ONCE(1); +		return false; +	} + +	/* +	 * Mask out the domain specific MSI feature flags which are not +	 * supported by the real parent. +	 */ +	info->flags			&= pops->supported_flags; +	/* Enforce the required flags */ +	info->flags			|= X86_VECTOR_MSI_FLAGS_REQUIRED; + +	/* This is always invoked from the top level MSI domain! 
*/ +	info->ops->msi_prepare		= x86_msi_prepare; + +	info->chip->irq_ack		= irq_chip_ack_parent; +	info->chip->irq_retrigger	= irq_chip_retrigger_hierarchy; +	info->chip->flags		|= IRQCHIP_SKIP_SET_WAKE | +					   IRQCHIP_AFFINITY_PRE_STARTUP; -static struct msi_domain_info pci_msi_domain_info = { -	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | -			  MSI_FLAG_PCI_MSIX, -	.ops		= &pci_msi_domain_ops, -	.chip		= &pci_msi_controller, -	.handler	= handle_edge_irq, -	.handler_name	= "edge", +	info->handler			= handle_edge_irq; +	info->handler_name		= "edge"; + +	return true; +} + +static const struct msi_parent_ops x86_vector_msi_parent_ops = { +	.supported_flags	= X86_VECTOR_MSI_FLAGS_SUPPORTED, +	.init_dev_msi_info	= x86_init_dev_msi_info,  };  struct irq_domain * __init native_create_pci_msi_domain(void)  { -	struct fwnode_handle *fn; -	struct irq_domain *d; - -	if (disable_apic) -		return NULL; - -	fn = irq_domain_alloc_named_fwnode("PCI-MSI"); -	if (!fn) +	if (apic_is_disabled)  		return NULL; -	d = pci_msi_create_irq_domain(fn, &pci_msi_domain_info, -				      x86_vector_domain); -	if (!d) { -		irq_domain_free_fwnode(fn); -		pr_warn("Failed to initialize PCI-MSI irqdomain.\n"); -	} else { -		d->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; -	} -	return d; +	x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; +	x86_vector_domain->msi_parent_ops = &x86_vector_msi_parent_ops; +	return x86_vector_domain;  }  void __init x86_create_pci_msi_domain(void) @@ -213,41 +276,19 @@ void __init x86_create_pci_msi_domain(void)  	x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain();  } -#ifdef CONFIG_IRQ_REMAP -static struct irq_chip pci_msi_ir_controller = { -	.name			= "IR-PCI-MSI", -	.irq_unmask		= pci_msi_unmask_irq, -	.irq_mask		= pci_msi_mask_irq, -	.irq_ack		= irq_chip_ack_parent, -	.irq_retrigger		= irq_chip_retrigger_hierarchy, -	.flags			= IRQCHIP_SKIP_SET_WAKE | -				  IRQCHIP_AFFINITY_PRE_STARTUP, -}; - -static struct msi_domain_info 
pci_msi_ir_domain_info = { -	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | -			  MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, -	.ops		= &pci_msi_domain_ops, -	.chip		= &pci_msi_ir_controller, -	.handler	= handle_edge_irq, -	.handler_name	= "edge", -}; - -struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, -						    const char *name, int id) +/* Keep around for hyperV */ +int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, +		    msi_alloc_info_t *arg)  { -	struct fwnode_handle *fn; -	struct irq_domain *d; +	init_irq_alloc_info(arg, NULL); -	fn = irq_domain_alloc_named_id_fwnode(name, id); -	if (!fn) -		return NULL; -	d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); -	if (!d) -		irq_domain_free_fwnode(fn); -	return d; +	if (to_pci_dev(dev)->msix_enabled) +		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; +	else +		arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; +	return 0;  } -#endif +EXPORT_SYMBOL_GPL(pci_msi_prepare);  #ifdef CONFIG_DMAR_TABLE  /* @@ -275,7 +316,7 @@ static struct irq_chip dmar_msi_controller = {  	.irq_retrigger		= irq_chip_retrigger_hierarchy,  	.irq_compose_msi_msg	= dmar_msi_compose_msg,  	.irq_write_msi_msg	= dmar_msi_write_msg, -	.flags			= IRQCHIP_SKIP_SET_WAKE | +	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED |  				  IRQCHIP_AFFINITY_PRE_STARTUP,  }; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index a61f642b1b90..87bc9e7ca5d6 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -10,49 +10,22 @@  #include <linux/errno.h>  #include <linux/smp.h> +#include <xen/xen.h> +  #include <asm/io_apic.h>  #include <asm/apic.h>  #include <asm/acpi.h>  #include "local.h" -static int default_x86_32_early_logical_apicid(int cpu) +static u32 default_get_apic_id(u32 x)  { -	return 1 << cpu; -} - -static void setup_apic_flat_routing(void) -{ -#ifdef CONFIG_X86_IO_APIC -	printk(KERN_INFO -		"Enabling APIC 
mode:  Flat.  Using %d I/O APICs\n", -		nr_ioapics); -#endif -} +	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); -static int default_apic_id_registered(void) -{ -	return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -/* - * Set up the logical destination ID.  Intel recommends to set DFR, LDR and - * TPR before enabling an APIC.  See e.g. "AP-388 82489DX User's Manual" - * (Intel document number 292116). - */ -static void default_init_apic_ldr(void) -{ -	unsigned long val; - -	apic_write(APIC_DFR, APIC_DFR_VALUE); -	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; -	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); -	apic_write(APIC_LDR, val); -} - -static int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ -	return cpuid_apic >> index_msb; +	if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) +		return (x >> 24) & 0xFF; +	else +		return (x >> 24) & 0x0F;  }  /* should be called last. */ @@ -65,26 +38,16 @@ static struct apic apic_default __ro_after_init = {  	.name				= "default",  	.probe				= probe_default, -	.acpi_madt_oem_check		= NULL, -	.apic_id_valid			= default_apic_id_valid, -	.apic_id_registered		= default_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= true,  	.disable_esr			= 0, -	.check_apicid_used		= default_check_apicid_used,  	.init_apic_ldr			= default_init_apic_ldr, -	.ioapic_phys_id_map		= default_ioapic_phys_id_map, -	.setup_apic_routing		= setup_apic_flat_routing,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= physid_set_mask_of_physid, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= default_phys_pkg_id, +	.max_apic_id			= 0xFE,  	.get_apic_id			= default_get_apic_id, -	.set_apic_id			= NULL,  	.calc_dest_apicid		= apic_flat_calc_apicid, @@ -95,17 +58,13 @@ static struct apic apic_default __ro_after_init = {  	.send_IPI_all			= default_send_IPI_all,  	.send_IPI_self			= 
default_send_IPI_self, -	.inquire_remote_apic		= default_inquire_remote_apic, -  	.read				= native_apic_mem_read,  	.write				= native_apic_mem_write, -	.eoi_write			= native_apic_mem_write, +	.eoi				= native_apic_mem_eoi,  	.icr_read			= native_apic_icr_read,  	.icr_write			= native_apic_icr_write, -	.wait_icr_idle			= native_apic_wait_icr_idle, -	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle, - -	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid, +	.wait_icr_idle			= apic_mem_wait_icr_idle, +	.safe_wait_icr_idle		= apic_mem_wait_icr_idle_timeout,  };  apic_driver(apic_default); @@ -123,7 +82,7 @@ static int __init parse_apic(char *arg)  	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {  		if (!strcmp((*drv)->name, arg)) { -			apic = *drv; +			apic_install_driver(*drv);  			cmdline_apic = 1;  			return 0;  		} @@ -134,49 +93,14 @@ static int __init parse_apic(char *arg)  }  early_param("apic", parse_apic); -void __init default_setup_apic_routing(void) -{ -	int version = boot_cpu_apic_version; - -	if (num_possible_cpus() > 8) { -		switch (boot_cpu_data.x86_vendor) { -		case X86_VENDOR_INTEL: -			if (!APIC_XAPIC(version)) { -				def_to_bigsmp = 0; -				break; -			} -			/* P4 and above */ -			fallthrough; -		case X86_VENDOR_HYGON: -		case X86_VENDOR_AMD: -			def_to_bigsmp = 1; -		} -	} - -#ifdef CONFIG_X86_BIGSMP -	/* -	 * This is used to switch to bigsmp mode when -	 * - There is no apic= option specified by the user -	 * - generic_apic_probe() has chosen apic_default as the sub_arch -	 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support -	 */ - -	if (!cmdline_apic && apic == &apic_default) -		generic_bigsmp_probe(); -#endif - -	if (apic->setup_apic_routing) -		apic->setup_apic_routing(); -} - -void __init generic_apic_probe(void) +void __init x86_32_probe_apic(void)  {  	if (!cmdline_apic) {  		struct apic **drv;  		for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {  			if ((*drv)->probe()) { -				
apic = *drv; +				apic_install_driver(*drv);  				break;  			}  		} @@ -184,26 +108,4 @@ void __init generic_apic_probe(void)  		if (drv == __apicdrivers_end)  			panic("Didn't find an APIC driver");  	} -	printk(KERN_INFO "Using APIC driver %s\n", apic->name); -} - -/* This function can switch the APIC even after the initial ->probe() */ -int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -	struct apic **drv; - -	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { -		if (!(*drv)->acpi_madt_oem_check) -			continue; -		if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) -			continue; - -		if (!cmdline_apic) { -			apic = *drv; -			printk(KERN_INFO "Switched to APIC driver `%s'.\n", -			       apic->name); -		} -		return 1; -	} -	return 0;  } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index c46720f185c0..ecdf0c4121e1 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -13,10 +13,8 @@  #include "local.h" -/* - * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 
- */ -void __init default_setup_apic_routing(void) +/* Select the appropriate APIC driver */ +void __init x86_64_probe_apic(void)  {  	struct apic **drv; @@ -24,11 +22,7 @@ void __init default_setup_apic_routing(void)  	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {  		if ((*drv)->probe && (*drv)->probe()) { -			if (apic != *drv) { -				apic = *drv; -				pr_info("Switched APIC routing to %s.\n", -					apic->name); -			} +			apic_install_driver(*drv);  			break;  		}  	} @@ -40,11 +34,7 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {  		if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) { -			if (apic != *drv) { -				apic = *drv; -				pr_info("Setting APIC routing to %s.\n", -					apic->name); -			} +			apic_install_driver(*drv);  			return 1;  		}  	} diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3e6f6b448f6a..a947b46a8b64 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -44,7 +44,18 @@ static cpumask_var_t vector_searchmask;  static struct irq_chip lapic_controller;  static struct irq_matrix *vector_matrix;  #ifdef CONFIG_SMP -static DEFINE_PER_CPU(struct hlist_head, cleanup_list); + +static void vector_cleanup_callback(struct timer_list *tmr); + +struct vector_cleanup { +	struct hlist_head	head; +	struct timer_list	timer; +}; + +static DEFINE_PER_CPU(struct vector_cleanup, vector_cleanup) = { +	.head	= HLIST_HEAD_INIT, +	.timer	= __TIMER_INITIALIZER(vector_cleanup_callback, TIMER_PINNED), +};  #endif  void lock_vector_lock(void) @@ -172,6 +183,7 @@ setnew:  	apicd->cpu = newcpu;  	BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));  	per_cpu(vector_irq, newcpu)[newvec] = desc; +	apic_update_irq_cfg(irqd, newvec, newcpu);  }  static void vector_assign_managed_shutdown(struct irq_data *irqd) @@ -250,7 +262,6 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)  	
if (vector < 0)  		return vector;  	apic_update_vector(irqd, vector, cpu); -	apic_update_irq_cfg(irqd, vector, cpu);  	return 0;  } @@ -327,7 +338,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)  	if (vector < 0)  		return vector;  	apic_update_vector(irqd, vector, cpu); -	apic_update_irq_cfg(irqd, vector, cpu); +  	return 0;  } @@ -536,13 +547,9 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,  	struct irq_data *irqd;  	int i, err, node; -	if (disable_apic) +	if (apic_is_disabled)  		return -ENXIO; -	/* Currently vector allocator can't guarantee contiguous allocations */ -	if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) -		return -ENOSYS; -  	/*  	 * Catch any attempt to touch the cascade interrupt on a PIC  	 * equipped system. @@ -684,7 +691,7 @@ static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec,  	 * if IRQ remapping is enabled. APIC IDs above 15 bits are  	 * only permitted if IRQ remapping is enabled, so check that.  	 
*/ -	if (apic->apic_id_valid(32768)) +	if (apic_id_valid(32768))  		return 0;  	return x86_fwspec_is_ioapic(fwspec) || x86_fwspec_is_hpet(fwspec); @@ -705,8 +712,8 @@ int __init arch_probe_nr_irqs(void)  {  	int nr; -	if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) -		nr_irqs = NR_VECTORS * nr_cpu_ids; +	if (irq_get_nr_irqs() > NR_VECTORS * nr_cpu_ids) +		irq_set_nr_irqs(NR_VECTORS * nr_cpu_ids);  	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;  #if defined(CONFIG_PCI_MSI) @@ -718,8 +725,8 @@ int __init arch_probe_nr_irqs(void)  	else  		nr += gsi_top * 16;  #endif -	if (nr < nr_irqs) -		nr_irqs = nr; +	if (nr < irq_get_nr_irqs()) +		irq_set_nr_irqs(nr);  	/*  	 * We don't know if PIC is present at this point so we need to do @@ -731,8 +738,8 @@ int __init arch_probe_nr_irqs(void)  void lapic_assign_legacy_vector(unsigned int irq, bool replace)  {  	/* -	 * Use assign system here so it wont get accounted as allocated -	 * and moveable in the cpu hotplug check and it prevents managed +	 * Use assign system here so it won't get accounted as allocated +	 * and movable in the cpu hotplug check and it prevents managed  	 * irq reservation from touching it.  	 
*/  	irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); @@ -792,7 +799,7 @@ int __init arch_early_irq_init(void)  	x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,  						   NULL);  	BUG_ON(x86_vector_domain == NULL); -	irq_set_default_host(x86_vector_domain); +	irq_set_default_domain(x86_vector_domain);  	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); @@ -845,10 +852,21 @@ void lapic_online(void)  		this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));  } +static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr); +  void lapic_offline(void)  { +	struct vector_cleanup *cl = this_cpu_ptr(&vector_cleanup); +  	lock_vector_lock(); + +	/* In case the vector cleanup timer has not expired */ +	__vector_cleanup(cl, false); +  	irq_matrix_offline(vector_matrix); +	WARN_ON_ONCE(timer_delete_sync_try(&cl->timer) < 0); +	WARN_ON_ONCE(!hlist_empty(&cl->head)); +  	unlock_vector_lock();  } @@ -870,50 +888,6 @@ static int apic_set_affinity(struct irq_data *irqd,  	return err ? 
err : IRQ_SET_MASK_OK;  } -#else -# define apic_set_affinity	NULL -#endif - -static int apic_retrigger_irq(struct irq_data *irqd) -{ -	struct apic_chip_data *apicd = apic_chip_data(irqd); -	unsigned long flags; - -	raw_spin_lock_irqsave(&vector_lock, flags); -	apic->send_IPI(apicd->cpu, apicd->vector); -	raw_spin_unlock_irqrestore(&vector_lock, flags); - -	return 1; -} - -void apic_ack_irq(struct irq_data *irqd) -{ -	irq_move_irq(irqd); -	ack_APIC_irq(); -} - -void apic_ack_edge(struct irq_data *irqd) -{ -	irq_complete_move(irqd_cfg(irqd)); -	apic_ack_irq(irqd); -} - -static void x86_vector_msi_compose_msg(struct irq_data *data, -				       struct msi_msg *msg) -{ -       __irq_msi_compose_msg(irqd_cfg(data), msg, false); -} - -static struct irq_chip lapic_controller = { -	.name			= "APIC", -	.irq_ack		= apic_ack_edge, -	.irq_set_affinity	= apic_set_affinity, -	.irq_compose_msi_msg	= x86_vector_msi_compose_msg, -	.irq_retrigger		= apic_retrigger_irq, -}; - -#ifdef CONFIG_SMP -  static void free_moved_vector(struct apic_chip_data *apicd)  {  	unsigned int vector = apicd->prev_vector; @@ -938,116 +912,27 @@ static void free_moved_vector(struct apic_chip_data *apicd)  	apicd->move_in_progress = 0;  } -DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup) -{ -	struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); -	struct apic_chip_data *apicd; -	struct hlist_node *tmp; - -	ack_APIC_irq(); -	/* Prevent vectors vanishing under us */ -	raw_spin_lock(&vector_lock); - -	hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { -		unsigned int irr, vector = apicd->prev_vector; - -		/* -		 * Paranoia: Check if the vector that needs to be cleaned -		 * up is registered at the APICs IRR. If so, then this is -		 * not the best time to clean it up. Clean it up in the -		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR -		 * to this CPU. 
IRQ_MOVE_CLEANUP_VECTOR is the lowest -		 * priority external vector, so on return from this -		 * interrupt the device interrupt will happen first. -		 */ -		irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); -		if (irr & (1U << (vector % 32))) { -			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); -			continue; -		} -		free_moved_vector(apicd); -	} - -	raw_spin_unlock(&vector_lock); -} - -static void __send_cleanup_vector(struct apic_chip_data *apicd) -{ -	unsigned int cpu; - -	raw_spin_lock(&vector_lock); -	apicd->move_in_progress = 0; -	cpu = apicd->prev_cpu; -	if (cpu_online(cpu)) { -		hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); -		apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); -	} else { -		apicd->prev_vector = 0; -	} -	raw_spin_unlock(&vector_lock); -} - -void send_cleanup_vector(struct irq_cfg *cfg) -{ -	struct apic_chip_data *apicd; - -	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); -	if (apicd->move_in_progress) -		__send_cleanup_vector(apicd); -} - -void irq_complete_move(struct irq_cfg *cfg) -{ -	struct apic_chip_data *apicd; - -	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); -	if (likely(!apicd->move_in_progress)) -		return; - -	/* -	 * If the interrupt arrived on the new target CPU, cleanup the -	 * vector on the old target CPU. A vector check is not required -	 * because an interrupt can never move from one vector to another -	 * on the same CPU. -	 */ -	if (apicd->cpu == smp_processor_id()) -		__send_cleanup_vector(apicd); -} -  /*   * Called from fixup_irqs() with @desc->lock held and interrupts disabled.   */ -void irq_force_complete_move(struct irq_desc *desc) +static void apic_force_complete_move(struct irq_data *irqd)  { +	unsigned int cpu = smp_processor_id();  	struct apic_chip_data *apicd; -	struct irq_data *irqd;  	unsigned int vector; -	/* -	 * The function is called for all descriptors regardless of which -	 * irqdomain they belong to. 
For example if an IRQ is provided by -	 * an irq_chip as part of a GPIO driver, the chip data for that -	 * descriptor is specific to the irq_chip in question. -	 * -	 * Check first that the chip_data is what we expect -	 * (apic_chip_data) before touching it any further. -	 */ -	irqd = irq_domain_get_irq_data(x86_vector_domain, -				       irq_desc_get_irq(desc)); -	if (!irqd) -		return; - -	raw_spin_lock(&vector_lock); +	guard(raw_spinlock)(&vector_lock);  	apicd = apic_chip_data(irqd);  	if (!apicd) -		goto unlock; +		return;  	/* -	 * If prev_vector is empty, no action required. +	 * If prev_vector is empty or the descriptor is neither currently +	 * nor previously on the outgoing CPU no action required.  	 */  	vector = apicd->prev_vector; -	if (!vector) -		goto unlock; +	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) +		return;  	/*  	 * This is tricky. If the cleanup of the old vector has not been @@ -1101,10 +986,166 @@ void irq_force_complete_move(struct irq_desc *desc)  			irqd->irq, vector);  	}  	free_moved_vector(apicd); -unlock: +} + +#else +# define apic_set_affinity		NULL +# define apic_force_complete_move	NULL +#endif + +static int apic_retrigger_irq(struct irq_data *irqd) +{ +	struct apic_chip_data *apicd = apic_chip_data(irqd); +	unsigned long flags; + +	raw_spin_lock_irqsave(&vector_lock, flags); +	__apic_send_IPI(apicd->cpu, apicd->vector); +	raw_spin_unlock_irqrestore(&vector_lock, flags); + +	return 1; +} + +void apic_ack_irq(struct irq_data *irqd) +{ +	irq_move_irq(irqd); +	apic_eoi(); +} + +void apic_ack_edge(struct irq_data *irqd) +{ +	irq_complete_move(irqd_cfg(irqd)); +	apic_ack_irq(irqd); +} + +static void x86_vector_msi_compose_msg(struct irq_data *data, +				       struct msi_msg *msg) +{ +       __irq_msi_compose_msg(irqd_cfg(data), msg, false); +} + +static struct irq_chip lapic_controller = { +	.name				= "APIC", +	.irq_ack			= apic_ack_edge, +	.irq_set_affinity		= apic_set_affinity, +	.irq_compose_msi_msg		= 
x86_vector_msi_compose_msg, +	.irq_force_complete_move	= apic_force_complete_move, +	.irq_retrigger			= apic_retrigger_irq, +}; + +#ifdef CONFIG_SMP + +static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr) +{ +	struct apic_chip_data *apicd; +	struct hlist_node *tmp; +	bool rearm = false; + +	lockdep_assert_held(&vector_lock); + +	hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) { +		unsigned int vector = apicd->prev_vector; + +		/* +		 * Paranoia: Check if the vector that needs to be cleaned +		 * up is registered at the APICs IRR. That's clearly a +		 * hardware issue if the vector arrived on the old target +		 * _after_ interrupts were disabled above. Keep @apicd +		 * on the list and schedule the timer again to give the CPU +		 * a chance to handle the pending interrupt. +		 * +		 * Do not check IRR when called from lapic_offline(), because +		 * fixup_irqs() was just called to scan IRR for set bits and +		 * forward them to new destination CPUs via IPIs. +		 */ +		if (check_irr && is_vector_pending(vector)) { +			pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq); +			rearm = true; +			continue; +		} +		free_moved_vector(apicd); +	} + +	/* +	 * Must happen under vector_lock to make the timer_pending() check +	 * in __vector_schedule_cleanup() race free against the rearm here. 
+	 */ +	if (rearm) +		mod_timer(&cl->timer, jiffies + 1); +} + +static void vector_cleanup_callback(struct timer_list *tmr) +{ +	struct vector_cleanup *cl = container_of(tmr, typeof(*cl), timer); + +	/* Prevent vectors vanishing under us */ +	raw_spin_lock_irq(&vector_lock); +	__vector_cleanup(cl, true); +	raw_spin_unlock_irq(&vector_lock); +} + +static void __vector_schedule_cleanup(struct apic_chip_data *apicd) +{ +	unsigned int cpu = apicd->prev_cpu; + +	raw_spin_lock(&vector_lock); +	apicd->move_in_progress = 0; +	if (cpu_online(cpu)) { +		struct vector_cleanup *cl = per_cpu_ptr(&vector_cleanup, cpu); + +		hlist_add_head(&apicd->clist, &cl->head); + +		/* +		 * The lockless timer_pending() check is safe here. If it +		 * returns true, then the callback will observe this new +		 * apic data in the hlist as everything is serialized by +		 * vector lock. +		 * +		 * If it returns false then the timer is either not armed +		 * or the other CPU executes the callback, which again +		 * would be blocked on vector lock. Rearming it in the +		 * latter case makes it fire for nothing. +		 * +		 * This is also safe against the callback rearming the timer +		 * because that's serialized via vector lock too. 
+		 */ +		if (!timer_pending(&cl->timer)) { +			cl->timer.expires = jiffies + 1; +			add_timer_on(&cl->timer, cpu); +		} +	} else { +		pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu); +		free_moved_vector(apicd); +	}  	raw_spin_unlock(&vector_lock);  } +void vector_schedule_cleanup(struct irq_cfg *cfg) +{ +	struct apic_chip_data *apicd; + +	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); +	if (apicd->move_in_progress) +		__vector_schedule_cleanup(apicd); +} + +void irq_complete_move(struct irq_cfg *cfg) +{ +	struct apic_chip_data *apicd; + +	apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); +	if (likely(!apicd->move_in_progress)) +		return; + +	/* +	 * If the interrupt arrived on the new target CPU, cleanup the +	 * vector on the old target CPU. A vector check is not required +	 * because an interrupt can never move from one vector to another +	 * on the same CPU. +	 */ +	if (apicd->cpu == smp_processor_id()) +		__vector_schedule_cleanup(apicd); +} +  #ifdef CONFIG_HOTPLUG_CPU  /*   * Note, this is not accurate accounting, but at least good enough to @@ -1154,7 +1195,7 @@ static void __init print_local_APIC(void *dummy)  	u64 icr;  	pr_debug("printing local APIC contents on CPU#%d/%d:\n", -		 smp_processor_id(), hard_smp_processor_id()); +		 smp_processor_id(), read_apic_id());  	v = apic_read(APIC_ID);  	pr_info("... 
APIC ID:      %08x (%01x)\n", v, read_apic_id());  	v = apic_read(APIC_LVR); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e696e22d0531..7db83212effb 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -9,11 +9,7 @@  #include "local.h" -struct cluster_mask { -	unsigned int	clusterid; -	int		node; -	struct cpumask	mask; -}; +#define apic_cluster(apicid) ((apicid) >> 4)  /*   * __x2apic_send_IPI_mask() possibly needs to read @@ -23,8 +19,7 @@ struct cluster_mask {  static u32 *x86_cpu_to_logical_apicid __read_mostly;  static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); -static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks); -static struct cluster_mask *cluster_hotplug_mask; +static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks);  static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  { @@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)  	/* Collapse cpus in a cluster so a single IPI per cluster is sent */  	for_each_cpu(cpu, tmpmsk) { -		struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); +		struct cpumask *cmsk = per_cpu(cluster_masks, cpu);  		dest = 0; -		for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) +		for_each_cpu_and(clustercpu, tmpmsk, cmsk)  			dest |= x86_cpu_to_logical_apicid[clustercpu];  		if (!dest) @@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)  		__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);  		/* Remove cluster CPUs from tmpmask */ -		cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); +		cpumask_andnot(tmpmsk, tmpmsk, cmsk);  	}  	local_irq_restore(flags); @@ -88,84 +83,120 @@ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)  	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);  } -static void x2apic_send_IPI_allbutself(int vector) +static u32 x2apic_calc_apicid(unsigned int 
cpu)  { -	__x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); +	return x86_cpu_to_logical_apicid[cpu];  } -static void x2apic_send_IPI_all(int vector) +static void init_x2apic_ldr(void)  { -	__x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); -} +	struct cpumask *cmsk = this_cpu_read(cluster_masks); -static u32 x2apic_calc_apicid(unsigned int cpu) -{ -	return x86_cpu_to_logical_apicid[cpu]; +	BUG_ON(!cmsk); + +	cpumask_set_cpu(smp_processor_id(), cmsk);  } -static void init_x2apic_ldr(void) +/* + * As an optimisation during boot, set the cluster_mask for all present + * CPUs at once, to prevent each of them having to iterate over the others + * to find the existing cluster_mask. + */ +static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster)  { -	struct cluster_mask *cmsk = this_cpu_read(cluster_masks); -	u32 cluster, apicid = apic_read(APIC_LDR); -	unsigned int cpu; +	int cpu_i; -	x86_cpu_to_logical_apicid[smp_processor_id()] = apicid; +	for_each_present_cpu(cpu_i) { +		struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu_i); +		u32 apicid = apic->cpu_present_to_apicid(cpu_i); -	if (cmsk) -		goto update; - -	cluster = apicid >> 16; -	for_each_online_cpu(cpu) { -		cmsk = per_cpu(cluster_masks, cpu); -		/* Matching cluster found. Link and update it. 
*/ -		if (cmsk && cmsk->clusterid == cluster) -			goto update; +		if (apicid == BAD_APICID || cpu_i == cpu || apic_cluster(apicid) != cluster) +			continue; + +		if (WARN_ON_ONCE(*cpu_cmsk == cmsk)) +			continue; + +		BUG_ON(*cpu_cmsk); +		*cpu_cmsk = cmsk;  	} -	cmsk = cluster_hotplug_mask; -	cmsk->clusterid = cluster; -	cluster_hotplug_mask = NULL; -update: -	this_cpu_write(cluster_masks, cmsk); -	cpumask_set_cpu(smp_processor_id(), &cmsk->mask);  } -static int alloc_clustermask(unsigned int cpu, int node) +static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)  { +	struct cpumask *cmsk = NULL; +	unsigned int cpu_i; + +	/* +	 * At boot time, the CPU present mask is stable. The cluster mask is +	 * allocated for the first CPU in the cluster and propagated to all +	 * present siblings in the cluster. If the cluster mask is already set +	 * on entry to this function for a given CPU, there is nothing to do. +	 */  	if (per_cpu(cluster_masks, cpu))  		return 0; + +	if (system_state < SYSTEM_RUNNING) +		goto alloc; +  	/* -	 * If a hotplug spare mask exists, check whether it's on the right -	 * node. If not, free it and allocate a new one. +	 * On post boot hotplug for a CPU which was not present at boot time, +	 * iterate over all possible CPUs (even those which are not present +	 * any more) to find any existing cluster mask.  	 */ -	if (cluster_hotplug_mask) { -		if (cluster_hotplug_mask->node == node) -			return 0; -		kfree(cluster_hotplug_mask); +	for_each_possible_cpu(cpu_i) { +		u32 apicid = apic->cpu_present_to_apicid(cpu_i); + +		if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) { +			cmsk = per_cpu(cluster_masks, cpu_i); +			/* +			 * If the cluster is already initialized, just store +			 * the mask and return. There's no need to propagate. 
+			 */ +			if (cmsk) { +				per_cpu(cluster_masks, cpu) = cmsk; +				return 0; +			} +		}  	} - -	cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), -					    GFP_KERNEL, node); -	if (!cluster_hotplug_mask) +	/* +	 * No CPU in the cluster has ever been initialized, so fall through to +	 * the boot time code which will also populate the cluster mask for any +	 * other CPU in the cluster which is (now) present. +	 */ +alloc: +	cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node); +	if (!cmsk)  		return -ENOMEM; -	cluster_hotplug_mask->node = node; +	per_cpu(cluster_masks, cpu) = cmsk; +	prefill_clustermask(cmsk, cpu, cluster); +  	return 0;  }  static int x2apic_prepare_cpu(unsigned int cpu)  { -	if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) +	u32 phys_apicid = apic->cpu_present_to_apicid(cpu); +	u32 cluster = apic_cluster(phys_apicid); +	u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf)); +	int node = cpu_to_node(cpu); + +	x86_cpu_to_logical_apicid[cpu] = logical_apicid; + +	if (alloc_clustermask(cpu, cluster, node) < 0)  		return -ENOMEM; -	if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) + +	if (!zalloc_cpumask_var_node(&per_cpu(ipi_mask, cpu), GFP_KERNEL, node))  		return -ENOMEM; +  	return 0;  }  static int x2apic_dead_cpu(unsigned int dead_cpu)  { -	struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); +	struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu);  	if (cmsk) -		cpumask_clear_cpu(dead_cpu, &cmsk->mask); +		cpumask_clear_cpu(dead_cpu, cmsk);  	free_cpumask_var(per_cpu(ipi_mask, dead_cpu));  	return 0;  } @@ -198,25 +229,17 @@ static struct apic apic_x2apic_cluster __ro_after_init = {  	.name				= "cluster x2apic",  	.probe				= x2apic_cluster_probe,  	.acpi_madt_oem_check		= x2apic_acpi_madt_oem_check, -	.apic_id_valid			= x2apic_apic_id_valid, -	.apic_id_registered		= x2apic_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= true,  	.disable_esr			= 0, 
-	.check_apicid_used		= NULL,  	.init_apic_ldr			= init_x2apic_ldr, -	.ioapic_phys_id_map		= NULL, -	.setup_apic_routing		= NULL,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= NULL, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= x2apic_phys_pkg_id, +	.max_apic_id			= UINT_MAX, +	.x2apic_set_max_apicid		= true,  	.get_apic_id			= x2apic_get_apic_id, -	.set_apic_id			= x2apic_set_apic_id,  	.calc_dest_apicid		= x2apic_calc_apicid, @@ -226,16 +249,13 @@ static struct apic apic_x2apic_cluster __ro_after_init = {  	.send_IPI_allbutself		= x2apic_send_IPI_allbutself,  	.send_IPI_all			= x2apic_send_IPI_all,  	.send_IPI_self			= x2apic_send_IPI_self, - -	.inquire_remote_apic		= NULL, +	.nmi_to_offline_cpu		= true,  	.read				= native_apic_msr_read,  	.write				= native_apic_msr_write, -	.eoi_write			= native_apic_msr_eoi_write, +	.eoi				= native_apic_msr_eoi,  	.icr_read			= native_x2apic_icr_read,  	.icr_write			= native_x2apic_icr_write, -	.wait_icr_idle			= native_x2apic_wait_icr_idle, -	.safe_wait_icr_idle		= native_safe_x2apic_wait_icr_idle,  };  apic_driver(apic_x2apic_cluster); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6bde05a86b4e..12d4c35547a6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -8,11 +8,13 @@  int x2apic_phys;  static struct apic apic_x2apic_phys; -static u32 x2apic_max_apicid __ro_after_init; +u32 x2apic_max_apicid __ro_after_init = UINT_MAX;  void __init x2apic_set_max_apicid(u32 apicid)  {  	x2apic_max_apicid = apicid; +	if (apic->x2apic_set_max_apicid) +		apic->max_apic_id = apicid;  }  static int __init set_x2apic_phys_mode(char *arg) @@ -81,40 +83,28 @@ static void  	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);  } -static void x2apic_send_IPI_allbutself(int vector) +static void __x2apic_send_IPI_shorthand(int vector, u32 which)  { -	
__x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT); -} +	unsigned long cfg = __prepare_ICR(which, vector, 0); -static void x2apic_send_IPI_all(int vector) -{ -	__x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC); +	/* x2apic MSRs are special and need a special fence: */ +	weak_wrmsr_fence(); +	native_x2apic_icr_write(cfg, 0);  } -static void init_x2apic_ldr(void) +void x2apic_send_IPI_allbutself(int vector)  { +	__x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT);  } -static int x2apic_phys_probe(void) -{ -	if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) -		return 1; - -	return apic == &apic_x2apic_phys; -} - -/* Common x2apic functions, also used by x2apic_cluster */ -int x2apic_apic_id_valid(u32 apicid) +void x2apic_send_IPI_all(int vector)  { -	if (x2apic_max_apicid && apicid > x2apic_max_apicid) -		return 0; - -	return 1; +	__x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC);  } -int x2apic_apic_id_registered(void) +void x2apic_send_IPI_self(int vector)  { -	return 1; +	apic_write(APIC_SELF_IPI, vector);  }  void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) @@ -123,59 +113,37 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)  	native_x2apic_icr_write(cfg, apicid);  } -void __x2apic_send_IPI_shorthand(int vector, u32 which) +static int x2apic_phys_probe(void)  { -	unsigned long cfg = __prepare_ICR(which, vector, 0); +	if (!x2apic_mode) +		return 0; -	/* x2apic MSRs are special and need a special fence: */ -	weak_wrmsr_fence(); -	native_x2apic_icr_write(cfg, 0); -} +	if (x2apic_phys || x2apic_fadt_phys()) +		return 1; -unsigned int x2apic_get_apic_id(unsigned long id) -{ -	return id; +	return apic == &apic_x2apic_phys;  } -u32 x2apic_set_apic_id(unsigned int id) +u32 x2apic_get_apic_id(u32 id)  {  	return id;  } -int x2apic_phys_pkg_id(int initial_apicid, int index_msb) -{ -	return initial_apicid >> index_msb; -} - -void x2apic_send_IPI_self(int vector) -{ -	apic_write(APIC_SELF_IPI, 
vector); -} -  static struct apic apic_x2apic_phys __ro_after_init = {  	.name				= "physical x2apic",  	.probe				= x2apic_phys_probe,  	.acpi_madt_oem_check		= x2apic_acpi_madt_oem_check, -	.apic_id_valid			= x2apic_apic_id_valid, -	.apic_id_registered		= x2apic_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= false,  	.disable_esr			= 0, -	.check_apicid_used		= NULL, -	.init_apic_ldr			= init_x2apic_ldr, -	.ioapic_phys_id_map		= NULL, -	.setup_apic_routing		= NULL,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= NULL, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= x2apic_phys_pkg_id, +	.max_apic_id			= UINT_MAX, +	.x2apic_set_max_apicid		= true,  	.get_apic_id			= x2apic_get_apic_id, -	.set_apic_id			= x2apic_set_apic_id,  	.calc_dest_apicid		= apic_default_calc_apicid, @@ -185,16 +153,13 @@ static struct apic apic_x2apic_phys __ro_after_init = {  	.send_IPI_allbutself		= x2apic_send_IPI_allbutself,  	.send_IPI_all			= x2apic_send_IPI_all,  	.send_IPI_self			= x2apic_send_IPI_self, - -	.inquire_remote_apic		= NULL, +	.nmi_to_offline_cpu		= true,  	.read				= native_apic_msr_read,  	.write				= native_apic_msr_write, -	.eoi_write			= native_apic_msr_eoi_write, +	.eoi				= native_apic_msr_eoi,  	.icr_read			= native_x2apic_icr_read,  	.icr_write			= native_x2apic_icr_write, -	.wait_icr_idle			= native_x2apic_wait_icr_idle, -	.safe_wait_icr_idle		= native_safe_x2apic_wait_icr_idle,  };  apic_driver(apic_x2apic_phys); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 482855227964..15209f220e1f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -25,6 +25,8 @@  #include <asm/uv/uv.h>  #include <asm/apic.h> +#include "local.h" +  static enum uv_system_type	uv_system_type;  static int			uv_hubbed_system;  static int			uv_hubless_system; @@ -108,7 +110,7 @@ static void 
__init early_get_pnodeid(void)  	} else if (UVH_RH_GAM_ADDR_MAP_CONFIG) {  		union uvh_rh_gam_addr_map_config_u  m_n_config; -	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG); +		m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);  		uv_cpuid.n_skt = m_n_config.s.n_skt;  		if (is_uv(UV3))  			uv_cpuid.m_skt = m_n_config.s3.m_skt; @@ -239,54 +241,20 @@ static void __init uv_tsc_check_sync(void)  	is_uv(UV3) ? sname.s3.field :		\  	undef) -/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ - -#define SMT_LEVEL			0	/* Leaf 0xb SMT level */ -#define INVALID_TYPE			0	/* Leaf 0xb sub-leaf types */ -#define SMT_TYPE			1 -#define CORE_TYPE			2 -#define LEAFB_SUBTYPE(ecx)		(((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax)	((eax) & 0x1f) - -static void set_x2apic_bits(void) -{ -	unsigned int eax, ebx, ecx, edx, sub_index; -	unsigned int sid_shift; - -	cpuid(0, &eax, &ebx, &ecx, &edx); -	if (eax < 0xb) { -		pr_info("UV: CPU does not have CPUID.11\n"); -		return; -	} - -	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); -	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) { -		pr_info("UV: CPUID.11 not implemented\n"); -		return; -	} - -	sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); -	sub_index = 1; -	do { -		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); -		if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { -			sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); -			break; -		} -		sub_index++; -	} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); - -	uv_cpuid.apicid_shift	= 0; -	uv_cpuid.apicid_mask	= (~(-1 << sid_shift)); -	uv_cpuid.socketid_shift = sid_shift; -} -  static void __init early_get_apic_socketid_shift(void)  { +	unsigned int sid_shift = topology_get_domain_shift(TOPO_PKG_DOMAIN); +  	if (is_uv2_hub() || is_uv3_hub())  		uvh_apicid.v = uv_early_read_mmr(UVH_APICID); -	set_x2apic_bits(); +	if (sid_shift) { +		uv_cpuid.apicid_shift	= 0; +		uv_cpuid.apicid_mask	= (~(-1 << sid_shift)); +		uv_cpuid.socketid_shift = sid_shift; +	} else { +	
	pr_info("UV: CPU does not have valid CPUID.11\n"); +	}  	pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);  	pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask); @@ -294,8 +262,7 @@ static void __init early_get_apic_socketid_shift(void)  static void __init uv_stringify(int len, char *to, char *from)  { -	/* Relies on 'to' being NULL chars so result will be NULL terminated */ -	strncpy(to, from, len-1); +	strscpy(to, from, len);  	/* Trim trailing spaces */  	(void)strim(to); @@ -546,7 +513,6 @@ unsigned long sn_rtc_cycles_per_second;  EXPORT_SYMBOL(sn_rtc_cycles_per_second);  /* The following values are used for the per node hub info struct */ -static __initdata unsigned short		*_node_to_pnode;  static __initdata unsigned short		_min_socket, _max_socket;  static __initdata unsigned short		_min_pnode, _max_pnode, _gr_table_len;  static __initdata struct uv_gam_range_entry	*uv_gre_table; @@ -554,6 +520,7 @@ static __initdata struct uv_gam_parameters	*uv_gp_table;  static __initdata unsigned short		*_socket_to_node;  static __initdata unsigned short		*_socket_to_pnode;  static __initdata unsigned short		*_pnode_to_socket; +static __initdata unsigned short		*_node_to_socket;  static __initdata struct uv_gam_range_s		*_gr_table; @@ -617,7 +584,8 @@ static __init void build_uv_gr_table(void)  	bytes = _gr_table_len * sizeof(struct uv_gam_range_s);  	grt = kzalloc(bytes, GFP_KERNEL); -	BUG_ON(!grt); +	if (WARN_ON_ONCE(!grt)) +		return;  	_gr_table = grt;  	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { @@ -699,7 +667,7 @@ static __init void build_uv_gr_table(void)  	}  } -static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int uv_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu)  {  	unsigned long val;  	int pnode; @@ -777,50 +745,6 @@ static void uv_send_IPI_all(int vector)  	uv_send_IPI_mask(cpu_online_mask, vector); 
 } -static int uv_apic_id_valid(u32 apicid) -{ -	return 1; -} - -static int uv_apic_id_registered(void) -{ -	return 1; -} - -static void uv_init_apic_ldr(void) -{ -} - -static u32 apic_uv_calc_apicid(unsigned int cpu) -{ -	return apic_default_calc_apicid(cpu); -} - -static unsigned int x2apic_get_apic_id(unsigned long id) -{ -	return id; -} - -static u32 set_apic_id(unsigned int id) -{ -	return id; -} - -static unsigned int uv_read_apic_id(void) -{ -	return x2apic_get_apic_id(apic_read(APIC_ID)); -} - -static int uv_phys_pkg_id(int initial_apicid, int index_msb) -{ -	return uv_read_apic_id() >> index_msb; -} - -static void uv_send_IPI_self(int vector) -{ -	apic_write(APIC_SELF_IPI, vector); -} -  static int uv_probe(void)  {  	return apic == &apic_x2apic_uv_x; @@ -831,45 +755,32 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {  	.name				= "UV large system",  	.probe				= uv_probe,  	.acpi_madt_oem_check		= uv_acpi_madt_oem_check, -	.apic_id_valid			= uv_apic_id_valid, -	.apic_id_registered		= uv_apic_id_registered, -	.delivery_mode			= APIC_DELIVERY_MODE_FIXED,  	.dest_mode_logical		= false,  	.disable_esr			= 0, -	.check_apicid_used		= NULL, -	.init_apic_ldr			= uv_init_apic_ldr, -	.ioapic_phys_id_map		= NULL, -	.setup_apic_routing		= NULL,  	.cpu_present_to_apicid		= default_cpu_present_to_apicid, -	.apicid_to_cpu_present		= NULL, -	.check_phys_apicid_present	= default_check_phys_apicid_present, -	.phys_pkg_id			= uv_phys_pkg_id, +	.max_apic_id			= UINT_MAX,  	.get_apic_id			= x2apic_get_apic_id, -	.set_apic_id			= set_apic_id, -	.calc_dest_apicid		= apic_uv_calc_apicid, +	.calc_dest_apicid		= apic_default_calc_apicid,  	.send_IPI			= uv_send_IPI_one,  	.send_IPI_mask			= uv_send_IPI_mask,  	.send_IPI_mask_allbutself	= uv_send_IPI_mask_allbutself,  	.send_IPI_allbutself		= uv_send_IPI_allbutself,  	.send_IPI_all			= uv_send_IPI_all, -	.send_IPI_self			= uv_send_IPI_self, +	.send_IPI_self			= x2apic_send_IPI_self,  	.wakeup_secondary_cpu		= 
uv_wakeup_secondary, -	.inquire_remote_apic		= NULL,  	.read				= native_apic_msr_read,  	.write				= native_apic_msr_write, -	.eoi_write			= native_apic_msr_eoi_write, +	.eoi				= native_apic_msr_eoi,  	.icr_read			= native_x2apic_icr_read,  	.icr_write			= native_x2apic_icr_write, -	.wait_icr_idle			= native_x2apic_wait_icr_idle, -	.safe_wait_icr_idle		= native_safe_x2apic_wait_icr_idle,  };  #define	UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH	3 @@ -1012,7 +923,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,  	/* One (UV2) mapping */  	if (index == UV2_MMIOH) { -		strncpy(id, "MMIOH", sizeof(id)); +		strscpy(id, "MMIOH", sizeof(id));  		max_io = max_pnode;  		mapped = 0;  		goto map_exit; @@ -1022,7 +933,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,  	switch (index) {  	case UVY_MMIOH0:  		mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0; -		nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; +		nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;  		n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;  		min_nasid = min_pnode;  		max_nasid = max_pnode; @@ -1030,7 +941,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,  		break;  	case UVY_MMIOH1:  		mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1; -		nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; +		nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;  		n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;  		min_nasid = min_pnode;  		max_nasid = max_pnode; @@ -1038,7 +949,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,  		break;  	case UVX_MMIOH0:  		mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0; -		nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; +		nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;  		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;  		min_nasid = min_pnode * 2;  		max_nasid = max_pnode * 2; @@ -1046,7 +957,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,  		break;  	case UVX_MMIOH1:  		mmr = 
UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1; -		nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; +		nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;  		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;  		min_nasid = min_pnode * 2;  		max_nasid = max_pnode * 2; @@ -1072,8 +983,9 @@ static void __init calc_mmioh_map(enum mmioh_arch index,  		/* Invalid NASID check */  		if (nasid < min_nasid || max_nasid < nasid) { -			pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n", -				__func__, index, min_nasid, max_nasid); +			/* Not an error: unused table entries get "poison" values */ +			pr_debug("UV:%s:Invalid NASID(%x):%x (range:%x..%x)\n", +			       __func__, index, nasid, min_nasid, max_nasid);  			nasid = -1;  		} @@ -1292,6 +1204,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)  	hi->nasid_shift		= uv_cpuid.nasid_shift;  	hi->min_pnode		= _min_pnode;  	hi->min_socket		= _min_socket; +	hi->node_to_socket	= _node_to_socket;  	hi->pnode_to_socket	= _pnode_to_socket;  	hi->socket_to_node	= _socket_to_node;  	hi->socket_to_pnode	= _socket_to_pnode; @@ -1348,7 +1261,7 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)  	struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;  	unsigned long lgre = 0, gend = 0;  	int index = 0; -	int sock_min = 999999, pnode_min = 99999; +	int sock_min = INT_MAX, pnode_min = INT_MAX;  	int sock_max = -1, pnode_max = -1;  	uv_gre_table = gre; @@ -1459,11 +1372,37 @@ static int __init decode_uv_systab(void)  	return 0;  } +/* + * Given a bitmask 'bits' representing present blades, numbered + * starting at 'base', masking off unused high bits of blade number + * with 'mask', update the minimum and maximum blade numbers that we + * have found.  (Masking with 'mask' necessary because of BIOS + * treatment of system partitioning when creating this table we are + * interpreting.) 
+ */ +static inline void blade_update_min_max(unsigned long bits, int base, int mask, int *min, int *max) +{ +	int first, last; + +	if (!bits) +		return; +	first = (base + __ffs(bits)) & mask; +	last =  (base + __fls(bits)) & mask; + +	if (*min > first) +		*min = first; +	if (*max < last) +		*max = last; +} +  /* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */  static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)  {  	unsigned long np;  	int i, uv_pb = 0; +	int sock_min = INT_MAX, sock_max = -1, s_mask; + +	s_mask = (1 << uv_cpuid.n_skt) - 1;  	if (UVH_NODE_PRESENT_TABLE) {  		pr_info("UV: NODE_PRESENT_DEPTH = %d\n", @@ -1471,35 +1410,82 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)  		for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {  			np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);  			pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np); -			uv_pb += hweight64(np); +			blade_update_min_max(np, i * 64, s_mask, &sock_min, &sock_max);  		}  	}  	if (UVH_NODE_PRESENT_0) {  		np = uv_read_local_mmr(UVH_NODE_PRESENT_0);  		pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np); -		uv_pb += hweight64(np); +		blade_update_min_max(np, 0, s_mask, &sock_min, &sock_max);  	}  	if (UVH_NODE_PRESENT_1) {  		np = uv_read_local_mmr(UVH_NODE_PRESENT_1);  		pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np); -		uv_pb += hweight64(np); +		blade_update_min_max(np, 64, s_mask, &sock_min, &sock_max); +	} + +	/* Only update if we actually found some bits indicating blades present */ +	if (sock_max >= sock_min) { +		_min_socket = sock_min; +		_max_socket = sock_max; +		uv_pb = sock_max - sock_min + 1;  	}  	if (uv_possible_blades != uv_pb)  		uv_possible_blades = uv_pb; -	pr_info("UV: number nodes/possible blades %d\n", uv_pb); +	pr_info("UV: number nodes/possible blades %d (%d - %d)\n", +		uv_pb, sock_min, sock_max);  } +static int __init alloc_conv_table(int num_elem, unsigned short **table) +{ +	int i; +	
size_t bytes; + +	bytes = num_elem * sizeof(*table[0]); +	*table = kmalloc(bytes, GFP_KERNEL); +	if (WARN_ON_ONCE(!*table)) +		return -ENOMEM; +	for (i = 0; i < num_elem; i++) +		((unsigned short *)*table)[i] = SOCK_EMPTY; +	return 0; +} + +/* Remove conversion table if it's 1:1 */ +#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2) + +static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2) +{ +	int i; +	unsigned short *table = *tp; + +	if (table == NULL) +		return; +	if (max != max2) +		return; +	for (i = 0; i < max; i++) { +		if (i != table[i]) +			return; +	} +	kfree(table); +	*tp = NULL; +	pr_info("UV: %s is 1:1, conversion table removed\n", tname); +} + +/* + * Build Socket Tables + * If the number of nodes is >1 per socket, socket to node table will + * contain lowest node number on that socket. + */  static void __init build_socket_tables(void)  {  	struct uv_gam_range_entry *gre = uv_gre_table; -	int num, nump; -	int cpu, i, lnid; +	int nums, numn, nump; +	int i, lnid, apicid;  	int minsock = _min_socket;  	int maxsock = _max_socket;  	int minpnode = _min_pnode;  	int maxpnode = _max_pnode; -	size_t bytes;  	if (!gre) {  		if (is_uv2_hub() || is_uv3_hub()) { @@ -1507,39 +1493,36 @@ static void __init build_socket_tables(void)  			return;  		}  		pr_err("UV: Error: UVsystab address translations not available!\n"); -		BUG(); +		WARN_ON_ONCE(!gre); +		return;  	} -	/* Build socket id -> node id, pnode */ -	num = maxsock - minsock + 1; -	bytes = num * sizeof(_socket_to_node[0]); -	_socket_to_node = kmalloc(bytes, GFP_KERNEL); -	_socket_to_pnode = kmalloc(bytes, GFP_KERNEL); - +	numn = num_possible_nodes();  	nump = maxpnode - minpnode + 1; -	bytes = nump * sizeof(_pnode_to_socket[0]); -	_pnode_to_socket = kmalloc(bytes, GFP_KERNEL); -	BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket); - -	for (i = 0; i < num; i++) -		_socket_to_node[i] = _socket_to_pnode[i] = 
SOCK_EMPTY; - -	for (i = 0; i < nump; i++) -		_pnode_to_socket[i] = SOCK_EMPTY; +	nums = maxsock - minsock + 1; + +	/* Allocate and clear tables */ +	if ((alloc_conv_table(nump, &_pnode_to_socket) < 0) +	    || (alloc_conv_table(nums, &_socket_to_pnode) < 0) +	    || (alloc_conv_table(numn, &_node_to_socket) < 0) +	    || (alloc_conv_table(nums, &_socket_to_node) < 0)) { +		kfree(_pnode_to_socket); +		kfree(_socket_to_pnode); +		kfree(_node_to_socket); +		return; +	}  	/* Fill in pnode/node/addr conversion list values: */ -	pr_info("UV: GAM Building socket/pnode conversion tables\n");  	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {  		if (gre->type == UV_GAM_RANGE_TYPE_HOLE)  			continue;  		i = gre->sockid - minsock; -		/* Duplicate: */ -		if (_socket_to_pnode[i] != SOCK_EMPTY) -			continue; -		_socket_to_pnode[i] = gre->pnode; +		if (_socket_to_pnode[i] == SOCK_EMPTY) +			_socket_to_pnode[i] = gre->pnode;  		i = gre->pnode - minpnode; -		_pnode_to_socket[i] = gre->sockid; +		if (_pnode_to_socket[i] == SOCK_EMPTY) +			_pnode_to_socket[i] = gre->sockid;  		pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",  			gre->sockid, gre->type, gre->nasid, @@ -1549,66 +1532,38 @@ static void __init build_socket_tables(void)  	/* Set socket -> node values: */  	lnid = NUMA_NO_NODE; -	for_each_present_cpu(cpu) { -		int nid = cpu_to_node(cpu); -		int apicid, sockid; +	for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) { +		int nid = __apicid_to_node[apicid]; +		int sockid; -		if (lnid == nid) +		if ((nid == NUMA_NO_NODE) || (lnid == nid))  			continue;  		lnid = nid; -		apicid = per_cpu(x86_cpu_to_apicid, cpu); +  		sockid = apicid >> uv_cpuid.socketid_shift; -		_socket_to_node[sockid - minsock] = nid; -		pr_info("UV: sid:%02x: apicid:%04x node:%2d\n", -			sockid, apicid, nid); -	} -	/* Set up physical blade to pnode translation from GAM Range Table: */ -	bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); -	_node_to_pnode = 
kmalloc(bytes, GFP_KERNEL); -	BUG_ON(!_node_to_pnode); +		if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) +			_socket_to_node[sockid - minsock] = nid; -	for (lnid = 0; lnid < num_possible_nodes(); lnid++) { -		unsigned short sockid; +		if (_node_to_socket[nid] == SOCK_EMPTY) +			_node_to_socket[nid] = sockid; -		for (sockid = minsock; sockid <= maxsock; sockid++) { -			if (lnid == _socket_to_node[sockid - minsock]) { -				_node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock]; -				break; -			} -		} -		if (sockid > maxsock) { -			pr_err("UV: socket for node %d not found!\n", lnid); -			BUG(); -		} +		pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n", +			sockid, +			apicid, +			_node_to_socket[nid], +			nid, +			_socket_to_node[sockid - minsock]);  	}  	/* -	 * If socket id == pnode or socket id == node for all nodes, +	 * If e.g. socket id == pnode for all pnodes,  	 *   system runs faster by removing corresponding conversion table.  	 */ -	pr_info("UV: Checking socket->node/pnode for identity maps\n"); -	if (minsock == 0) { -		for (i = 0; i < num; i++) -			if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i]) -				break; -		if (i >= num) { -			kfree(_socket_to_node); -			_socket_to_node = NULL; -			pr_info("UV: 1:1 socket_to_node table removed\n"); -		} -	} -	if (minsock == minpnode) { -		for (i = 0; i < num; i++) -			if (_socket_to_pnode[i] != SOCK_EMPTY && -				_socket_to_pnode[i] != i + minpnode) -				break; -		if (i >= num) { -			kfree(_socket_to_pnode); -			_socket_to_pnode = NULL; -			pr_info("UV: 1:1 socket_to_pnode table removed\n"); -		} -	} +	FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn); +	FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn); +	FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump); +	FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump);  }  /* Check which reboot to use */ @@ -1692,12 +1647,13 @@ static __init int uv_system_init_hubless(void)  static void __init 
uv_system_init_hub(void)  {  	struct uv_hub_info_s hub_info = {0}; -	int bytes, cpu, nodeid; -	unsigned short min_pnode = 9999, max_pnode = 0; +	int bytes, cpu, nodeid, bid; +	unsigned short min_pnode = USHRT_MAX, max_pnode = 0;  	char *hub = is_uv5_hub() ? "UV500" :  		    is_uv4_hub() ? "UV400" :  		    is_uv3_hub() ? "UV300" :  		    is_uv2_hub() ? "UV2000/3000" : NULL; +	struct uv_hub_info_s **uv_hub_info_list_blade;  	if (!hub) {  		pr_err("UV: Unknown/unsupported UV hub\n"); @@ -1720,9 +1676,12 @@ static void __init uv_system_init_hub(void)  	build_uv_gr_table();  	set_block_size();  	uv_init_hub_info(&hub_info); -	uv_possible_blades = num_possible_nodes(); -	if (!_node_to_pnode) +	/* If UV2 or UV3 may need to get # blades from HW */ +	if (is_uv(UV2|UV3) && !uv_gre_table)  		boot_init_possible_blades(&hub_info); +	else +		/* min/max sockets set in decode_gam_rng_tbl */ +		uv_possible_blades = (_max_socket - _min_socket) + 1;  	/* uv_num_possible_blades() is really the hub count: */  	pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus()); @@ -1731,79 +1690,98 @@ static void __init uv_system_init_hub(void)  	hub_info.coherency_domain_number = sn_coherency_id;  	uv_rtc_init(); +	/* +	 * __uv_hub_info_list[] is indexed by node, but there is only +	 * one hub_info structure per blade.  First, allocate one +	 * structure per blade.  Further down we create a per-node +	 * table (__uv_hub_info_list[]) pointing to hub_info +	 * structures for the correct blade. 
+	 */ +  	bytes = sizeof(void *) * uv_num_possible_blades(); -	__uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); -	BUG_ON(!__uv_hub_info_list); +	uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL); +	if (WARN_ON_ONCE(!uv_hub_info_list_blade)) +		return;  	bytes = sizeof(struct uv_hub_info_s); -	for_each_node(nodeid) { +	for_each_possible_blade(bid) {  		struct uv_hub_info_s *new_hub; -		if (__uv_hub_info_list[nodeid]) { -			pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid); -			BUG(); +		/* Allocate & fill new per hub info list */ +		new_hub = (bid == 0) ?  &uv_hub_info_node0 +			: kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); +		if (WARN_ON_ONCE(!new_hub)) { +			/* do not kfree() bid 0, which is statically allocated */ +			while (--bid > 0) +				kfree(uv_hub_info_list_blade[bid]); +			kfree(uv_hub_info_list_blade); +			return;  		} -		/* Allocate new per hub info list */ -		new_hub = (nodeid == 0) ?  &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid); -		BUG_ON(!new_hub); -		__uv_hub_info_list[nodeid] = new_hub; -		new_hub = uv_hub_info_list(nodeid); -		BUG_ON(!new_hub); +		uv_hub_info_list_blade[bid] = new_hub;  		*new_hub = hub_info;  		/* Use information from GAM table if available: */ -		if (_node_to_pnode) -			new_hub->pnode = _node_to_pnode[nodeid]; +		if (uv_gre_table) +			new_hub->pnode = uv_blade_to_pnode(bid);  		else /* Or fill in during CPU loop: */  			new_hub->pnode = 0xffff; -		new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); +		new_hub->numa_blade_id = bid;  		new_hub->memory_nid = NUMA_NO_NODE;  		new_hub->nr_possible_cpus = 0;  		new_hub->nr_online_cpus = 0;  	} +	/* +	 * Now populate __uv_hub_info_list[] for each node with the +	 * pointer to the struct for the blade it resides on. 
+	 */ + +	bytes = sizeof(void *) * num_possible_nodes(); +	__uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); +	if (WARN_ON_ONCE(!__uv_hub_info_list)) { +		for_each_possible_blade(bid) +			/* bid 0 is statically allocated */ +			if (bid != 0) +				kfree(uv_hub_info_list_blade[bid]); +		kfree(uv_hub_info_list_blade); +		return; +	} + +	for_each_node(nodeid) +		__uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)]; +  	/* Initialize per CPU info: */  	for_each_possible_cpu(cpu) {  		int apicid = per_cpu(x86_cpu_to_apicid, cpu); -		int numa_node_id; +		unsigned short bid;  		unsigned short pnode; -		nodeid = cpu_to_node(cpu); -		numa_node_id = numa_cpu_node(cpu);  		pnode = uv_apicid_to_pnode(apicid); +		bid = uv_pnode_to_socket(pnode) - _min_socket; -		uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); +		uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid];  		uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;  		if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE)  			uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); -		/* Init memoryless node: */ -		if (nodeid != numa_node_id && -		    uv_hub_info_list(numa_node_id)->pnode == 0xffff) -			uv_hub_info_list(numa_node_id)->pnode = pnode; -		else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) +		if (uv_cpu_hub_info(cpu)->pnode == 0xffff)  			uv_cpu_hub_info(cpu)->pnode = pnode;  	} -	for_each_node(nodeid) { -		unsigned short pnode = uv_hub_info_list(nodeid)->pnode; +	for_each_possible_blade(bid) { +		unsigned short pnode = uv_hub_info_list_blade[bid]->pnode; -		/* Add pnode info for pre-GAM list nodes without CPUs: */ -		if (pnode == 0xffff) { -			unsigned long paddr; +		if (pnode == 0xffff) +			continue; -			paddr = node_start_pfn(nodeid) << PAGE_SHIFT; -			pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); -			uv_hub_info_list(nodeid)->pnode = pnode; -		}  		min_pnode = min(pnode, min_pnode);  		max_pnode = max(pnode, max_pnode); -		
pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", -			nodeid, -			uv_hub_info_list(nodeid)->pnode, -			uv_hub_info_list(nodeid)->nr_possible_cpus); +		pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n", +			bid, +			uv_hub_info_list_blade[bid]->pnode, +			uv_hub_info_list_blade[bid]->nr_possible_cpus);  	}  	pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); @@ -1811,6 +1789,9 @@ static void __init uv_system_init_hub(void)  	map_mmr_high(max_pnode);  	map_mmioh_high(min_pnode, max_pnode); +	kfree(uv_hub_info_list_blade); +	uv_hub_info_list_blade = NULL; +  	uv_nmi_setup();  	uv_cpu_init();  	uv_setup_proc_files(0);  | 
