aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/apic/apic.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/apic/apic.c')
-rw-r--r--arch/x86/kernel/apic/apic.c280
1 files changed, 182 insertions, 98 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f5291362da1a..9e2dd2b296cd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -65,10 +65,10 @@ unsigned int num_processors;
unsigned disabled_cpus;
/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
-u8 boot_cpu_apic_version;
+u8 boot_cpu_apic_version __ro_after_init;
/*
* The highest APIC ID seen during enumeration.
@@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map;
* disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
* avoid undefined behaviour caused by sending INIT from AP to BSP.
*/
-static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
+static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
/*
* This variable controls which CPUs receive external NMIs. By default,
* external NMIs are delivered only to the BSP.
*/
-static int apic_extnmi = APIC_EXTNMI_BSP;
+static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
/*
* Map cpu index to physical APIC ID
@@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
+static int enabled_via_apicbase __ro_after_init;
/*
* Handle interrupt mode configuration register (IMCR).
@@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s)
__setup("apicpmtimer", setup_apicpmtimer);
#endif
-unsigned long mp_lapic_addr;
-int disable_apic;
+unsigned long mp_lapic_addr __ro_after_init;
+int disable_apic __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
+int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/*
* Debug level, exported for io_apic.c
*/
-int apic_verbosity;
+int apic_verbosity __ro_after_init;
-int pic_mode;
+int pic_mode __ro_after_init;
/* Have we found an MP table */
-int smp_found_config;
+int smp_found_config __ro_after_init;
static struct resource lapic_resource = {
.name = "Local APIC",
@@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
-static unsigned long apic_phys;
+static unsigned long apic_phys __ro_after_init;
/*
* Get the LAPIC version
@@ -590,21 +590,21 @@ static u32 skx_deadline_rev(void)
static const struct x86_cpu_id deadline_match[] = {
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
- DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
+ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev),
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52),
{},
};
@@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
/*
- * Temporary interrupt handler.
+ * Temporary interrupt handler and polled calibration function.
*/
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
if (!boot_cpu_has(X86_FEATURE_APIC))
return true;
+ /* Virt guests may lack ARAT, but still have DEADLINE */
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ return true;
+
/* Deadline timer is based on TSC so no further PIT action required */
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return false;
@@ -851,7 +855,8 @@ bool __init apic_needs_pit(void)
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
- void (*real_handler)(struct clock_event_device *dev);
+ u64 tsc_perj = 0, tsc_start = 0;
+ unsigned long jif_start;
unsigned long deltaj;
long delta, deltatsc;
int pm_referenced = 0;
@@ -878,28 +883,64 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
+ /*
+ * There are platforms w/o global clockevent devices. Instead of
+ * making the calibration conditional on that, use a polling based
+ * approach everywhere.
+ */
local_irq_disable();
- /* Replace the global interrupt handler */
- real_handler = global_clock_event->event_handler;
- global_clock_event->event_handler = lapic_cal_handler;
-
/*
* Setup the APIC counter to maximum. There is no way the lapic
* can underflow in the 100ms detection time frame
*/
__setup_APIC_LVTT(0xffffffff, 0, 0);
- /* Let the interrupts run */
+ /*
+ * Methods to terminate the calibration loop:
+ * 1) Global clockevent if available (jiffies)
+ * 2) TSC if available and frequency is known
+ */
+ jif_start = READ_ONCE(jiffies);
+
+ if (tsc_khz) {
+ tsc_start = rdtsc();
+ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
+ }
+
+ /*
+ * Enable interrupts so the tick can fire, if a global
+ * clockevent device is available
+ */
local_irq_enable();
- while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
- cpu_relax();
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
+ /* Wait for a tick to elapse */
+ while (1) {
+ if (tsc_khz) {
+ u64 tsc_now = rdtsc();
+ if ((tsc_now - tsc_start) >= tsc_perj) {
+ tsc_start += tsc_perj;
+ break;
+ }
+ } else {
+ unsigned long jif_now = READ_ONCE(jiffies);
- local_irq_disable();
+ if (time_after(jif_now, jif_start)) {
+ jif_start = jif_now;
+ break;
+ }
+ }
+ cpu_relax();
+ }
+
+ /* Invoke the calibration routine */
+ local_irq_disable();
+ lapic_cal_handler(NULL);
+ local_irq_enable();
+ }
- /* Restore the real event handler */
- global_clock_event->event_handler = real_handler;
+ local_irq_disable();
/* Build delta t1-t2 as apic timer counts down */
delta = lapic_cal_t1 - lapic_cal_t2;
@@ -943,10 +984,11 @@ static int __init calibrate_APIC_clock(void)
levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
/*
- * PM timer calibration failed or not turned on
- * so lets try APIC timer based calibration
+ * PM timer calibration failed or not turned on so lets try APIC
+ * timer based calibration, if a global clockevent device is
+ * available.
*/
- if (!pm_referenced) {
+ if (!pm_referenced && global_clock_event) {
apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
/*
@@ -1182,25 +1224,38 @@ void clear_local_APIC(void)
}
/**
- * disable_local_APIC - clear and disable the local APIC
+ * apic_soft_disable - Clears and software disables the local APIC on hotplug
+ *
+ * Contrary to disable_local_APIC() this does not touch the enable bit in
+ * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
+ * bus would require a hardware reset as the APIC would lose track of bus
+ * arbitration. On systems with FSB delivery APICBASE could be disabled,
+ * but it has to be guaranteed that no interrupt is sent to the APIC while
+ * in that state and it's not clear from the SDM whether it still responds
+ * to INIT/SIPI messages. Stay on the safe side and use software disable.
*/
-void disable_local_APIC(void)
+void apic_soft_disable(void)
{
- unsigned int value;
-
- /* APIC hasn't been mapped yet */
- if (!x2apic_mode && !apic_phys)
- return;
+ u32 value;
clear_local_APIC();
- /*
- * Disable APIC (implies clearing of registers
- * for 82489DX!).
- */
+ /* Soft disable APIC (implies clearing of registers for 82489DX!). */
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
apic_write(APIC_SPIV, value);
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+ /* APIC hasn't been mapped yet */
+ if (!x2apic_mode && !apic_phys)
+ return;
+
+ apic_soft_disable();
#ifdef CONFIG_X86_32
/*
@@ -1265,7 +1320,7 @@ void __init sync_Arb_IDs(void)
APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
-enum apic_intr_mode_id apic_intr_mode;
+enum apic_intr_mode_id apic_intr_mode __ro_after_init;
static int __init apic_intr_mode_select(void)
{
@@ -1453,54 +1508,72 @@ static void lapic_setup_esr(void)
oldvalue, value);
}
-static void apic_pending_intr_clear(void)
+#define APIC_IR_REGS APIC_ISR_NR
+#define APIC_IR_BITS (APIC_IR_REGS * 32)
+#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG)
+
+union apic_ir {
+ unsigned long map[APIC_IR_MAPSIZE];
+ u32 regs[APIC_IR_REGS];
+};
+
+static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
{
- long long max_loops = cpu_khz ? cpu_khz : 1000000;
- unsigned long long tsc = 0, ntsc;
- unsigned int queued;
- unsigned long value;
- int i, j, acked = 0;
+ int i, bit;
+
+ /* Read the IRRs */
+ for (i = 0; i < APIC_IR_REGS; i++)
+ irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+ /* Read the ISRs */
+ for (i = 0; i < APIC_IR_REGS; i++)
+ isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
- if (boot_cpu_has(X86_FEATURE_TSC))
- tsc = rdtsc();
/*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
- *
- * Most probably by now CPU has serviced that pending interrupt and
- * it might not have done the ack_APIC_irq() because it thought,
- * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
- * does not clear the ISR bit and cpu thinks it has already serivced
- * the interrupt. Hence a vector might get locked. It was noticed
- * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
+ * If the ISR map is not empty. ACK the APIC and run another round
+ * to verify whether a pending IRR has been unblocked and turned
+ * into a ISR.
*/
- do {
- queued = 0;
- for (i = APIC_ISR_NR - 1; i >= 0; i--)
- queued |= apic_read(APIC_IRR + i*0x10);
-
- for (i = APIC_ISR_NR - 1; i >= 0; i--) {
- value = apic_read(APIC_ISR + i*0x10);
- for_each_set_bit(j, &value, 32) {
- ack_APIC_irq();
- acked++;
- }
- }
- if (acked > 256) {
- pr_err("LAPIC pending interrupts after %d EOI\n", acked);
- break;
- }
- if (queued) {
- if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
- ntsc = rdtsc();
- max_loops = (long long)cpu_khz << 10;
- max_loops -= ntsc - tsc;
- } else {
- max_loops--;
- }
- }
- } while (queued && max_loops > 0);
- WARN_ON(max_loops <= 0);
+ if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
+ /*
+ * There can be multiple ISR bits set when a high priority
+ * interrupt preempted a lower priority one. Issue an ACK
+ * per set bit.
+ */
+ for_each_set_bit(bit, isr->map, APIC_IR_BITS)
+ ack_APIC_irq();
+ return true;
+ }
+
+ return !bitmap_empty(irr->map, APIC_IR_BITS);
+}
+
+/*
+ * After a crash, we no longer service the interrupts and a pending
+ * interrupt from previous kernel might still have ISR bit set.
+ *
+ * Most probably by now the CPU has serviced that pending interrupt and it
+ * might not have done the ack_APIC_irq() because it thought, interrupt
+ * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
+ * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
+ * a vector might get locked. It was noticed for timer irq (vector
+ * 0x31). Issue an extra EOI to clear ISR.
+ *
+ * If there are pending IRR bits they turn into ISR bits after a higher
+ * priority ISR bit has been acked.
+ */
+static void apic_pending_intr_clear(void)
+{
+ union apic_ir irr, isr;
+ unsigned int i;
+
+ /* 512 loops are way oversized and give the APIC a chance to obey. */
+ for (i = 0; i < 512; i++) {
+ if (!apic_check_and_ack(&irr, &isr))
+ return;
+ }
+ /* Dump the IRR/ISR content if that failed */
+ pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
}
/**
@@ -1517,12 +1590,19 @@ static void setup_local_APIC(void)
int logical_apicid, ldr_apicid;
#endif
-
if (disable_apic) {
disable_ioapic_support();
return;
}
+ /*
+ * If this comes from kexec/kcrash the APIC might be enabled in
+ * SPIV. Soft disable it before doing further initialization.
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_SPIV_APIC_ENABLED;
+ apic_write(APIC_SPIV, value);
+
#ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
if (lapic_is_integrated() && apic->disable_esr) {
@@ -1532,8 +1612,6 @@ static void setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
- perf_events_lapic_init();
-
/*
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
@@ -1561,13 +1639,17 @@ static void setup_local_APIC(void)
#endif
/*
- * Set Task Priority to 'accept all'. We never change this
- * later on.
+ * Set Task Priority to 'accept all except vectors 0-31'. An APIC
+ * vector in the 16-31 range could be delivered if TPR == 0, but we
+ * would think it's an exception and terrible things will happen. We
+ * never change this later on.
*/
value = apic_read(APIC_TASKPRI);
value &= ~APIC_TPRI_MASK;
+ value |= 0x10;
apic_write(APIC_TASKPRI, value);
+ /* Clear eventually stale ISR/IRR bits */
apic_pending_intr_clear();
/*
@@ -1614,6 +1696,8 @@ static void setup_local_APIC(void)
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
+ perf_events_lapic_init();
+
/*
* Set up LVT0, LVT1:
*