aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/apic
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r--arch/x86/kernel/apic/Makefile5
-rw-r--r--arch/x86/kernel/apic/apic.c40
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c41
-rw-r--r--arch/x86/kernel/apic/io_apic.c53
-rw-r--r--arch/x86/kernel/apic/nmi.c567
-rw-r--r--arch/x86/kernel/apic/probe_64.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c76
7 files changed, 96 insertions, 693 deletions
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
#
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
+obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c0f6426cd337..ce65d449b750 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/dmi.h>
-#include <linux/nmi.h>
#include <linux/smp.h>
#include <linux/mm.h>
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
reserved = reserve_eilvt_offset(offset, new);
if (reserved != new) {
- pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
- "vector 0x%x was already reserved by another core, "
- "APIC%lX=0x%x\n",
- smp_processor_id(), new, reserved, reg, old);
+ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+ "vector 0x%x, but the register is already in use for "
+ "vector 0x%x on another cpu\n",
+ smp_processor_id(), reg, offset, new, reserved);
return -EINVAL;
}
if (!eilvt_entry_is_changeable(old, new)) {
- pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
- "register already in use, APIC%lX=0x%x\n",
- smp_processor_id(), new, reg, old);
+ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+ "vector 0x%x, but the register is already in use for "
+ "vector 0x%x on this cpu\n",
+ smp_processor_id(), reg, offset, new, old);
return -EBUSY;
}
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
* PIT/HPET going. Otherwise register lapic as a dummy
* device.
*/
- if (nmi_watchdog != NMI_IO_APIC)
- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
- else
- pr_warning("APIC timer registered as dummy,"
- " due to nmi_watchdog=%d!\n", nmi_watchdog);
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
/* Setup the lapic or request the broadcast */
setup_APIC_timer();
@@ -1384,8 +1380,15 @@ void __cpuinit end_local_APIC_setup(void)
}
#endif
- setup_apic_nmi_watchdog(NULL);
apic_pm_activate();
+
+ /*
+ * Now that local APIC setup is completed for BP, configure the fault
+ * handling for interrupt remapping.
+ */
+ if (!smp_processor_id() && intr_remapping_enabled)
+ enable_drhd_fault_handling();
+
}
#ifdef CONFIG_X86_X2APIC
@@ -1694,7 +1697,7 @@ void __init register_lapic_address(unsigned long address)
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
-int apic_version[MAX_APICS];
+int apic_version[MAX_LOCAL_APIC];
int __init APIC_init_uniprocessor(void)
{
@@ -1759,17 +1762,10 @@ int __init APIC_init_uniprocessor(void)
setup_IO_APIC();
else {
nr_ioapics = 0;
- localise_nmi_watchdog();
}
-#else
- localise_nmi_watchdog();
#endif
x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
- check_nmi_watchdog();
-#endif
-
return 0;
}
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..72ec29e1ae06 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,19 +17,31 @@
#include <linux/nmi.h>
#include <linux/module.h>
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(void)
{
return (u64)(cpu_khz) * 1000 * 60;
}
+#endif
+
+#ifdef arch_trigger_all_cpu_backtrace
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
-#ifdef ARCH_HAS_NMI_WATCHDOG
void arch_trigger_all_cpu_backtrace(void)
{
int i;
+ if (test_and_set_bit(0, &backtrace_flag))
+ /*
+ * If there is already a trigger_all_cpu_backtrace() in progress
+ * (backtrace_flag == 1), don't output double cpu dump infos.
+ */
+ return;
+
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -41,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
break;
mdelay(1);
}
+
+ clear_bit(0, &backtrace_flag);
+ smp_mb__after_clear_bit();
}
static int __kprobes
@@ -49,7 +64,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
{
struct die_args *args = __args;
struct pt_regs *regs;
- int cpu = smp_processor_id();
+ int cpu;
switch (cmd) {
case DIE_NMI:
@@ -61,6 +76,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
}
regs = args->regs;
+ cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -90,18 +106,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
}
early_initcall(register_trigger_all_cpu_backtrace);
#endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index bb61a552d8c6..52735a710c30 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
#include <asm/dma.h>
#include <asm/timer.h>
#include <asm/i8259.h>
-#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
#include <asm/setup.h>
@@ -2458,13 +2457,12 @@ static void ack_apic_level(struct irq_data *data)
{
struct irq_cfg *cfg = data->chip_data;
int i, do_unmask_irq = 0, irq = data->irq;
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long v;
irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
mask_ioapic(cfg);
}
@@ -2671,24 +2669,6 @@ static void lapic_register_intr(int irq)
"edge");
}
-static void __init setup_nmi(void)
-{
- /*
- * Dirty trick to enable the NMI watchdog ...
- * We put the 8259A master into AEOI mode and
- * unmask on all local APICs LVT0 as NMI.
- *
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
- * is from Maciej W. Rozycki - so we do not have to EOI from
- * the NMI handler or the timer interrupt.
- */
- apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
- enable_NMI_through_LVT0();
-
- apic_printk(APIC_VERBOSE, " done.\n");
-}
-
/*
* This looks a bit hackish but it's about the only one way of sending
* a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2794,15 +2774,6 @@ static inline void __init check_timer(void)
*/
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
legacy_pic->init(1);
-#ifdef CONFIG_X86_32
- {
- unsigned int ver;
-
- ver = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(ver);
- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
- }
-#endif
pin1 = find_isa_irq_pin(0, mp_INT);
apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2850,10 +2821,6 @@ static inline void __init check_timer(void)
unmask_ioapic(cfg);
}
if (timer_irq_works()) {
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- legacy_pic->unmask(0);
- }
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
goto out;
@@ -2879,11 +2846,6 @@ static inline void __init check_timer(void)
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1;
- if (nmi_watchdog == NMI_IO_APIC) {
- legacy_pic->mask(0);
- setup_nmi();
- legacy_pic->unmask(0);
- }
goto out;
}
/*
@@ -2895,15 +2857,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
}
- if (nmi_watchdog == NMI_IO_APIC) {
- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
- "through the IO-APIC - disabling NMI Watchdog!\n");
- nmi_watchdog = NMI_NONE;
- }
-#ifdef CONFIG_X86_32
- timer_ack = 0;
-#endif
-
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
@@ -3441,6 +3394,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+ msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
dmar_msi_write(irq, &msg);
@@ -4133,7 +4087,8 @@ void __init pre_init_apic_IRQ0(void)
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+ physid_set_mask_of_physid(boot_cpu_physical_apicid,
+ &phys_cpu_present_map);
#endif
/* Make sure the irq descriptor is set up */
cfg = alloc_irq_and_cfg_at(0, 0);
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- * NMI watchdog support on APIC systems
- *
- * Started by Ingo Molnar <mingo@redhat.com>
- *
- * Fixes:
- * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
- * Mikael Pettersson : Power Management for local APIC NMI watchdog.
- * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- * be enabled
- * 0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
- return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
- return atomic_read(&mce_entry) > 0;
-#endif
- return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
- return per_cpu(irq_stat, cpu).apic_timer_irqs +
- per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
- local_irq_enable_in_hardirq();
- /*
- * Intentionally don't use cpu_relax here. This is
- * to make sure that the performance counter really ticks,
- * even if there is a simulator or similar that catches the
- * pause instruction. On a real HT machine this is fine because
- * all other CPUs are busy with "useless" delay loops and don't
- * care if they get somewhat less cycles.
- */
- while (endflag == 0)
- mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
- printk(KERN_CONT "\n");
-
- printk(KERN_WARNING
- "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
- cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
- printk(KERN_WARNING
- "Please report this to bugzilla.kernel.org,\n");
- printk(KERN_WARNING
- "and attach the output of the 'dmesg' command.\n");
-
- per_cpu(wd_enabled, cpu) = 0;
- atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
- unsigned int *prev_nmi_count;
- int cpu;
-
- if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
- return 0;
-
- prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
- if (!prev_nmi_count)
- goto error;
-
- printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
- if (nmi_watchdog == NMI_LOCAL_APIC)
- smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
- for_each_possible_cpu(cpu)
- prev_nmi_count[cpu] = get_nmi_count(cpu);
- local_irq_enable();
- mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
- for_each_online_cpu(cpu) {
- if (!per_cpu(wd_enabled, cpu))
- continue;
- if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
- report_broken_nmi(cpu, prev_nmi_count);
- }
- endflag = 1;
- if (!atomic_read(&nmi_active)) {
- kfree(prev_nmi_count);
- atomic_set(&nmi_active, -1);
- goto error;
- }
- printk("OK.\n");
-
- /*
- * now that we know it works we can reduce NMI frequency to
- * something more reasonable; makes a difference in some configs
- */
- if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = lapic_adjust_nmi_hz(1);
-
- kfree(prev_nmi_count);
- return 0;
-error:
- if (nmi_watchdog == NMI_IO_APIC) {
- if (!timer_through_8259)
- legacy_pic->mask(0);
- on_each_cpu(__acpi_nmi_disable, NULL, 1);
- }
-
-#ifdef CONFIG_X86_32
- timer_ack = 0;
-#endif
- return -1;
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
- unsigned int nmi;
-
- if (!strncmp(str, "panic", 5)) {
- panic_on_timeout = 1;
- str = strchr(str, ',');
- if (!str)
- return 1;
- ++str;
- }
-
- if (!strncmp(str, "lapic", 5))
- nmi_watchdog = NMI_LOCAL_APIC;
- else if (!strncmp(str, "ioapic", 6))
- nmi_watchdog = NMI_IO_APIC;
- else {
- get_option(&str, &nmi);
- if (nmi >= NMI_INVALID)
- return 0;
- nmi_watchdog = nmi;
- }
-
- return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
- /* only CPU0 goes here, other CPUs should be offline */
- nmi_pm_active = atomic_read(&nmi_active);
- stop_apic_nmi_watchdog(NULL);
- BUG_ON(atomic_read(&nmi_active) != 0);
- return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
- /* only CPU0 goes here, other CPUs should be offline */
- if (nmi_pm_active > 0) {
- setup_apic_nmi_watchdog(NULL);
- touch_nmi_watchdog();
- }
- return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
- .name = "lapic_nmi",
- .resume = lapic_nmi_resume,
- .suspend = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
- .id = 0,
- .cls = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
- int error;
-
- /*
- * should really be a BUG_ON but b/c this is an
- * init call, it just doesn't work. -dcz
- */
- if (nmi_watchdog != NMI_LOCAL_APIC)
- return 0;
-
- if (atomic_read(&nmi_active) < 0)
- return 0;
-
- error = sysdev_class_register(&nmi_sysclass);
- if (!error)
- error = sysdev_register(&device_lapic_nmi);
- return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
- __get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
- if (__get_cpu_var(wd_enabled))
- return;
-
- /* cheap hack to support suspend/resume */
- /* if cpu0 is not active neither should the other cpus */
- if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
- return;
-
- switch (nmi_watchdog) {
- case NMI_LOCAL_APIC:
- if (lapic_watchdog_init(nmi_hz) < 0) {
- __get_cpu_var(wd_enabled) = 0;
- return;
- }
- /* FALL THROUGH */
- case NMI_IO_APIC:
- __get_cpu_var(wd_enabled) = 1;
- atomic_inc(&nmi_active);
- }
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
- /* only support LOCAL and IO APICs for now */
- if (!nmi_watchdog_active())
- return;
- if (__get_cpu_var(wd_enabled) == 0)
- return;
- if (nmi_watchdog == NMI_LOCAL_APIC)
- lapic_watchdog_stop();
- else
- __acpi_nmi_disable(NULL);
- __get_cpu_var(wd_enabled) = 0;
- atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(long, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
- if (nmi_watchdog_active()) {
- unsigned cpu;
-
- /*
- * Tell other CPUs to reset their alert counters. We cannot
- * do it ourselves because the alert count increase is not
- * atomic.
- */
- for_each_present_cpu(cpu) {
- if (per_cpu(nmi_touch, cpu) != 1)
- per_cpu(nmi_touch, cpu) = 1;
- }
- }
-
- /*
- * Tickle the softlockup detector too:
- */
- touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
- /*
- * Since current_thread_info()-> is always on the stack, and we
- * always switch the stack NMI-atomically, it's safe to use
- * smp_processor_id().
- */
- unsigned int sum;
- int touched = 0;
- int cpu = smp_processor_id();
- int rc = 0;
-
- sum = get_timer_irqs(cpu);
-
- if (__get_cpu_var(nmi_touch)) {
- __get_cpu_var(nmi_touch) = 0;
- touched = 1;
- }
-
- /* We can be called before check_nmi_watchdog, hence NULL check. */
- if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
-
- raw_spin_lock(&lock);
- printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
- show_regs(regs);
- dump_stack();
- raw_spin_unlock(&lock);
- cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-
- rc = 1;
- }
-
- /* Could check oops_in_progress here too, but it's safer not to */
- if (mce_in_progress())
- touched = 1;
-
- /* if the none of the timers isn't firing, this cpu isn't doing much */
- if (!touched && __get_cpu_var(last_irq_sum) == sum) {
- /*
- * Ayiee, looks like this CPU is stuck ...
- * wait a few IRQs (5 seconds) before doing the oops ...
- */
- __this_cpu_inc(alert_counter);
- if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi("BUG: NMI Watchdog detected LOCKUP",
- regs, panic_on_timeout);
- } else {
- __get_cpu_var(last_irq_sum) = sum;
- __this_cpu_write(alert_counter, 0);
- }
-
- /* see if the nmi watchdog went off */
- if (!__get_cpu_var(wd_enabled))
- return rc;
- switch (nmi_watchdog) {
- case NMI_LOCAL_APIC:
- rc |= lapic_wd_event(nmi_hz);
- break;
- case NMI_IO_APIC:
- /*
- * don't know how to accurately check for this.
- * just assume it was a watchdog timer interrupt
- * This matches the old behaviour.
- */
- rc = 1;
- break;
- }
- return rc;
-}
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
- __get_cpu_var(wd_enabled) = 1;
- atomic_inc(&nmi_active);
- __acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
- on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
- touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
- unknown_nmi_panic = 1;
- return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
- unsigned char reason = get_nmi_reason();
- char buf[64];
-
- sprintf(buf, "NMI received for unknown reason %02x\n", reason);
- die_nmi(buf, regs, 1); /* Always panic here */
- return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
-{
- int old_state;
-
- nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
- old_state = nmi_watchdog_enabled;
- proc_dointvec(table, write, buffer, length, ppos);
- if (!!old_state == !!nmi_watchdog_enabled)
- return 0;
-
- if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
- printk(KERN_WARNING
- "NMI watchdog is permanently disabled\n");
- return -EIO;
- }
-
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- if (nmi_watchdog_enabled)
- enable_lapic_nmi_watchdog();
- else
- disable_lapic_nmi_watchdog();
- } else if (nmi_watchdog == NMI_IO_APIC) {
- if (nmi_watchdog_enabled)
- enable_ioapic_nmi_watchdog();
- else
- disable_ioapic_nmi_watchdog();
- } else {
- printk(KERN_WARNING
- "NMI watchdog doesn't know what hardware to touch\n");
- return -EIO;
- }
- return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
- if (unknown_nmi_panic)
- return unknown_nmi_panic_callback(regs, cpu);
-#endif
- return 0;
-}
-
-void arch_trigger_all_cpu_backtrace(void)
-{
- int i;
-
- cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
- printk(KERN_INFO "sending NMI to all CPUs:\n");
- apic->send_IPI_all(NMI_VECTOR);
-
- /* Wait for up to 10 seconds for all CPUs to do the backtrace */
- for (i = 0; i < 10 * 1000; i++) {
- if (cpumask_empty(to_cpumask(backtrace_mask)))
- break;
- mdelay(1);
- }
-}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a54073..d8c4a6feb286 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void)
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;
}
-
- /*
- * Now that apic routing model is selected, configure the
- * fault handling for intr remapping.
- */
- if (intr_remapping_enabled)
- enable_drhd_fault_handling();
}
/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 194539aea175..2a3f2a7db243 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -44,8 +44,20 @@ static u64 gru_start_paddr, gru_end_paddr;
static union uvh_apicid uvh_apicid;
int uv_min_hub_revision_id;
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+unsigned int uv_apicid_hibits;
+EXPORT_SYMBOL_GPL(uv_apicid_hibits);
static DEFINE_SPINLOCK(uv_nmi_lock);
+static unsigned long __init uv_early_read_mmr(unsigned long addr)
+{
+ unsigned long val, *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
+ val = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ return val;
+}
+
static inline bool is_GRU_range(u64 start, u64 end)
{
return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -56,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
return is_ISA_range(start, end) || is_GRU_range(start, end);
}
-static int early_get_nodeid(void)
+static int __init early_get_pnodeid(void)
{
union uvh_node_id_u node_id;
- unsigned long *mmr;
-
- mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
- node_id.v = *mmr;
- early_iounmap(mmr, sizeof(*mmr));
+ union uvh_rh_gam_config_mmr_u m_n_config;
+ int pnode;
/* Currently, all blades have same revision number */
+ node_id.v = uv_early_read_mmr(UVH_NODE_ID);
+ m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
uv_min_hub_revision_id = node_id.s.revision;
- return node_id.s.node_id;
+ pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+ return pnode;
}
static void __init early_get_apic_pnode_shift(void)
{
- unsigned long *mmr;
-
- mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
- uvh_apicid.v = *mmr;
- early_iounmap(mmr, sizeof(*mmr));
+ uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
if (!uvh_apicid.v)
/*
* Old bios, use default value
@@ -85,12 +93,25 @@ static void __init early_get_apic_pnode_shift(void)
uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
}
+/*
+ * Add an extra bit as dictated by bios to the destination apicid of
+ * interrupts potentially passing through the UV HUB. This prevents
+ * a deadlock between interrupts and IO port operations.
+ */
+static void __init uv_set_apicid_hibit(void)
+{
+ union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
+
+ apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
+ uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
+}
+
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- int nodeid;
+ int pnodeid;
if (!strcmp(oem_id, "SGI")) {
- nodeid = early_get_nodeid();
+ pnodeid = early_get_pnodeid();
early_get_apic_pnode_shift();
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
x86_platform.nmi_init = uv_nmi_init;
@@ -100,8 +121,9 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
__get_cpu_var(x2apic_extra_bits) =
- nodeid << (uvh_apicid.s.pnode_shift - 1);
+ pnodeid << uvh_apicid.s.pnode_shift;
uv_system_type = UV_NON_UNIQUE_APIC;
+ uv_set_apicid_hibit();
return 1;
}
}
@@ -155,6 +177,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
int pnode;
pnode = uv_apicid_to_pnode(phys_apicid);
+ phys_apicid |= uv_apicid_hibits;
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -236,7 +259,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
int cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
+ return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
else
return BAD_APICID;
}
@@ -255,7 +278,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
- return per_cpu(x86_cpu_to_apicid, cpu);
+ return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
}
static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -661,27 +684,32 @@ void uv_nmi_init(void)
void __init uv_system_init(void)
{
union uvh_rh_gam_config_mmr_u m_n_config;
+ union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
union uvh_node_id_u node_id;
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
- int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+ int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
int gnode_extra, max_pnode = 0;
unsigned long mmr_base, present, paddr;
- unsigned short pnode_mask;
+ unsigned short pnode_mask, pnode_io_mask;
map_low_mmrs();
m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
+ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+ n_io = mmioh.s.n_io;
mmr_base =
uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
~UV_MMR_ENABLE;
pnode_mask = (1 << n_val) - 1;
+ pnode_io_mask = (1 << n_io) - 1;
+
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
gnode_upper = ((unsigned long)gnode_extra << m_val);
- printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
- n_val, m_val, gnode_upper, gnode_extra);
+ printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
+ n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
@@ -714,7 +742,7 @@ void __init uv_system_init(void)
for (j = 0; j < 64; j++) {
if (!test_bit(j, &present))
continue;
- pnode = (i * 64 + j);
+ pnode = (i * 64 + j) & pnode_mask;
uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
@@ -735,6 +763,7 @@ void __init uv_system_init(void)
/*
* apic_pnode_shift must be set before calling uv_apicid_to_pnode();
*/
+ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
pnode = uv_apicid_to_pnode(apicid);
blade = boot_pnode_to_blade(pnode);
@@ -751,7 +780,6 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -775,7 +803,7 @@ void __init uv_system_init(void)
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
- map_mmioh_high(max_pnode);
+ map_mmioh_high(max_pnode & pnode_io_mask);
uv_cpu_init();
uv_scir_register_cpu_notifier();