From 1c7b74d46fed530cca22a9a54140cdac2815c797 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 6 Jan 2011 16:18:47 -0500 Subject: x86, NMI: Add NMI symbol constants and rename memory parity to PCI SERR Replace the NMI related magic numbers with symbol constants. Memory parity error is only valid for IBM PC-AT, newer machine use bit 7 (0x80) of 0x61 port for PCI SERR. While memory error is usually reported via MCE. So corresponding function name and kernel log string is changed. But on some machines, PCI SERR line is still used to report memory errors. This is used by EDAC, so corresponding EDAC call is reserved. Signed-off-by: Huang Ying Signed-off-by: Don Zickus Signed-off-by: Peter Zijlstra LKML-Reference: <1294348732-15030-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach_traps.h | 12 ++++++++- arch/x86/kernel/traps.c | 51 ++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h index f7920601e472..72a8b52e7dfd 100644 --- a/arch/x86/include/asm/mach_traps.h +++ b/arch/x86/include/asm/mach_traps.h @@ -7,9 +7,19 @@ #include +#define NMI_REASON_PORT 0x61 + +#define NMI_REASON_SERR 0x80 +#define NMI_REASON_IOCHK 0x40 +#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) + +#define NMI_REASON_CLEAR_SERR 0x04 +#define NMI_REASON_CLEAR_IOCHK 0x08 +#define NMI_REASON_CLEAR_MASK 0x0f + static inline unsigned char get_nmi_reason(void) { - return inb(0x61); + return inb(NMI_REASON_PORT); } static inline void reassert_nmi(void) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c76aaca5694d..c7fd1cea0374 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -310,15 +310,15 @@ static int __init setup_unknown_nmi_panic(char *str) __setup("unknown_nmi_panic", setup_unknown_nmi_panic); static notrace __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs *regs) +pci_serr_error(unsigned char reason, struct pt_regs *regs) { - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - printk(KERN_EMERG - "You have some hardware problem, likely on the PCI bus.\n"); + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + /* + * On some machines, PCI SERR line is used to report memory + * errors. EDAC makes use of it. + */ #if defined(CONFIG_EDAC) if (edac_handler_set()) { edac_atomic_assert_error(); @@ -329,11 +329,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); - /* Clear and disable the memory parity error line. */ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); + /* Clear and disable the PCI SERR error line. */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; + outb(reason, NMI_REASON_PORT); } static notrace __kprobes void @@ -341,15 +341,17 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); show_registers(regs); if (panic_on_io_nmi) panic("NMI IOCK error: Not continuing"); /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); i = 20000; while (--i) { @@ -357,8 +359,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) udelay(100); } - reason &= ~8; - outb(reason, 0x61); + reason &= ~NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); } static notrace __kprobes void @@ -377,15 +379,14 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) return; } #endif - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); + pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); } static notrace __kprobes void default_do_nmi(struct pt_regs *regs) @@ -399,7 +400,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (!cpu) reason = get_nmi_reason(); - if (!(reason & 0xc0)) { + if (!(reason & NMI_REASON_MASK)) { if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; @@ -417,9 +418,9 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) return; /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & 0x80) - mem_parity_error(reason, regs); - if (reason & 0x40) + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + if (reason & NMI_REASON_IOCHK) io_check_error(reason, regs); #ifdef CONFIG_X86_32 /* -- cgit v1.2.3-59-g8ed1b From 673a6092ce5f5bec45619b7a7f89cfcf8bcf3c41 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 6 Jan 2011 16:18:48 -0500 Subject: x86: Convert some devices to use DIE_NMIUNKNOWN They are a handful of places in the code that register a die_notifier as a catch all in case no claims the NMI. Unfortunately, they trigger on events like DIE_NMI and DIE_NMI_IPI, which depending on when they registered may collide with other handlers that have the ability to determine if the NMI is theirs or not. The function unknown_nmi_error() makes one last effort to walk the die_chain when no one else has claimed the NMI before spitting out messages that the NMI is unknown. This is a better spot for these devices to execute any code without colliding with the other handlers. The two drivers modified are only compiled on x86 arches I believe, so they shouldn't be affected by other arches that may not have DIE_NMIUNKNOWN defined. Signed-off-by: Don Zickus Cc: Russ Anderson Cc: Corey Minyard Cc: openipmi-developer@lists.sourceforge.net Cc: dann frazier Signed-off-by: Peter Zijlstra LKML-Reference: <1294348732-15030-3-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- drivers/char/ipmi/ipmi_watchdog.c | 2 +- drivers/watchdog/hpwdt.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c1c52c341f40..927902d90fe6 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -639,7 +639,7 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { - if (reason != DIE_NMI_IPI) + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index f4d334f2536e..320668f4c3aa 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -1081,7 +1081,7 @@ ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = data; - if (val != DIE_NMI) + if (val != DIE_NMIUNKNOWN) return NOTIFY_OK; /* Hack, if it's a memory or I/O error, ignore it. */ diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index dea7b5bf6e2c..24b966d5061a 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, unsigned long rom_pl; static int die_nmi_called; - if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI) + if (ulReason != DIE_NMIUNKNOWN) goto out; if (!hpwdt_nmi_decoding) -- cgit v1.2.3-59-g8ed1b From 166d751479c6d4e5b17dfc1f204a9c4397c9b3f1 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 6 Jan 2011 16:18:49 -0500 Subject: x86, NMI: Add priorities to handlers In order to consolidate the NMI die_chain events, we need to setup the priorities for the die notifiers. I started by defining a bunch of common priorities that can be used by the notifier blocks. Then I modified the notifier blocks to use the newly created priorities. Now that the priorities are straightened out, it should be easier to remove the event DIE_NMI_IPI. Signed-off-by: Don Zickus Signed-off-by: Peter Zijlstra LKML-Reference: <1294348732-15030-4-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 20 ++++++++++++++++++++ arch/x86/kernel/apic/hw_nmi.c | 2 +- arch/x86/kernel/cpu/mcheck/mce-inject.c | 2 +- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/kgdb.c | 3 ++- arch/x86/kernel/reboot.c | 2 ++ arch/x86/oprofile/nmi_int.c | 2 +- arch/x86/oprofile/nmi_timer_int.c | 2 +- 8 files changed, 29 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c4021b953510..c76f5b92b840 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -23,6 +23,26 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif +/* + * Define some priorities for the nmi notifier call chain. + * + * Create a local nmi bit that has a higher priority than + * external nmis, because the local ones are more frequent. + * + * Also setup some default high/normal/low settings for + * subsystems to registers with. Using 4 bits to seperate + * the priorities. This can go alot higher if needed be. + */ + +#define NMI_LOCAL_SHIFT 16 /* randomly picked */ +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) +#define NMI_HIGH_PRIOR (1ULL << 8) +#define NMI_NORMAL_PRIOR (1ULL << 4) +#define NMI_LOW_PRIOR (1ULL << 0) +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) + void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 72ec29e1ae06..8bc49f1ac7bc 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -96,7 +96,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, static __read_mostly struct notifier_block backtrace_notifier = { .notifier_call = arch_trigger_all_cpu_backtrace_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static int __init register_trigger_all_cpu_backtrace(void) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index e7dbde7bfedb..59546c1219f9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -95,7 +95,7 @@ static int mce_raise_notify(struct notifier_block *self, static struct notifier_block mce_raise_nb = { .notifier_call = mce_raise_notify, - .priority = 1000, + .priority = NMI_LOCAL_NORMAL_PRIOR, }; /* Inject mce on current CPU */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a360d146596..5e14b5e5fb81 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1318,7 +1318,7 @@ perf_event_nmi_handler(struct notifier_block *self, static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static struct event_constraint unconstrained; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index cd21b654dec6..d43c84183d8f 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -48,6 +48,7 @@ #include #include #include +#include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -606,7 +607,7 @@ static struct notifier_block kgdb_notifier = { /* * Lowest-prio notifier priority, we want to be notified last: */ - .priority = -INT_MAX, + .priority = NMI_LOCAL_LOW_PRIOR, }; /** diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c495aa8d4815..9c1c83e4a742 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -778,6 +778,8 @@ static void smp_send_nmi_allbutself(void) static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, + /* we want to be the first one called */ + .priority = NMI_LOCAL_HIGH_PRIOR+1, }; /* Halt all other CPUs, calling the specified function on each of them diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 358c8b9c96a7..6e84ea42085a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -361,7 +361,7 @@ static void nmi_cpu_setup(void *dummy) static struct notifier_block profile_exceptions_nb = { .notifier_call = profile_exceptions_notify, .next = NULL, - .priority = 2 + .priority = NMI_LOCAL_LOW_PRIOR, }; static void nmi_cpu_restore_registers(struct op_msrs *msrs) diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index 0636dd93cef8..720bf5a53c51 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, static struct notifier_block profile_timer_exceptions_nb = { .notifier_call = profile_timer_exceptions_notify, .next = NULL, - .priority = 0 + .priority = NMI_LOW_PRIOR, }; static int timer_start(void) -- cgit v1.2.3-59-g8ed1b From c410b8307702c1e1f35be3fd868ad18e4ba0410f Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 6 Jan 2011 16:18:50 -0500 Subject: x86, NMI: Remove DIE_NMI_IPI With priorities in place and no one really understanding the difference between DIE_NMI and DIE_NMI_IPI, just remove DIE_NMI_IPI and convert everyone to DIE_NMI. This also simplifies default_do_nmi() a little bit. Instead of calling the die_notifier in both the if and else part, just pull it out and call it before the if-statement. This has the side benefit of avoiding a call to the ioport to see if there is an external NMI sitting around until after the (more frequent) internal NMIs are dealt with. Patch-Inspired-by: Huang Ying Signed-off-by: Don Zickus Signed-off-by: Peter Zijlstra LKML-Reference: <1294348732-15030-5-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kdebug.h | 1 - arch/x86/kernel/apic/hw_nmi.c | 1 - arch/x86/kernel/cpu/mcheck/mce-inject.c | 3 ++- arch/x86/kernel/cpu/perf_event.c | 1 - arch/x86/kernel/kgdb.c | 4 ---- arch/x86/kernel/reboot.c | 3 ++- arch/x86/kernel/traps.c | 19 ++++++++----------- arch/x86/oprofile/nmi_int.c | 1 - 8 files changed, 12 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index f23eb2528464..ca242d35e873 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -18,7 +18,6 @@ enum die_val { DIE_TRAP, DIE_GPF, DIE_CALL, - DIE_NMI_IPI, DIE_PAGE_FAULT, DIE_NMIUNKNOWN, }; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 8bc49f1ac7bc..79fd43ca6f96 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -68,7 +68,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; default: diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 59546c1219f9..a77971979564 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -25,6 +25,7 @@ #include #include #include +#include /* Update fake mce registers on current CPU. */ static void inject_mce(struct mce *m) @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5e14b5e5fb81..c71bae43a51b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1268,7 +1268,6 @@ perf_event_nmi_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; case DIE_NMIUNKNOWN: this_nmi = percpu_read(irq_stat.__nmi_count); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index d43c84183d8f..a4130005028a 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -526,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMI_IPI: - /* Just ignore, we will handle the roundup on DIE_NMI. */ - return NOTIFY_DONE; - case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9c1c83e4a742..fc7aae1e2bc7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 # include @@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self, { int cpu; - if (val != DIE_NMI_IPI) + if (val != DIE_NMI) return NOTIFY_OK; cpu = raw_smp_processor_id(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c7fd1cea0374..23f6ac05d04c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -394,6 +394,14 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) unsigned char reason = 0; int cpu; + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + cpu = smp_processor_id(); /* Only the BSP gets external NMIs from the system. */ @@ -401,21 +409,10 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) reason = get_nmi_reason(); if (!(reason & NMI_REASON_MASK)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; - -#ifdef CONFIG_X86_LOCAL_APIC - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; -#endif unknown_nmi_error(reason, regs); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) - return; /* AK: following checks seem to be broken on modern chipsets. FIXME */ if (reason & NMI_REASON_SERR) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 6e84ea42085a..e77ea0b566e0 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -65,7 +65,6 @@ static int profile_exceptions_notify(struct notifier_block *self, switch (val) { case DIE_NMI: - case DIE_NMI_IPI: if (ctr_running) model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); else if (!nmi_enabled) -- cgit v1.2.3-59-g8ed1b From ab846f13f69fa64f8ed69ce0c3e239e075910d23 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 6 Jan 2011 16:18:51 -0500 Subject: x86, NMI: Allow NMI reason io port (0x61) to be processed on any CPU In original NMI handler, NMI reason io port (0x61) is only processed on BSP. This makes it impossible to hot-remove BSP. To solve the issue, a raw spinlock is used to allow the port to be processed on any CPU. Originally-by: Huang Ying Signed-off-by: Don Zickus Signed-off-by: Peter Zijlstra LKML-Reference: <1294348732-15030-6-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 23f6ac05d04c..613b3d284a89 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -84,6 +84,11 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); static inline void conditional_sti(struct pt_regs *regs) { @@ -392,7 +397,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; - int cpu; /* * CPU-specific NMI must be processed before non-CPU-specific @@ -402,13 +406,12 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) return; - cpu = smp_processor_id(); - - /* Only the BSP gets external NMIs from the system. */ - if (!cpu) - reason = get_nmi_reason(); + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); if (!(reason & NMI_REASON_MASK)) { + raw_spin_unlock(&nmi_reason_lock); unknown_nmi_error(reason, regs); return; @@ -426,6 +429,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) */ reassert_nmi(); #endif + raw_spin_unlock(&nmi_reason_lock); } dotraplinkage notrace __kprobes void -- cgit v1.2.3-59-g8ed1b From f2fd43954abc058586e95d4eb91e7a5477070704 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 6 Jan 2011 16:18:52 -0500 Subject: x86, NMI: Clean-up default_do_nmi() Just re-arrange the code a bit to make it easier to follow what is going on. Basically un-negating the if-statement and swapping the code inside the if-statement with code outside. No functional changes. Originally-by: Huang Ying Signed-off-by: Don Zickus Signed-off-by: Peter Zijlstra LKML-Reference: <1294348732-15030-7-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 613b3d284a89..b9b67166f9de 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -410,26 +410,24 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) raw_spin_lock(&nmi_reason_lock); reason = get_nmi_reason(); - if (!(reason & NMI_REASON_MASK)) { + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); +#endif raw_spin_unlock(&nmi_reason_lock); - unknown_nmi_error(reason, regs); - return; } - - /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered: - */ - reassert_nmi(); -#endif raw_spin_unlock(&nmi_reason_lock); + + unknown_nmi_error(reason, regs); } dotraplinkage notrace __kprobes void -- cgit v1.2.3-59-g8ed1b From 39a6eebda253aa34d659ca9436e3c32ef60473f1 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Fri, 7 Jan 2011 16:59:49 +0100 Subject: x86, dumpstack: Fix unused variable warning In dump_stack function, bp isn't used anymore, which is introduced by commit 9c0729dc8062bed96189bd14ac6d4920f3958743. This patch removes bp completely. Signed-off-by: Rakib Mullick Cc: Soeren Sandmann Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin LKML-Reference: Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/dumpstack.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8474c998cbd4..d6fb146c0d8b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -197,14 +197,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) */ void dump_stack(void) { - unsigned long bp = 0; unsigned long stack; -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - printk("Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, -- cgit v1.2.3-59-g8ed1b From 625dbc3b8acbefefefe27e1d7bbc6e53eb4f3f2d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 6 Jan 2011 15:22:47 +0100 Subject: x86: Save rbp in pt_regs on irq entry From the x86_64 low level interrupt handlers, the frame pointer is saved right after the partial pt_regs frame. rbp is not supposed to be part of the irq partial saved registers, but it only requires to extend the pt_regs frame by 8 bytes to do so, plus a tiny stack offset fixup on irq exit. This changes a bit the semantics or get_irq_entry() that is supposed to provide only the value of caller saved registers and the cpu saved frame. However it's a win for unwinders that can walk through stack frames on top of get_irq_regs() snapshots. A noticeable impact is that it makes perf events cpu-clock and task-clock events based callchains working on x86_64. Let's then save rbp into the irq pt_regs. As a result with: perf record -e cpu-clock perf bench sched messaging perf report --stdio Before: 20.94% perf [kernel.kallsyms] [k] lock_acquire | --- lock_acquire | |--44.01%-- __write_nocancel | |--43.18%-- __read | |--6.08%-- fork | create_worker | |--0.88%-- _dl_fixup | |--0.65%-- do_lookup_x | |--0.53%-- __GI___libc_read --4.67%-- [...] After: 19.23% perf [kernel.kallsyms] [k] __lock_acquire | --- __lock_acquire | |--97.74%-- lock_acquire | | | |--21.82%-- _raw_spin_lock | | | | | |--37.26%-- unix_stream_recvmsg | | | sock_aio_read | | | do_sync_read | | | vfs_read | | | sys_read | | | system_call | | | __read | | | | | |--24.09%-- unix_stream_sendmsg | | | sock_aio_write | | | do_sync_write | | | vfs_write | | | sys_write | | | system_call | | | __write_nocancel v2: Fix cfi annotations. Reported-by: Soeren Sandmann Pedersen Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Jan Beulich --- arch/x86/kernel/entry_64.S | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e3ba417e8697..d3b895f375d3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64) ENTRY(save_args) XCPT_FRAME cld - movq_cfi rdi, RDI+16-ARGOFFSET - movq_cfi rsi, RSI+16-ARGOFFSET - movq_cfi rdx, RDX+16-ARGOFFSET - movq_cfi rcx, RCX+16-ARGOFFSET - movq_cfi rax, RAX+16-ARGOFFSET - movq_cfi r8, R8+16-ARGOFFSET - movq_cfi r9, R9+16-ARGOFFSET - movq_cfi r10, R10+16-ARGOFFSET - movq_cfi r11, R11+16-ARGOFFSET - - leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ + /* + * start from rbp in pt_regs and jump over + * return address. + */ + movq_cfi rdi, RDI+8-RBP + movq_cfi rsi, RSI+8-RBP + movq_cfi rdx, RDX+8-RBP + movq_cfi rcx, RCX+8-RBP + movq_cfi rax, RAX+8-RBP + movq_cfi r8, R8+8-RBP + movq_cfi r9, R9+8-RBP + movq_cfi r10, R10+8-RBP + movq_cfi r11, R11+8-RBP + + leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ movq_cfi rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) @@ -782,8 +786,9 @@ END(interrupt) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - subq $ORIG_RAX-ARGOFFSET+8, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 + /* reserve pt_regs for scratch regs and rbp */ + subq $ORIG_RAX-RBP, %rsp + CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP call save_args PARTIAL_FRAME 0 call \func @@ -808,9 +813,14 @@ ret_from_intr: TRACE_IRQS_OFF decl PER_CPU_VAR(irq_count) leaveq + CFI_RESTORE rbp CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 + + /* we did not save rbx, restore only from ARGOFFSET */ + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) -- cgit v1.2.3-59-g8ed1b From 047a3772feaae8e43d81d790f3d3f80dae8ae676 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 7 Jan 2011 21:42:06 +0300 Subject: perf, x86: P4 PMU - Fix unflagged overflows handling Don found that P4 PMU reads CCCR register instead of counter itself (in attempt to catch unflagged event) this makes P4 NMI handler to consume all NMIs it observes. So the other NMI users such as kgdb simply have no chance to get NMI on their hands. Side note: at moment there is no way to run nmi-watchdog together with perf tool. This is because both 'perf top' and nmi-watchdog use same event. So while nmi-watchdog reserves one event/counter for own needs there is no room for perf tool left (there is a way to disable nmi-watchdog on boot of course). Ming has tested this patch with the following results | 1. watchdog disabled | | kgdb tests on boot OK | perf works OK | | 2. watchdog enabled, without patch perf-x86-p4-nmi-4 | | kgdb tests on boot hang | | 3. watchdog enabled, without patch perf-x86-p4-nmi-4 and do not run kgdb | tests on boot | | "perf top" partialy works | cpu-cycles no | instructions yes | cache-references no | cache-misses no | branch-instructions no | branch-misses yes | bus-cycles no | | 4. watchdog enabled, with patch perf-x86-p4-nmi-4 applied | | kgdb tests on boot OK | perf does not work, NMI "Dazed and confused" messages show up | Which means we still have problems with p4 box due to 'unknown' nmi happens but at least it should fix kgdb test cases. Reported-by: Jason Wessel Reported-by: Don Zickus Signed-off-by: Cyrill Gorcunov Acked-by: Don Zickus Acked-by: Lin Ming Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: <4D275E7E.3040903@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 3 +++ arch/x86/kernel/cpu/perf_event_p4.c | 28 +++++++++++++++------------- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 295e2ff18a6a..e2f6a99f14ab 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -20,6 +20,9 @@ #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) #define ARCH_P4_MAX_CCCR (18) +#define ARCH_P4_CNTRVAL_BITS (40) +#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) + #define P4_ESCR_EVENT_MASK 0x7e000000U #define P4_ESCR_EVENT_SHIFT 25 #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 81400b93e694..e56b9bfbabd1 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -753,19 +753,21 @@ out: static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) { - int overflow = 0; - u32 low, high; + u64 v; - rdmsr(hwc->config_base + hwc->idx, low, high); - - /* we need to check high bit for unflagged overflows */ - if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { - overflow = 1; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, - ((u64)low) & ~P4_CCCR_OVF); + /* an official way for overflow indication */ + rdmsrl(hwc->config_base + hwc->idx, v); + if (v & P4_CCCR_OVF) { + wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); + return 1; } - return overflow; + /* it might be unflagged overflow */ + rdmsrl(hwc->event_base + hwc->idx, v); + if (!(v & ARCH_P4_CNTRVAL_MASK)) + return 1; + + return 0; } static void p4_pmu_disable_pebs(void) @@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = { */ .num_counters = ARCH_P4_MAX_CCCR, .apic = 1, - .cntval_bits = 40, - .cntval_mask = (1ULL << 40) - 1, - .max_period = (1ULL << 39) - 1, + .cntval_bits = ARCH_P4_CNTRVAL_BITS, + .cntval_mask = ARCH_P4_CNTRVAL_MASK, + .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, /* -- cgit v1.2.3-59-g8ed1b