aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/smp.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/smp.c')
-rw-r--r--arch/powerpc/kernel/smp.c299
1 files changed, 248 insertions, 51 deletions
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d68ed1f004a3..df2a41647d8e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -39,6 +39,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
@@ -86,8 +87,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS];
int smt_enabled_at_boot = 1;
-static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
-
/*
* Returns 1 if the specified cpu should be brought up during boot.
* Used to inhibit booting threads if they've been disabled or
@@ -158,32 +157,33 @@ static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t debug_ipi_action(int irq, void *data)
+#ifdef CONFIG_NMI_IPI
+static irqreturn_t nmi_ipi_action(int irq, void *data)
{
- if (crash_ipi_function_ptr) {
- crash_ipi_function_ptr(get_irq_regs());
- return IRQ_HANDLED;
- }
-
-#ifdef CONFIG_DEBUGGER
- debugger_ipi(get_irq_regs());
-#endif /* CONFIG_DEBUGGER */
-
+ smp_handle_nmi_ipi(get_irq_regs());
return IRQ_HANDLED;
}
+#endif
static irq_handler_t smp_ipi_action[] = {
[PPC_MSG_CALL_FUNCTION] = call_function_action,
[PPC_MSG_RESCHEDULE] = reschedule_action,
[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
- [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+#ifdef CONFIG_NMI_IPI
+ [PPC_MSG_NMI_IPI] = nmi_ipi_action,
+#endif
};
+/*
+ * The NMI IPI is a fallback and not truly non-maskable. It is simpler
+ * than going through the call function infrastructure, and strongly
+ * serialized, so it is more appropriate for debugging.
+ */
const char *smp_ipi_name[] = {
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
- [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+ [PPC_MSG_NMI_IPI] = "nmi ipi",
};
/* optional function to request ipi, for controllers with >= 4 ipis */
@@ -191,14 +191,13 @@ int smp_request_message_ipi(int virq, int msg)
{
int err;
- if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+ if (msg < 0 || msg > PPC_MSG_NMI_IPI)
return -EINVAL;
- }
-#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE)
- if (msg == PPC_MSG_DEBUGGER_BREAK) {
+#ifndef CONFIG_NMI_IPI
+ if (msg == PPC_MSG_NMI_IPI)
return 1;
- }
#endif
+
err = request_irq(virq, smp_ipi_action[msg],
IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
smp_ipi_name[msg], NULL);
@@ -211,17 +210,9 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
long messages; /* current messages */
- unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
-void smp_muxed_ipi_set_data(int cpu, unsigned long data)
-{
- struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-
- info->data = data;
-}
-
void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
@@ -236,14 +227,13 @@ void smp_muxed_ipi_set_message(int cpu, int msg)
void smp_muxed_ipi_message_pass(int cpu, int msg)
{
- struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-
smp_muxed_ipi_set_message(cpu, msg);
+
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
*/
- smp_ops->cause_ipi(cpu, info->data);
+ smp_ops->cause_ipi(cpu);
}
#ifdef __BIG_ENDIAN__
@@ -254,11 +244,18 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
irqreturn_t smp_ipi_demux(void)
{
- struct cpu_messages *info = this_cpu_ptr(&ipi_message);
- unsigned long all;
-
mb(); /* order any irq clear */
+ return smp_ipi_demux_relaxed();
+}
+
+/* sync-free variant. Callers should ensure synchronization */
+irqreturn_t smp_ipi_demux_relaxed(void)
+{
+ struct cpu_messages *info;
+ unsigned long all;
+
+ info = this_cpu_ptr(&ipi_message);
do {
all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
@@ -278,8 +275,10 @@ irqreturn_t smp_ipi_demux(void)
scheduler_ipi();
if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
tick_broadcast_ipi_handler();
- if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
- debug_ipi_action(0, NULL);
+#ifdef CONFIG_NMI_IPI
+ if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
+ nmi_ipi_action(0, NULL);
+#endif
} while (info->messages);
return IRQ_HANDLED;
@@ -316,6 +315,187 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
+#ifdef CONFIG_NMI_IPI
+
+/*
+ * "NMI IPI" system.
+ *
+ * NMI IPIs may not be recoverable, so should not be used as ongoing part of
+ * a running system. They can be used for crash, debug, halt/reboot, etc.
+ *
+ * NMI IPIs are globally single threaded. No more than one in progress at
+ * any time.
+ *
+ * The IPI call waits with interrupts disabled until all targets enter the
+ * NMI handler, then the call returns.
+ *
+ * No new NMI can be initiated until targets exit the handler.
+ *
+ * The IPI call may time out without all targets entering the NMI handler.
+ * In that case, there is some logic to recover (and ignore subsequent
+ * NMI interrupts that may eventually be raised), but the platform interrupt
+ * handler may not be able to distinguish this from other exception causes,
+ * which may cause a crash.
+ */
+
+static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
+static struct cpumask nmi_ipi_pending_mask;
+static int nmi_ipi_busy_count = 0;
+static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
+
+static void nmi_ipi_lock_start(unsigned long *flags)
+{
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ raw_local_irq_restore(*flags);
+ cpu_relax();
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
+}
+
+static void nmi_ipi_lock(void)
+{
+ while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ cpu_relax();
+}
+
+static void nmi_ipi_unlock(void)
+{
+ smp_mb();
+ WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
+ atomic_set(&__nmi_ipi_lock, 0);
+}
+
+static void nmi_ipi_unlock_end(unsigned long *flags)
+{
+ nmi_ipi_unlock();
+ raw_local_irq_restore(*flags);
+}
+
+/*
+ * Platform NMI handler calls this to ack
+ */
+int smp_handle_nmi_ipi(struct pt_regs *regs)
+{
+ void (*fn)(struct pt_regs *);
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 0;
+
+ /*
+ * Unexpected NMIs are possible here because the interrupt may not
+ * be able to distinguish NMI IPIs from other types of NMIs, or
+ * because the caller may have timed out.
+ */
+ nmi_ipi_lock_start(&flags);
+ if (!nmi_ipi_busy_count)
+ goto out;
+ if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
+ goto out;
+
+ fn = nmi_ipi_function;
+ if (!fn)
+ goto out;
+
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ nmi_ipi_busy_count++;
+ nmi_ipi_unlock();
+
+ ret = 1;
+
+ fn(regs);
+
+ nmi_ipi_lock();
+ nmi_ipi_busy_count--;
+out:
+ nmi_ipi_unlock_end(&flags);
+
+ return ret;
+}
+
+static void do_smp_send_nmi_ipi(int cpu)
+{
+ if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
+ return;
+
+ if (cpu >= 0) {
+ do_message_pass(cpu, PPC_MSG_NMI_IPI);
+ } else {
+ int c;
+
+ for_each_online_cpu(c) {
+ if (c == raw_smp_processor_id())
+ continue;
+ do_message_pass(c, PPC_MSG_NMI_IPI);
+ }
+ }
+}
+
+/*
+ * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
+ * - fn is the target callback function.
+ * - delay_us > 0 is the delay before giving up waiting for targets to
+ * enter the handler, == 0 specifies indefinite delay.
+ */
+static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 1;
+
+ BUG_ON(cpu == me);
+ BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);
+
+ if (unlikely(!smp_ops))
+ return 0;
+
+ /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
+ nmi_ipi_lock_start(&flags);
+ while (nmi_ipi_busy_count) {
+ nmi_ipi_unlock_end(&flags);
+ cpu_relax();
+ nmi_ipi_lock_start(&flags);
+ }
+
+ nmi_ipi_function = fn;
+
+ if (cpu < 0) {
+ /* ALL_OTHERS */
+ cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ } else {
+ /* cpumask starts clear */
+ cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
+ }
+ nmi_ipi_busy_count++;
+ nmi_ipi_unlock();
+
+ do_smp_send_nmi_ipi(cpu);
+
+ while (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ udelay(1);
+ if (delay_us) {
+ delay_us--;
+ if (!delay_us)
+ break;
+ }
+ }
+
+ nmi_ipi_lock();
+ if (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ /* Could not gather all CPUs */
+ ret = 0;
+ cpumask_clear(&nmi_ipi_pending_mask);
+ }
+ nmi_ipi_busy_count--;
+ nmi_ipi_unlock_end(&flags);
+
+ return ret;
+}
+#endif /* CONFIG_NMI_IPI */
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
@@ -326,29 +506,22 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
-void smp_send_debugger_break(void)
+#ifdef CONFIG_DEBUGGER
+void debugger_ipi_callback(struct pt_regs *regs)
{
- int cpu;
- int me = raw_smp_processor_id();
-
- if (unlikely(!smp_ops))
- return;
+ debugger_ipi(regs);
+}
- for_each_online_cpu(cpu)
- if (cpu != me)
- do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+void smp_send_debugger_break(void)
+{
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif
#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
- crash_ipi_function_ptr = crash_ipi_callback;
- if (crash_ipi_callback) {
- mb();
- smp_send_debugger_break();
- }
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
}
#endif
@@ -439,7 +612,21 @@ int generic_cpu_disable(void)
#ifdef CONFIG_PPC64
vdso_data->processorCount--;
#endif
- migrate_irqs();
+ /* Update affinity of all IRQs previously aimed at this CPU */
+ irq_migrate_all_off_this_cpu();
+
+ /*
+ * Depending on the details of the interrupt controller, it's possible
+ * that one of the interrupts we just migrated away from this CPU is
+ * actually already pending on this CPU. If we leave it in that state
+ * the interrupt will never be EOI'ed, and will never fire again. So
+ * temporarily enable interrupts here, to allow any pending interrupt to
+ * be received (and EOI'ed), before we take this CPU offline.
+ */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
return 0;
}
@@ -521,6 +708,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
cpu_idle_thread_init(cpu, tidle);
+ /*
+ * The platform might need to allocate resources prior to bringing
+ * up the CPU
+ */
+ if (smp_ops->prepare_cpu) {
+ rc = smp_ops->prepare_cpu(cpu);
+ if (rc)
+ return rc;
+ }
+
/* Make sure callin-map entry is 0 (can be leftover a CPU
* hotplug
*/