aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kernel/smpboot.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--arch/x86/kernel/smpboot.c151
1 files changed, 113 insertions, 38 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8de80608fd01..8779a7ed3e87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,6 +53,7 @@
#include <linux/tboot.h>
#include <linux/gfp.h>
#include <linux/cpuidle.h>
+#include <linux/kexec.h>
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
@@ -109,6 +110,20 @@ struct cpumask __cpu_primary_thread_mask __read_mostly;
/* Representing CPUs for which sibling maps can be computed */
static cpumask_var_t cpu_sibling_setup_mask;
+struct mwait_cpu_dead {
+ unsigned int control;
+ unsigned int status;
+};
+
+#define CPUDEAD_MWAIT_WAIT 0xDEADBEEF
+#define CPUDEAD_MWAIT_KEXEC_HLT 0x4A17DEAD
+
+/*
+ * Cache line aligned data for mwait_play_dead(). Separate on purpose so
+ * that it's unlikely to be touched by other CPUs.
+ */
+static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
+
/* Logical package management. We might want to allocate that dynamically */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
@@ -166,6 +181,10 @@ static void ap_starting(void)
{
int cpuid = smp_processor_id();
+ /* Mop up eventual mwait_play_dead() wreckage */
+ this_cpu_write(mwait_cpu_dead.status, 0);
+ this_cpu_write(mwait_cpu_dead.control, 0);
+
/*
* If woken up by an INIT in an 82489DX configuration the alive
* synchronization guarantees that the CPU does not reach this
@@ -828,47 +847,40 @@ static void __init smp_quirk_init_udelay(void)
/*
* Wake up AP by INIT, INIT, STARTUP sequence.
*/
-static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
+static void send_init_sequence(int phys_apicid)
{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, num_starts, j;
-
- preempt_disable();
- maxlvt = lapic_get_maxlvt();
+ int maxlvt = lapic_get_maxlvt();
- /*
- * Be paranoid about clearing APIC errors.
- */
+ /* Be paranoid about clearing APIC errors. */
if (APIC_INTEGRATED(boot_cpu_apic_version)) {
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ /* Due to the Pentium erratum 3AP. */
+ if (maxlvt > 3)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
}
- pr_debug("Asserting INIT\n");
-
- /*
- * Turn INIT on target chip
- */
- /*
- * Send IPI
- */
- apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
- phys_apicid);
-
- pr_debug("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
+ /* Assert INIT on the target CPU */
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, phys_apicid);
+ safe_apic_wait_icr_idle();
udelay(init_udelay);
- pr_debug("Deasserting INIT\n");
-
- /* Target chip */
- /* Send IPI */
+ /* Deassert INIT on the target CPU */
apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
+ safe_apic_wait_icr_idle();
+}
- pr_debug("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
+/*
+ * Wake up AP by INIT, INIT, STARTUP sequence.
+ */
+static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int num_starts, j, maxlvt;
+
+ preempt_disable();
+ maxlvt = lapic_get_maxlvt();
+ send_init_sequence(phys_apicid);
mb();
@@ -1330,6 +1342,25 @@ void arch_thaw_secondary_cpus_end(void)
cache_aps_init();
}
+bool smp_park_other_cpus_in_init(void)
+{
+ unsigned int cpu, this_cpu = smp_processor_id();
+ unsigned int apicid;
+
+ if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu)
+ return false;
+
+ for_each_present_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+ apicid = apic->cpu_present_to_apicid(cpu);
+ if (apicid == BAD_APICID)
+ continue;
+ send_init_sequence(apicid);
+ }
+ return true;
+}
+
/*
* Early setup to make printk work.
*/
@@ -1595,10 +1626,10 @@ void play_dead_common(void)
*/
static inline void mwait_play_dead(void)
{
+ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
- void *mwait_ptr;
int i;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
@@ -1633,12 +1664,9 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
- /*
- * This should be a memory location in a cache line which is
- * unlikely to be touched by other processors. The actual
- * content is immaterial as it is not actually modified in any way.
- */
- mwait_ptr = &current_thread_info()->flags;
+ /* Set up state for the kexec() hack below */
+ md->status = CPUDEAD_MWAIT_WAIT;
+ md->control = CPUDEAD_MWAIT_WAIT;
wbinvd();
@@ -1651,11 +1679,58 @@ static inline void mwait_play_dead(void)
* case where we return around the loop.
*/
mb();
- clflush(mwait_ptr);
+ clflush(md);
mb();
- __monitor(mwait_ptr, 0, 0);
+ __monitor(md, 0, 0);
mb();
__mwait(eax, 0);
+
+ if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
+ /*
+ * Kexec is about to happen. Don't go back into mwait() as
+ * the kexec kernel might overwrite text and data including
+ * page tables and stack. So mwait() would resume when the
+ * monitor cache line is written to and then the CPU goes
+ * south due to overwritten text, page tables and stack.
+ *
+ * Note: This does _NOT_ protect against a stray MCE, NMI,
+ * SMI. They will resume execution at the instruction
+ * following the HLT instruction and run into the problem
+ * which this is trying to prevent.
+ */
+ WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
+ while(1)
+ native_halt();
+ }
+ }
+}
+
+/*
+ * Kick all "offline" CPUs out of mwait on kexec(). See comment in
+ * mwait_play_dead().
+ */
+void smp_kick_mwait_play_dead(void)
+{
+ u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT;
+ struct mwait_cpu_dead *md;
+ unsigned int cpu, i;
+
+ for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) {
+ md = per_cpu_ptr(&mwait_cpu_dead, cpu);
+
+ /* Does it sit in mwait_play_dead() ? */
+ if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT)
+ continue;
+
+ /* Wait up to 5ms */
+ for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) {
+ /* Bring it out of mwait */
+ WRITE_ONCE(md->control, newstate);
+ udelay(5);
+ }
+
+ if (READ_ONCE(md->status) != newstate)
+ pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu);
}
}