aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/powerpc/kernel/smp.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/smp.c')
-rw-r--r--arch/powerpc/kernel/smp.c255
1 files changed, 140 insertions, 115 deletions
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index bcefab484ea6..a60e4139214b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -35,6 +35,7 @@
#include <linux/stackprotector.h>
#include <linux/pgtable.h>
#include <linux/clockchips.h>
+#include <linux/kexec.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -46,6 +47,7 @@
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
+#include <asm/mmu_context.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
@@ -55,12 +57,13 @@
#endif
#include <asm/vdso.h>
#include <asm/debug.h>
-#include <asm/kexec.h>
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>
#include <asm/kup.h>
#include <asm/fadump.h>
+#include <trace/events/ipi.h>
+
#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
@@ -74,10 +77,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
struct task_struct *secondary_current;
-bool has_big_cores;
-bool coregroup_enabled;
-bool thread_group_shares_l2;
-bool thread_group_shares_l3;
+bool has_big_cores __ro_after_init;
+bool coregroup_enabled __ro_after_init;
+bool thread_group_shares_l2 __ro_after_init;
+bool thread_group_shares_l3 __ro_after_init;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -90,15 +93,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
EXPORT_SYMBOL_GPL(has_big_cores);
-enum {
-#ifdef CONFIG_SCHED_SMT
- smt_idx,
-#endif
- cache_idx,
- mc_idx,
- die_idx,
-};
-
#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
#define THREAD_GROUP_SHARE_L2_L3 2
@@ -289,7 +283,7 @@ void smp_muxed_ipi_set_message(int cpu, int msg)
* Order previous accesses before accesses in the IPI handler.
*/
smp_mb();
- message[msg] = 1;
+ WRITE_ONCE(message[msg], 1);
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
@@ -348,7 +342,7 @@ irqreturn_t smp_ipi_demux_relaxed(void)
if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
nmi_ipi_action(0, NULL);
#endif
- } while (info->messages);
+ } while (READ_ONCE(info->messages));
return IRQ_HANDLED;
}
@@ -364,12 +358,12 @@ static inline void do_message_pass(int cpu, int msg)
#endif
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
if (likely(smp_ops))
do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
-EXPORT_SYMBOL_GPL(smp_send_reschedule);
+EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
void arch_send_call_function_single_ipi(int cpu)
{
@@ -415,9 +409,9 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags)
{
raw_local_irq_save(*flags);
hard_irq_disable();
- while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -425,15 +419,15 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags)
noinstr static void nmi_ipi_lock(void)
{
- while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
}
noinstr static void nmi_ipi_unlock(void)
{
smp_mb();
- WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1);
- arch_atomic_set(&__nmi_ipi_lock, 0);
+ WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1);
+ raw_atomic_set(&__nmi_ipi_lock, 0);
}
noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
@@ -619,20 +613,6 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
}
#endif
-#ifdef CONFIG_NMI_IPI
-static void crash_stop_this_cpu(struct pt_regs *regs)
-#else
-static void crash_stop_this_cpu(void *dummy)
-#endif
-{
- /*
- * Just busy wait here and avoid marking CPU as offline to ensure
- * register data is captured appropriately.
- */
- while (1)
- cpu_relax();
-}
-
void crash_smp_send_stop(void)
{
static bool stopped = false;
@@ -651,11 +631,14 @@ void crash_smp_send_stop(void)
stopped = true;
-#ifdef CONFIG_NMI_IPI
- smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000);
-#else
- smp_call_function(crash_stop_this_cpu, NULL, 0);
-#endif /* CONFIG_NMI_IPI */
+#ifdef CONFIG_KEXEC_CORE
+ if (kexec_crash_image) {
+ crash_kexec_prepare();
+ return;
+ }
+#endif
+
+ smp_send_stop();
}
#ifdef CONFIG_NMI_IPI
@@ -719,7 +702,7 @@ static struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
per_cpu(next_tlbcam_idx, id)
= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
@@ -995,13 +978,13 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
return 0;
}
-static bool shared_caches;
+static bool shared_caches __ro_after_init;
#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
@@ -1012,6 +995,13 @@ static int powerpc_smt_flags(void)
#endif
/*
+ * On shared processor LPARs scheduled on a big core (which has two or more
+ * independent thread groups per core), prefer lower numbered CPUs, so
+ * that workload consolidates to lesser number of cores.
+ */
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
+
+/*
* P9 has a slightly odd architecture where pairs of cores share an L2 cache.
* This topology makes it *much* cheaper to migrate tasks between adjacent cores
* since the migrated task remains cache hot. We want to take advantage of this
@@ -1019,7 +1009,18 @@ static int powerpc_smt_flags(void)
*/
static int powerpc_shared_cache_flags(void)
{
- return SD_SHARE_PKG_RESOURCES;
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_SHARE_LLC | SD_ASYM_PACKING;
+
+ return SD_SHARE_LLC;
+}
+
+static int powerpc_shared_proc_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_ASYM_PACKING;
+
+ return 0;
}
/*
@@ -1045,6 +1046,10 @@ static struct cpumask *cpu_coregroup_mask(int cpu)
static bool has_coregroup_support(void)
{
+ /* Coregroup identification not available on shared systems */
+ if (is_shared_processor())
+ return 0;
+
return coregroup_enabled;
}
@@ -1053,16 +1058,6 @@ static const struct cpumask *cpu_mc_mask(int cpu)
return cpu_coregroup_mask(cpu);
}
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_mc_mask, SD_INIT_NAME(MC) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
-
static int __init init_big_cores(void)
{
int cpu;
@@ -1096,7 +1091,7 @@ static int __init init_big_cores(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- unsigned int cpu;
+ unsigned int cpu, num_threads;
DBG("smp_prepare_cpus\n");
@@ -1163,6 +1158,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (smp_ops && smp_ops->probe)
smp_ops->probe();
+
+ // Initalise the generic SMT topology support
+ num_threads = 1;
+ if (smt_enabled_at_boot)
+ num_threads = smt_enabled_at_boot;
+ cpu_smt_set_num_threads(num_threads, threads_per_core);
}
void smp_prepare_boot_cpu(void)
@@ -1260,7 +1261,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
- THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ THREAD_SIZE - STACK_FRAME_MIN_SIZE;
#endif
task_thread_info(idle)->cpu = cpu;
secondary_current = current_set[cpu] = idle;
@@ -1268,7 +1269,12 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- int rc, c;
+ const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC;
+ const bool booting = system_state < SYSTEM_RUNNING;
+ const unsigned long hp_spin_ms = 1;
+ unsigned long deadline;
+ int rc;
+ const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms;
/*
* Don't allow secondary threads to come online if inhibited
@@ -1313,22 +1319,23 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
}
/*
- * wait to see if the cpu made a callin (is actually up).
- * use this value that I found through experimentation.
- * -- Cort
+ * At boot time, simply spin on the callin word until the
+ * deadline passes.
+ *
+ * At run time, spin for an optimistic amount of time to avoid
+ * sleeping in the common case.
*/
- if (system_state < SYSTEM_RUNNING)
- for (c = 50000; c && !cpu_callin_map[cpu]; c--)
- udelay(100);
-#ifdef CONFIG_HOTPLUG_CPU
- else
- /*
- * CPUs can take much longer to come up in the
- * hotplug case. Wait five seconds.
- */
- for (c = 5000; c && !cpu_callin_map[cpu]; c--)
- msleep(1);
-#endif
+ deadline = jiffies + msecs_to_jiffies(spin_wait_ms);
+ spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline));
+
+ if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) {
+ const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC;
+ const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC;
+
+ deadline = jiffies + msecs_to_jiffies(sleep_wait_ms);
+ while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline))
+ fsleep(sleep_interval_us);
+ }
if (!cpu_callin_map[cpu]) {
printk(KERN_ERR "Processor %u is stuck.\n", cpu);
@@ -1591,7 +1598,7 @@ static void add_cpu_to_masks(int cpu)
/* Skip all CPUs already part of current CPU core mask */
cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
- /* If chip_id is -1; limit the cpu_core_mask to within DIE*/
+ /* If chip_id is -1; limit the cpu_core_mask to within PKG */
if (chip_id == -1)
cpumask_and(mask, mask, cpu_cpu_mask(cpu));
@@ -1608,6 +1615,7 @@ static void add_cpu_to_masks(int cpu)
}
/* Activate a secondary processor. */
+__no_stack_protector
void start_secondary(void *unused)
{
unsigned int cpu = raw_smp_processor_id();
@@ -1616,12 +1624,15 @@ void start_secondary(void *unused)
if (IS_ENABLED(CONFIG_PPC32))
setup_kup();
- mmgrab(&init_mm);
+ mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
- rcu_cpu_starting(cpu);
+ rcutree_report_cpu_starting(cpu);
cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
@@ -1674,50 +1685,45 @@ void start_secondary(void *unused)
BUG();
}
-#ifdef CONFIG_PROFILING
-int setup_profiling_timer(unsigned int multiplier)
-{
- return 0;
-}
-#endif
+static struct sched_domain_topology_level powerpc_topology[6];
-static void __init fixup_topology(void)
+static void __init build_sched_topology(void)
{
- int i;
+ int i = 0;
+
+ if (is_shared_processor() && has_big_cores)
+ static_branch_enable(&splpar_asym_pack);
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
- powerpc_topology[smt_idx].mask = smallcore_smt_mask;
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
+ } else {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+ };
}
#endif
+ if (shared_caches) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
+ };
+ }
+ if (has_coregroup_support()) {
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
+ };
+ }
+ powerpc_topology[i++] = (struct sched_domain_topology_level){
+ cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG)
+ };
- if (!has_coregroup_support())
- powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
-
- /*
- * Try to consolidate topology levels here instead of
- * allowing scheduler to degenerate.
- * - Dont consolidate if masks are different.
- * - Dont consolidate if sd_flags exists and are different.
- */
- for (i = 1; i <= die_idx; i++) {
- if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
- continue;
-
- if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
- powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
- continue;
-
- if (!powerpc_topology[i - 1].sd_flags)
- powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
+ /* There must be one trailing NULL entry left. */
+ BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
- powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
- powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
-#ifdef CONFIG_SCHED_DEBUG
- powerpc_topology[i].name = powerpc_topology[i + 1].name;
-#endif
- }
+ set_sched_topology(powerpc_topology);
}
void __init smp_cpus_done(unsigned int max_cpus)
@@ -1732,9 +1738,20 @@ void __init smp_cpus_done(unsigned int max_cpus)
smp_ops->bringup_done();
dump_numa_cpu_topology();
+ build_sched_topology();
+}
- fixup_topology();
- set_sched_topology(powerpc_topology);
+/*
+ * For asym packing, by default lower numbered CPU has higher priority.
+ * On shared processors, pack to lower numbered core. However avoid moving
+ * between thread_groups within the same core.
+ */
+int arch_asym_cpu_priority(int cpu)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return -cpu / threads_per_core;
+
+ return -cpu;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1760,11 +1777,19 @@ int __cpu_disable(void)
void __cpu_die(unsigned int cpu)
{
+ /*
+ * This could perhaps be a generic call in idlea_task_dead(), but
+ * that requires testing from all archs, so first put it here to
+ */
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+ dec_mm_active_cpus(&init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
if (smp_ops->cpu_die)
smp_ops->cpu_die(cpu);
}
-void arch_cpu_idle_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
{
/*
* Disable on the down path. This will be re-enabled by