aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r--kernel/cpu.c615
1 files changed, 463 insertions, 152 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9c706af713fb..bbad5e375d3b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3,6 +3,7 @@
*
* This code is licenced under the GPL.
*/
+#include <linux/sched/mm.h>
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
@@ -30,7 +31,11 @@
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
+#include <linux/scs.h>
#include <linux/percpu-rwsem.h>
+#include <linux/cpuset.h>
+#include <linux/random.h>
+#include <linux/cc_platform.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -39,14 +44,19 @@
#include "smpboot.h"
/**
- * cpuhp_cpu_state - Per cpu hotplug state storage
+ * struct cpuhp_cpu_state - Per cpu hotplug state storage
* @state: The current cpu state
* @target: The target state
+ * @fail: Current CPU hotplug callback state
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
* @rollback: Perform a rollback
* @single: Single callback invocation
* @bringup: Single callback bringup or teardown selector
+ * @cpu: CPU number
+ * @node: Remote CPU node; for multi-instance, do a
+ * single entry callback for install/remove
+ * @last: For multi-instance rollback, remember how far we got
* @cb_state: The state for a single callback (install/uninstall)
* @result: Result of the operation
* @done_up: Signal completion to the issuer of the task for cpu-up
@@ -103,11 +113,12 @@ static inline void cpuhp_lock_release(bool bringup) { }
#endif
/**
- * cpuhp_step - Hotplug state machine step
+ * struct cpuhp_step - Hotplug state machine step
* @name: Name of the step
* @startup: Startup function of the step
* @teardown: Teardown function of the step
* @cant_stop: Bringup/teardown can't be stopped at this step
+ * @multi_instance: State has multiple instances which get added afterwards
*/
struct cpuhp_step {
const char *name;
@@ -121,7 +132,9 @@ struct cpuhp_step {
int (*multi)(unsigned int cpu,
struct hlist_node *node);
} teardown;
+ /* private: */
struct hlist_head list;
+ /* public: */
bool cant_stop;
bool multi_instance;
};
@@ -134,8 +147,13 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
return cpuhp_hp_states + state;
}
+static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
+{
+ return bringup ? !step->startup.single : !step->teardown.single;
+}
+
/**
- * cpuhp_invoke_callback _ Invoke the callbacks for a given state
+ * cpuhp_invoke_callback - Invoke the callbacks for a given state
* @cpu: The cpu for which the callback should be invoked
* @state: The state to do callbacks for
* @bringup: True if the bringup callback should be invoked
@@ -143,6 +161,8 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
* @lastp: For multi-instance rollback, remember how far we got
*
* Called from cpu hotplug and from the state register machinery.
+ *
+ * Return: %0 on success or a negative errno code
*/
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
bool bringup, struct hlist_node *node,
@@ -156,26 +176,24 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
if (st->fail == state) {
st->fail = CPUHP_INVALID;
-
- if (!(bringup ? step->startup.single : step->teardown.single))
- return 0;
-
return -EAGAIN;
}
+ if (cpuhp_step_empty(bringup, step)) {
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
if (!step->multi_instance) {
WARN_ON_ONCE(lastp && *lastp);
cb = bringup ? step->startup.single : step->teardown.single;
- if (!cb)
- return 0;
+
trace_cpuhp_enter(cpu, st->target, state, cb);
ret = cb(cpu);
trace_cpuhp_exit(cpu, st->state, state, ret);
return ret;
}
cbm = bringup ? step->startup.multi : step->teardown.multi;
- if (!cbm)
- return 0;
/* Single invocation for instance add/remove */
if (node) {
@@ -329,14 +347,21 @@ void lockdep_assert_cpus_held(void)
percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
+#ifdef CONFIG_LOCKDEP
+int lockdep_is_cpus_held(void)
+{
+ return percpu_rwsem_is_held(&cpu_hotplug_lock);
+}
+#endif
+
static void lockdep_acquire_cpus_lock(void)
{
- rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
}
static void lockdep_release_cpus_lock(void)
{
- rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_);
+ rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
/*
@@ -432,7 +457,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
/*
* On x86 it's required to boot all logical CPUs at least once so
* that the init code can get a chance to set CR4.MCE on each
- * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
+ * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
* core will shutdown the machine.
*/
return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
@@ -450,23 +475,38 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
#endif
static inline enum cpuhp_state
-cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
+ bool bringup = st->state < target;
st->rollback = false;
st->last = NULL;
st->target = target;
st->single = false;
- st->bringup = st->state < target;
+ st->bringup = bringup;
+ if (cpu_dying(cpu) != !bringup)
+ set_cpu_dying(cpu, !bringup);
return prev_state;
}
static inline void
-cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
+ enum cpuhp_state prev_state)
{
+ bool bringup = !st->bringup;
+
+ st->target = prev_state;
+
+ /*
+ * Already rolling back. No need to invert the bringup value or to change
+ * the current state.
+ */
+ if (st->rollback)
+ return;
+
st->rollback = true;
/*
@@ -480,8 +520,9 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
st->state++;
}
- st->target = prev_state;
- st->bringup = !st->bringup;
+ st->bringup = bringup;
+ if (cpu_dying(cpu) != !bringup)
+ set_cpu_dying(cpu, !bringup);
}
/* Regular hotplug invocation of the AP hotplug thread */
@@ -501,15 +542,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
wait_for_ap_thread(st, st->bringup);
}
-static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
{
enum cpuhp_state prev_state;
int ret;
- prev_state = cpuhp_set_state(st, target);
+ prev_state = cpuhp_set_state(cpu, st, target);
__cpuhp_kick_ap(st);
if ((ret = st->result)) {
- cpuhp_reset_state(st, prev_state);
+ cpuhp_reset_state(cpu, st, prev_state);
__cpuhp_kick_ap(st);
}
@@ -541,7 +583,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
if (st->target <= CPUHP_AP_ONLINE_IDLE)
return 0;
- return cpuhp_kick_ap(st, st->target);
+ return cpuhp_kick_ap(cpu, st, st->target);
}
static int bringup_cpu(unsigned int cpu)
@@ -550,6 +592,12 @@ static int bringup_cpu(unsigned int cpu)
int ret;
/*
+ * Reset stale stack state from the last time this CPU was online.
+ */
+ scs_task_reset(idle);
+ kasan_unpoison_task_stack(idle);
+
+ /*
* Some architectures have to walk the irq descriptors to
* setup the vector space for the cpu which comes online.
* Prevent irq alloc/free across the bringup.
@@ -564,14 +612,72 @@ static int bringup_cpu(unsigned int cpu)
return bringup_wait_for_ap(cpu);
}
+static int finish_cpu(unsigned int cpu)
+{
+ struct task_struct *idle = idle_thread_get(cpu);
+ struct mm_struct *mm = idle->active_mm;
+
+ /*
+ * idle_task_exit() will have switched to &init_mm, now
+ * clean up any remaining active_mm state.
+ */
+ if (mm != &init_mm)
+ idle->active_mm = &init_mm;
+ mmdrop(mm);
+ return 0;
+}
+
/*
* Hotplug state machine related functions
*/
-static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
+/*
+ * Get the next state to run. Empty ones will be skipped. Returns true if a
+ * state must be run.
+ *
+ * st->state will be modified ahead of time, to match state_to_run, as if it
+ * has already run.
+ */
+static bool cpuhp_next_state(bool bringup,
+ enum cpuhp_state *state_to_run,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
{
- for (st->state--; st->state > st->target; st->state--)
- cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+ do {
+ if (bringup) {
+ if (st->state >= target)
+ return false;
+
+ *state_to_run = ++st->state;
+ } else {
+ if (st->state <= target)
+ return false;
+
+ *state_to_run = st->state--;
+ }
+
+ if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
+ break;
+ } while (true);
+
+ return true;
+}
+
+static int cpuhp_invoke_callback_range(bool bringup,
+ unsigned int cpu,
+ struct cpuhp_cpu_state *st,
+ enum cpuhp_state target)
+{
+ enum cpuhp_state state;
+ int err = 0;
+
+ while (cpuhp_next_state(bringup, &state, st, target)) {
+ err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
+ if (err)
+ break;
+ }
+
+ return err;
}
static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
@@ -594,16 +700,16 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state prev_state = st->state;
int ret = 0;
- while (st->state < target) {
- st->state++;
- ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
- if (ret) {
- if (can_rollback_cpu(st)) {
- st->target = prev_state;
- undo_cpu_up(cpu, st);
- }
- break;
- }
+ ret = cpuhp_invoke_callback_range(true, cpu, st, target);
+ if (ret) {
+ pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
+
+ cpuhp_reset_state(cpu, st, prev_state);
+ if (can_rollback_cpu(st))
+ WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
+ prev_state));
}
return ret;
}
@@ -611,14 +717,6 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
/*
* The cpu hotplug threads manage the bringup and teardown of the cpus
*/
-static void cpuhp_create(unsigned int cpu)
-{
- struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-
- init_completion(&st->done_up);
- init_completion(&st->done_down);
-}
-
static int cpuhp_should_run(unsigned int cpu)
{
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
@@ -667,17 +765,9 @@ static void cpuhp_thread_fun(unsigned int cpu)
state = st->cb_state;
st->should_run = false;
} else {
- if (bringup) {
- st->state++;
- state = st->state;
- st->should_run = (st->state < st->target);
- WARN_ON_ONCE(st->state > st->target);
- } else {
- state = st->state;
- st->state--;
- st->should_run = (st->state > st->target);
- WARN_ON_ONCE(st->state < st->target);
- }
+ st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
+ if (!st->should_run)
+ goto end;
}
WARN_ON_ONCE(!cpuhp_is_ap_state(state));
@@ -705,6 +795,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
st->should_run = false;
}
+end:
cpuhp_lock_release(bringup);
lockdep_release_cpus_lock();
@@ -777,7 +868,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
cpuhp_lock_release(true);
trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
- ret = cpuhp_kick_ap(st, st->target);
+ ret = cpuhp_kick_ap(cpu, st, st->target);
trace_cpuhp_exit(cpu, st->state, prev_state, ret);
return ret;
@@ -785,20 +876,82 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
static struct smp_hotplug_thread cpuhp_threads = {
.store = &cpuhp_state.thread,
- .create = &cpuhp_create,
.thread_should_run = cpuhp_should_run,
.thread_fn = cpuhp_thread_fun,
.thread_comm = "cpuhp/%u",
.selfparking = true,
};
+static __init void cpuhp_init_state(void)
+{
+ struct cpuhp_cpu_state *st;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ st = per_cpu_ptr(&cpuhp_state, cpu);
+ init_completion(&st->done_up);
+ init_completion(&st->done_down);
+ }
+}
+
void __init cpuhp_threads_init(void)
{
+ cpuhp_init_state();
BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
kthread_unpark(this_cpu_read(cpuhp_state.thread));
}
+/*
+ *
+ * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
+ * protected region.
+ *
+ * The operation is still serialized against concurrent CPU hotplug via
+ * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
+ * serialized against other hotplug related activity like adding or
+ * removing of state callbacks and state instances, which invoke either the
+ * startup or the teardown callback of the affected state.
+ *
+ * This is required for subsystems which are unfixable vs. CPU hotplug and
+ * evade lock inversion problems by scheduling work which has to be
+ * completed _before_ cpu_up()/_cpu_down() returns.
+ *
+ * Don't even think about adding anything to this for any new code or even
+ * drivers. Its only purpose is to keep existing lock order trainwrecks
+ * working.
+ *
+ * For cpu_down() there might be valid reasons to finish cleanups which are
+ * not required to be done under cpu_hotplug_lock, but that's a different
+ * story and would not be invoked via this.
+ */
+static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
+{
+ /*
+ * cpusets delegate hotplug operations to a worker to "solve" the
+ * lock order problems. Wait for the worker, but only if tasks are
+ * _not_ frozen (suspend, hibernate) as that would wait forever.
+ *
+ * The wait is required because otherwise the hotplug operation
+ * returns with inconsistent state, which could even be observed in
+ * user space when a new CPU is brought up. The CPU plug uevent
+ * would be delivered and user space reacting on it would fail to
+ * move tasks to the newly plugged CPU up to the point where the
+ * work has finished because up to that point the newly plugged CPU
+ * is not assignable in cpusets/cgroups. On unplug that's not
+ * necessarily a visible issue, but it is still inconsistent state,
+ * which is the real problem which needs to be "fixed". This can't
+ * prevent the transient state between scheduling the work and
+ * returning from waiting for it.
+ */
+ if (!tasks_frozen)
+ cpuset_wait_for_hotplug();
+}
+
#ifdef CONFIG_HOTPLUG_CPU
+#ifndef arch_clear_mm_cpumask_cpu
+#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
+#endif
+
/**
* clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
* @cpu: a CPU id
@@ -834,7 +987,7 @@ void clear_tasks_mm_cpumask(int cpu)
t = find_lock_task_mm(p);
if (!t)
continue;
- cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+ arch_clear_mm_cpumask_cpu(cpu, t->mm);
task_unlock(t);
}
rcu_read_unlock();
@@ -854,19 +1007,18 @@ static int take_cpu_down(void *_param)
return err;
/*
- * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
- * do this step again.
+ * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
+ * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
*/
- WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
- st->state--;
+ WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
+
/* Invoke the former CPU_DYING callbacks */
- for (; st->state > target; st->state--) {
- ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
- /*
- * DYING must not fail!
- */
- WARN_ON_ONCE(ret);
- }
+ ret = cpuhp_invoke_callback_range(false, cpu, st, target);
+
+ /*
+ * DYING must not fail!
+ */
+ WARN_ON_ONCE(ret);
/* Give up timekeeping duties */
tick_handover_do_timer();
@@ -883,7 +1035,7 @@ static int takedown_cpu(unsigned int cpu)
int err;
/* Park the smpboot threads */
- kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+ kthread_park(st->thread);
/*
* Prevent irq alloc/free while the dying cpu reorganizes the
@@ -899,7 +1051,7 @@ static int takedown_cpu(unsigned int cpu)
/* CPU refused to die */
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
- kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+ kthread_unpark(st->thread);
return err;
}
BUG_ON(cpu_online(cpu));
@@ -948,27 +1100,25 @@ void cpuhp_report_idle_dead(void)
cpuhp_complete_idle_dead, st, 0);
}
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
- for (st->state++; st->state < st->target; st->state++)
- cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
-}
-
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
int ret = 0;
- for (; st->state > target; st->state--) {
- ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
- if (ret) {
- st->target = prev_state;
- if (st->state < prev_state)
- undo_cpu_down(cpu, st);
- break;
- }
+ ret = cpuhp_invoke_callback_range(false, cpu, st, target);
+ if (ret) {
+ pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
+
+ cpuhp_reset_state(cpu, st, prev_state);
+
+ if (st->state < prev_state)
+ WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
+ prev_state));
}
+
return ret;
}
@@ -989,7 +1139,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
cpuhp_tasks_frozen = tasks_frozen;
- prev_state = cpuhp_set_state(st, target);
+ prev_state = cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread.
@@ -1018,9 +1168,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups.
*/
ret = cpuhp_down_callbacks(cpu, st, target);
- if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
- cpuhp_reset_state(st, prev_state);
- __cpuhp_kick_ap(st);
+ if (ret && st->state < prev_state) {
+ if (st->state == CPUHP_TEARDOWN_CPU) {
+ cpuhp_reset_state(cpu, st, prev_state);
+ __cpuhp_kick_ap(st);
+ } else {
+ WARN(1, "DEAD callback error for CPU%d", cpu);
+ }
}
out:
@@ -1031,17 +1185,24 @@ out:
*/
lockup_detector_cleanup();
arch_smt_update();
+ cpu_up_down_serialize_trainwrecks(tasks_frozen);
return ret;
}
static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
{
+ /*
+ * If the platform does not support hotplug, report it explicitly to
+ * differentiate it from a transient offlining failure.
+ */
+ if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
+ return -EOPNOTSUPP;
if (cpu_hotplug_disabled)
return -EBUSY;
return _cpu_down(cpu, 0, target);
}
-static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
+static int cpu_down(unsigned int cpu, enum cpuhp_state target)
{
int err;
@@ -1051,11 +1212,74 @@ static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
return err;
}
-int cpu_down(unsigned int cpu)
+/**
+ * cpu_device_down - Bring down a cpu device
+ * @dev: Pointer to the cpu device to offline
+ *
+ * This function is meant to be used by device core cpu subsystem only.
+ *
+ * Other subsystems should use remove_cpu() instead.
+ *
+ * Return: %0 on success or a negative errno code
+ */
+int cpu_device_down(struct device *dev)
{
- return do_cpu_down(cpu, CPUHP_OFFLINE);
+ return cpu_down(dev->id, CPUHP_OFFLINE);
+}
+
+int remove_cpu(unsigned int cpu)
+{
+ int ret;
+
+ lock_device_hotplug();
+ ret = device_offline(get_cpu_device(cpu));
+ unlock_device_hotplug();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(remove_cpu);
+
+void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
+{
+ unsigned int cpu;
+ int error;
+
+ cpu_maps_update_begin();
+
+ /*
+ * Make certain the cpu I'm about to reboot on is online.
+ *
+ * This is in line with what migrate_to_reboot_cpu() already does.
+ */
+ if (!cpu_online(primary_cpu))
+ primary_cpu = cpumask_first(cpu_online_mask);
+
+ for_each_online_cpu(cpu) {
+ if (cpu == primary_cpu)
+ continue;
+
+ error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
+ if (error) {
+ pr_err("Failed to offline CPU%d - error=%d",
+ cpu, error);
+ break;
+ }
+ }
+
+ /*
+ * Ensure all but the reboot CPU are offline.
+ */
+ BUG_ON(num_online_cpus() > 1);
+
+ /*
+ * Make sure the CPUs won't be enabled by someone else after this
+ * point. Kexec will reboot to a new kernel shortly resetting
+ * everything along the way.
+ */
+ cpu_hotplug_disabled++;
+
+ cpu_maps_update_done();
}
-EXPORT_SYMBOL(cpu_down);
#else
#define takedown_cpu NULL
@@ -1076,14 +1300,12 @@ void notify_cpu_starting(unsigned int cpu)
rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
cpumask_set_cpu(cpu, &cpus_booted_once_mask);
- while (st->state < target) {
- st->state++;
- ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
- /*
- * STARTING must not fail!
- */
- WARN_ON_ONCE(ret);
- }
+ ret = cpuhp_invoke_callback_range(true, cpu, st, target);
+
+ /*
+ * STARTING must not fail!
+ */
+ WARN_ON_ONCE(ret);
}
/*
@@ -1124,8 +1346,8 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
}
/*
- * The caller of do_cpu_up might have raced with another
- * caller. Ignore it for now.
+ * The caller of cpu_up() might have raced with another
+ * caller. Nothing to do.
*/
if (st->state >= target)
goto out;
@@ -1141,7 +1363,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
cpuhp_tasks_frozen = tasks_frozen;
- cpuhp_set_state(st, target);
+ cpuhp_set_state(cpu, st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread once more.
@@ -1166,10 +1388,11 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
out:
cpus_write_unlock();
arch_smt_update();
+ cpu_up_down_serialize_trainwrecks(tasks_frozen);
return ret;
}
-static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
+static int cpu_up(unsigned int cpu, enum cpuhp_state target)
{
int err = 0;
@@ -1203,11 +1426,69 @@ out:
return err;
}
-int cpu_up(unsigned int cpu)
+/**
+ * cpu_device_up - Bring up a cpu device
+ * @dev: Pointer to the cpu device to online
+ *
+ * This function is meant to be used by device core cpu subsystem only.
+ *
+ * Other subsystems should use add_cpu() instead.
+ *
+ * Return: %0 on success or a negative errno code
+ */
+int cpu_device_up(struct device *dev)
+{
+ return cpu_up(dev->id, CPUHP_ONLINE);
+}
+
+int add_cpu(unsigned int cpu)
+{
+ int ret;
+
+ lock_device_hotplug();
+ ret = device_online(get_cpu_device(cpu));
+ unlock_device_hotplug();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(add_cpu);
+
+/**
+ * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
+ * @sleep_cpu: The cpu we hibernated on and should be brought up.
+ *
+ * On some architectures like arm64, we can hibernate on any CPU, but on
+ * wake up the CPU we hibernated on might be offline as a side effect of
+ * using maxcpus= for example.
+ *
+ * Return: %0 on success or a negative errno code
+ */
+int bringup_hibernate_cpu(unsigned int sleep_cpu)
+{
+ int ret;
+
+ if (!cpu_online(sleep_cpu)) {
+ pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
+ ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
+ if (ret) {
+ pr_err("Failed to bring hibernate-CPU up!\n");
+ return ret;
+ }
+ }
+ return 0;
+}
+
+void bringup_nonboot_cpus(unsigned int setup_max_cpus)
{
- return do_cpu_up(cpu, CPUHP_ONLINE);
+ unsigned int cpu;
+
+ for_each_present_cpu(cpu) {
+ if (num_online_cpus() >= setup_max_cpus)
+ break;
+ if (!cpu_online(cpu))
+ cpu_up(cpu, CPUHP_ONLINE);
+ }
}
-EXPORT_SYMBOL_GPL(cpu_up);
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
@@ -1219,8 +1500,8 @@ int freeze_secondary_cpus(int primary)
cpu_maps_update_begin();
if (primary == -1) {
primary = cpumask_first(cpu_online_mask);
- if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
- primary = housekeeping_any_cpu(HK_FLAG_TIMER);
+ if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
+ primary = housekeeping_any_cpu(HK_TYPE_TIMER);
} else {
if (!cpu_online(primary))
primary = cpumask_first(cpu_online_mask);
@@ -1261,8 +1542,8 @@ int freeze_secondary_cpus(int primary)
/*
* Make sure the CPUs won't be enabled by someone else. We need to do
- * this even in case of failure as all disable_nonboot_cpus() users are
- * supposed to do enable_nonboot_cpus() on the failure path.
+ * this even in case of failure as all freeze_secondary_cpus() users are
+ * supposed to do thaw_secondary_cpus() on the failure path.
*/
cpu_hotplug_disabled++;
@@ -1270,15 +1551,15 @@ int freeze_secondary_cpus(int primary)
return error;
}
-void __weak arch_enable_nonboot_cpus_begin(void)
+void __weak arch_thaw_secondary_cpus_begin(void)
{
}
-void __weak arch_enable_nonboot_cpus_end(void)
+void __weak arch_thaw_secondary_cpus_end(void)
{
}
-void enable_nonboot_cpus(void)
+void thaw_secondary_cpus(void)
{
int cpu, error;
@@ -1290,7 +1571,7 @@ void enable_nonboot_cpus(void)
pr_info("Enabling non-boot CPUs ...\n");
- arch_enable_nonboot_cpus_begin();
+ arch_thaw_secondary_cpus_begin();
for_each_cpu(cpu, frozen_cpus) {
trace_suspend_resume(TPS("CPU_ON"), cpu, true);
@@ -1303,7 +1584,7 @@ void enable_nonboot_cpus(void)
pr_warn("Error taking CPU%d up: %d\n", cpu, error);
}
- arch_enable_nonboot_cpus_end();
+ arch_thaw_secondary_cpus_end();
cpumask_clear(frozen_cpus);
out:
@@ -1390,6 +1671,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = perf_event_init_cpu,
.teardown.single = perf_event_exit_cpu,
},
+ [CPUHP_RANDOM_PREPARE] = {
+ .name = "random:prepare",
+ .startup.single = random_prepare_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_WORKQUEUE_PREP] = {
.name = "workqueue:prepare",
.startup.single = workqueue_prepare_cpu,
@@ -1434,7 +1720,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_BRINGUP_CPU] = {
.name = "cpu:bringup",
.startup.single = bringup_cpu,
- .teardown.single = NULL,
+ .teardown.single = finish_cpu,
.cant_stop = true,
},
/* Final state before CPU kills itself */
@@ -1471,7 +1757,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.name = "ap:online",
},
/*
- * Handled on controll processor until the plugged processor manages
+ * Handled on control processor until the plugged processor manages
* this itself.
*/
[CPUHP_TEARDOWN_CPU] = {
@@ -1480,6 +1766,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.teardown.single = takedown_cpu,
.cant_stop = true,
},
+
+ [CPUHP_AP_SCHED_WAIT_EMPTY] = {
+ .name = "sched:waitempty",
+ .startup.single = NULL,
+ .teardown.single = sched_cpu_wait_empty,
+ },
+
/* Handle smpboot threads park/unpark */
[CPUHP_AP_SMPBOOT_THREADS] = {
.name = "smpboot/threads:online",
@@ -1506,6 +1799,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = workqueue_online_cpu,
.teardown.single = workqueue_offline_cpu,
},
+ [CPUHP_AP_RANDOM_ONLINE] = {
+ .name = "random:online",
+ .startup.single = random_online_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_AP_RCUTREE_ONLINE] = {
.name = "RCU/tree:online",
.startup.single = rcutree_online_cpu,
@@ -1628,8 +1926,7 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
* If there's nothing to do, we done.
* Relies on the union for multi_instance.
*/
- if ((bringup && !sp->startup.single) ||
- (!bringup && !sp->teardown.single))
+ if (cpuhp_step_empty(bringup, sp))
return 0;
/*
* The non AP bound callbacks can fail on bringup. On teardown
@@ -1731,6 +2028,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
* __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
* @state: The state to setup
+ * @name: Name of the step
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
* @startup: startup callback function
@@ -1739,9 +2037,9 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* added afterwards.
*
* The caller needs to hold cpus read locked while calling this function.
- * Returns:
+ * Return:
* On success:
- * Positive state number if @state is CPUHP_AP_ONLINE_DYN
+ * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
* 0 for all other states
* On failure: proper (negative) error code
*/
@@ -1987,18 +2285,17 @@ int cpuhp_smt_enable(void)
#endif
#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
-static ssize_t show_cpuhp_state(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->state);
}
-static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
+static DEVICE_ATTR_RO(state);
-static ssize_t write_cpuhp_target(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t target_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
struct cpuhp_step *sp;
@@ -2028,27 +2325,25 @@ static ssize_t write_cpuhp_target(struct device *dev,
goto out;
if (st->state < target)
- ret = do_cpu_up(dev->id, target);
+ ret = cpu_up(dev->id, target);
else
- ret = do_cpu_down(dev->id, target);
+ ret = cpu_down(dev->id, target);
out:
unlock_device_hotplug();
return ret ? ret : count;
}
-static ssize_t show_cpuhp_target(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t target_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->target);
}
-static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
+static DEVICE_ATTR_RW(target);
-
-static ssize_t write_cpuhp_fail(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
struct cpuhp_step *sp;
@@ -2058,6 +2353,11 @@ static ssize_t write_cpuhp_fail(struct device *dev,
if (ret)
return ret;
+ if (fail == CPUHP_INVALID) {
+ st->fail = fail;
+ return count;
+ }
+
if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
return -EINVAL;
@@ -2068,6 +2368,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
return -EINVAL;
/*
+ * DEAD callbacks cannot fail...
+ * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
+ * triggering STARTING callbacks, a failure in this state would
+ * hinder rollback.
+ */
+ if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
+ return -EINVAL;
+
+ /*
* Cannot fail anything that doesn't have callbacks.
*/
mutex_lock(&cpuhp_state_mutex);
@@ -2083,15 +2392,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
return count;
}
-static ssize_t show_cpuhp_fail(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
return sprintf(buf, "%d\n", st->fail);
}
-static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
+static DEVICE_ATTR_RW(fail);
static struct attribute *cpuhp_cpu_attrs[] = {
&dev_attr_state.attr,
@@ -2106,7 +2415,7 @@ static const struct attribute_group cpuhp_cpu_attr_group = {
NULL
};
-static ssize_t show_cpuhp_states(struct device *dev,
+static ssize_t states_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t cur, res = 0;
@@ -2125,7 +2434,7 @@ static ssize_t show_cpuhp_states(struct device *dev,
mutex_unlock(&cpuhp_state_mutex);
return res;
}
-static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
+static DEVICE_ATTR_RO(states);
static struct attribute *cpuhp_cpu_root_attrs[] = {
&dev_attr_states.attr,
@@ -2198,28 +2507,27 @@ static const char *smt_states[] = {
[CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
};
-static ssize_t
-show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t control_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
const char *state = smt_states[cpu_smt_control];
return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
}
-static ssize_t
-store_smt_control(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t control_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
return __store_smt_control(dev, attr, buf, count);
}
-static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+static DEVICE_ATTR_RW(control);
-static ssize_t
-show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t active_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
}
-static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+static DEVICE_ATTR_RO(active);
static struct attribute *cpuhp_smt_attrs[] = {
&dev_attr_control.attr,
@@ -2311,6 +2619,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);
+struct cpumask __cpu_dying_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_dying_mask);
+
atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);