aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcu/tree_plugin.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu/tree_plugin.h')
-rw-r--r--kernel/rcu/tree_plugin.h426
1 files changed, 284 insertions, 142 deletions
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 00dc411e9676..387dd4599344 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -85,33 +85,6 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
if (nr_cpu_ids != NR_CPUS)
pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-#ifdef CONFIG_RCU_NOCB_CPU
-#ifndef CONFIG_RCU_NOCB_CPU_NONE
- if (!have_rcu_nocb_mask) {
- zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
- have_rcu_nocb_mask = true;
- }
-#ifdef CONFIG_RCU_NOCB_CPU_ZERO
- pr_info("\tOffload RCU callbacks from CPU 0\n");
- cpumask_set_cpu(0, rcu_nocb_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
-#ifdef CONFIG_RCU_NOCB_CPU_ALL
- pr_info("\tOffload RCU callbacks from all CPUs\n");
- cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
- if (have_rcu_nocb_mask) {
- if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
- pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
- cpumask_and(rcu_nocb_mask, cpu_possible_mask,
- rcu_nocb_mask);
- }
- cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
- pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
- if (rcu_nocb_poll)
- pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
- }
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
}
#ifdef CONFIG_TREE_PREEMPT_RCU
@@ -134,7 +107,7 @@ static void __init rcu_bootup_announce(void)
* Return the number of RCU-preempt batches processed thus far
* for debug and statistics.
*/
-long rcu_batches_completed_preempt(void)
+static long rcu_batches_completed_preempt(void)
{
return rcu_preempt_state.completed;
}
@@ -155,18 +128,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
* not in a quiescent state. There might be any number of tasks blocked
* while in an RCU read-side critical section.
*
- * Unlike the other rcu_*_qs() functions, callers to this function
- * must disable irqs in order to protect the assignment to
- * ->rcu_read_unlock_special.
- */
-static void rcu_preempt_qs(int cpu)
-{
- struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-
- if (rdp->passed_quiesce == 0)
- trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
- rdp->passed_quiesce = 1;
- current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+ * As with the other rcu_*_qs() functions, callers to this function
+ * must disable preemption.
+ */
+static void rcu_preempt_qs(void)
+{
+ if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
+ trace_rcu_grace_period(TPS("rcu_preempt"),
+ __this_cpu_read(rcu_preempt_data.gpnum),
+ TPS("cpuqs"));
+ __this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
+ barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+ current->rcu_read_unlock_special.b.need_qs = false;
+ }
}
/*
@@ -190,14 +164,14 @@ static void rcu_preempt_note_context_switch(int cpu)
struct rcu_node *rnp;
if (t->rcu_read_lock_nesting > 0 &&
- (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+ !t->rcu_read_unlock_special.b.blocked) {
/* Possibly blocking in an RCU read-side critical section. */
rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
smp_mb__after_unlock_lock();
- t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+ t->rcu_read_unlock_special.b.blocked = true;
t->rcu_blocked_node = rnp;
/*
@@ -239,7 +213,7 @@ static void rcu_preempt_note_context_switch(int cpu)
: rnp->gpnum + 1);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
} else if (t->rcu_read_lock_nesting < 0 &&
- t->rcu_read_unlock_special) {
+ t->rcu_read_unlock_special.s) {
/*
* Complete exit from RCU read-side critical section on
@@ -257,9 +231,7 @@ static void rcu_preempt_note_context_switch(int cpu)
* grace period, then the fact that the task has been enqueued
* means that we continue to block the current grace period.
*/
- local_irq_save(flags);
- rcu_preempt_qs(cpu);
- local_irq_restore(flags);
+ rcu_preempt_qs();
}
/*
@@ -340,7 +312,7 @@ void rcu_read_unlock_special(struct task_struct *t)
bool drop_boost_mutex = false;
#endif /* #ifdef CONFIG_RCU_BOOST */
struct rcu_node *rnp;
- int special;
+ union rcu_special special;
/* NMI handlers cannot block and cannot safely manipulate state. */
if (in_nmi())
@@ -350,12 +322,13 @@ void rcu_read_unlock_special(struct task_struct *t)
/*
* If RCU core is waiting for this CPU to exit critical section,
- * let it know that we have done so.
+ * let it know that we have done so. Because irqs are disabled,
+ * t->rcu_read_unlock_special cannot change.
*/
special = t->rcu_read_unlock_special;
- if (special & RCU_READ_UNLOCK_NEED_QS) {
- rcu_preempt_qs(smp_processor_id());
- if (!t->rcu_read_unlock_special) {
+ if (special.b.need_qs) {
+ rcu_preempt_qs();
+ if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
return;
}
@@ -368,8 +341,8 @@ void rcu_read_unlock_special(struct task_struct *t)
}
/* Clean up if blocked during RCU read-side critical section. */
- if (special & RCU_READ_UNLOCK_BLOCKED) {
- t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
+ if (special.b.blocked) {
+ t->rcu_read_unlock_special.b.blocked = false;
/*
* Remove this task from the list it blocked on. The
@@ -653,12 +626,13 @@ static void rcu_preempt_check_callbacks(int cpu)
struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0) {
- rcu_preempt_qs(cpu);
+ rcu_preempt_qs();
return;
}
if (t->rcu_read_lock_nesting > 0 &&
- per_cpu(rcu_preempt_data, cpu).qs_pending)
- t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
+ per_cpu(rcu_preempt_data, cpu).qs_pending &&
+ !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
+ t->rcu_read_unlock_special.b.need_qs = true;
}
#ifdef CONFIG_RCU_BOOST
@@ -819,11 +793,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
* In fact, if you are using synchronize_rcu_expedited() in a loop,
* please restructure your code to batch your updates, and then Use a
* single synchronize_rcu() instead.
- *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier. Failing to observe
- * these restriction will result in deadlock.
*/
void synchronize_rcu_expedited(void)
{
@@ -845,7 +814,11 @@ void synchronize_rcu_expedited(void)
* being boosted. This simplifies the process of moving tasks
* from leaf to root rcu_node structures.
*/
- get_online_cpus();
+ if (!try_get_online_cpus()) {
+ /* CPU-hotplug operation in flight, fall back to normal GP. */
+ wait_rcu_gp(call_rcu);
+ return;
+ }
/*
* Acquire lock, falling back to synchronize_rcu() if too many
@@ -897,7 +870,8 @@ void synchronize_rcu_expedited(void)
/* Clean up and exit. */
smp_mb(); /* ensure expedited GP seen before counter increment. */
- ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
+ ACCESS_ONCE(sync_rcu_preempt_exp_count) =
+ sync_rcu_preempt_exp_count + 1;
unlock_mb_ret:
mutex_unlock(&sync_rcu_preempt_exp_mutex);
mb_ret:
@@ -941,7 +915,7 @@ void exit_rcu(void)
return;
t->rcu_read_lock_nesting = 1;
barrier();
- t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
+ t->rcu_read_unlock_special.b.blocked = true;
__rcu_read_unlock();
}
@@ -1462,14 +1436,13 @@ static struct smp_hotplug_thread rcu_cpu_thread_spec = {
};
/*
- * Spawn all kthreads -- called as soon as the scheduler is running.
+ * Spawn boost kthreads -- called as soon as the scheduler is running.
*/
-static int __init rcu_spawn_kthreads(void)
+static void __init rcu_spawn_boost_kthreads(void)
{
struct rcu_node *rnp;
int cpu;
- rcu_scheduler_fully_active = 1;
for_each_possible_cpu(cpu)
per_cpu(rcu_cpu_has_work, cpu) = 0;
BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
@@ -1479,9 +1452,7 @@ static int __init rcu_spawn_kthreads(void)
rcu_for_each_leaf_node(rcu_state_p, rnp)
(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
}
- return 0;
}
-early_initcall(rcu_spawn_kthreads);
static void rcu_prepare_kthreads(int cpu)
{
@@ -1519,12 +1490,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
}
-static int __init rcu_scheduler_really_started(void)
+static void __init rcu_spawn_boost_kthreads(void)
{
- rcu_scheduler_fully_active = 1;
- return 0;
}
-early_initcall(rcu_scheduler_really_started);
static void rcu_prepare_kthreads(int cpu)
{
@@ -1625,7 +1593,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
/* Exit early if we advanced recently. */
if (jiffies == rdtp->last_advance_all)
- return 0;
+ return false;
rdtp->last_advance_all = jiffies;
for_each_rcu_flavor(rsp) {
@@ -1848,7 +1816,7 @@ static int rcu_oom_notify(struct notifier_block *self,
get_online_cpus();
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
- cond_resched();
+ cond_resched_rcu_qs();
}
put_online_cpus();
@@ -2074,9 +2042,9 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
if (!ACCESS_ONCE(rdp_leader->nocb_kthread))
return;
- if (!ACCESS_ONCE(rdp_leader->nocb_leader_wake) || force) {
- /* Prior xchg orders against prior callback enqueue. */
- ACCESS_ONCE(rdp_leader->nocb_leader_wake) = true;
+ if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
+ /* Prior smp_mb__after_atomic() orders against prior enqueue. */
+ ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
wake_up(&rdp_leader->nocb_wq);
}
}
@@ -2104,6 +2072,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
ACCESS_ONCE(*old_rhpp) = rhp;
atomic_long_add(rhcount, &rdp->nocb_q_count);
atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
+ smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
/* If we are not being polled and there is a kthread, awaken it ... */
t = ACCESS_ONCE(rdp->nocb_kthread);
@@ -2120,16 +2089,23 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
TPS("WakeEmpty"));
} else {
- rdp->nocb_defer_wakeup = true;
+ rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
TPS("WakeEmptyIsDeferred"));
}
rdp->qlen_last_fqs_check = 0;
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
/* ... or if many callbacks queued. */
- wake_nocb_leader(rdp, true);
+ if (!irqs_disabled_flags(flags)) {
+ wake_nocb_leader(rdp, true);
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WakeOvf"));
+ } else {
+ rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WakeOvfIsDeferred"));
+ }
rdp->qlen_last_fqs_check = LONG_MAX / 2;
- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
} else {
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
}
@@ -2150,7 +2126,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
{
if (!rcu_is_nocb_cpu(rdp->cpu))
- return 0;
+ return false;
__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
if (__is_kfree_rcu_offset((unsigned long)rhp->func))
trace_rcu_kfree_callback(rdp->rsp->name, rhp,
@@ -2161,7 +2137,18 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
trace_rcu_callback(rdp->rsp->name, rhp,
-atomic_long_read(&rdp->nocb_q_count_lazy),
-atomic_long_read(&rdp->nocb_q_count));
- return 1;
+
+ /*
+ * If called from an extended quiescent state with interrupts
+ * disabled, invoke the RCU core in order to allow the idle-entry
+ * deferred-wakeup check to function.
+ */
+ if (irqs_disabled_flags(flags) &&
+ !rcu_is_watching() &&
+ cpu_online(smp_processor_id()))
+ invoke_rcu_core();
+
+ return true;
}
/*
@@ -2177,7 +2164,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
if (!rcu_is_nocb_cpu(smp_processor_id()))
- return 0;
+ return false;
rsp->qlen = 0;
rsp->qlen_lazy = 0;
@@ -2196,7 +2183,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
rsp->orphan_nxtlist = NULL;
rsp->orphan_nxttail = &rsp->orphan_nxtlist;
}
- return 1;
+ return true;
}
/*
@@ -2229,7 +2216,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
(d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
if (likely(d))
break;
- flush_signals(current);
+ WARN_ON(signal_pending(current));
trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
}
trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
@@ -2253,7 +2240,7 @@ wait_again:
if (!rcu_nocb_poll) {
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
wait_event_interruptible(my_rdp->nocb_wq,
- ACCESS_ONCE(my_rdp->nocb_leader_wake));
+ !ACCESS_ONCE(my_rdp->nocb_leader_sleep));
/* Memory barrier handled by smp_mb() calls below and repoll. */
} else if (firsttime) {
firsttime = false; /* Don't drown trace log with "Poll"! */
@@ -2288,16 +2275,16 @@ wait_again:
if (!rcu_nocb_poll)
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
"WokeEmpty");
- flush_signals(current);
+ WARN_ON(signal_pending(current));
schedule_timeout_interruptible(1);
/* Rescan in case we were a victim of memory ordering. */
- my_rdp->nocb_leader_wake = false;
- smp_mb(); /* Ensure _wake false before scan. */
+ my_rdp->nocb_leader_sleep = true;
+ smp_mb(); /* Ensure _sleep true before scan. */
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
if (ACCESS_ONCE(rdp->nocb_head)) {
/* Found CB, so short-circuit next wait. */
- my_rdp->nocb_leader_wake = true;
+ my_rdp->nocb_leader_sleep = false;
break;
}
goto wait_again;
@@ -2307,17 +2294,17 @@ wait_again:
rcu_nocb_wait_gp(my_rdp);
/*
- * We left ->nocb_leader_wake set to reduce cache thrashing.
- * We clear it now, but recheck for new callbacks while
+ * We left ->nocb_leader_sleep unset to reduce cache thrashing.
+ * We set it now, but recheck for new callbacks while
* traversing our follower list.
*/
- my_rdp->nocb_leader_wake = false;
- smp_mb(); /* Ensure _wake false before scan of ->nocb_head. */
+ my_rdp->nocb_leader_sleep = true;
+ smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */
/* Each pass through the following loop wakes a follower, if needed. */
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
if (ACCESS_ONCE(rdp->nocb_head))
- my_rdp->nocb_leader_wake = true; /* No need to wait. */
+ my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
if (!rdp->nocb_gp_head)
continue; /* No CBs, so no need to wake follower. */
@@ -2327,6 +2314,7 @@ wait_again:
atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
atomic_long_add(rdp->nocb_gp_count_lazy,
&rdp->nocb_follower_count_lazy);
+ smp_mb__after_atomic(); /* Store *tail before wakeup. */
if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
/*
* List was empty, wake up the follower.
@@ -2367,7 +2355,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
if (!rcu_nocb_poll)
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
"WokeEmpty");
- flush_signals(current);
+ WARN_ON(signal_pending(current));
schedule_timeout_interruptible(1);
}
}
@@ -2428,15 +2416,16 @@ static int rcu_nocb_kthread(void *arg)
list = next;
}
trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
- ACCESS_ONCE(rdp->nocb_p_count) -= c;
- ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
+ ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c;
+ ACCESS_ONCE(rdp->nocb_p_count_lazy) =
+ rdp->nocb_p_count_lazy - cl;
rdp->n_nocbs_invoked += c;
}
return 0;
}
/* Is a deferred wakeup of rcu_nocb_kthread() required? */
-static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
{
return ACCESS_ONCE(rdp->nocb_defer_wakeup);
}
@@ -2444,11 +2433,79 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
/* Do a deferred wakeup of rcu_nocb_kthread(). */
static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
+ int ndw;
+
if (!rcu_nocb_need_deferred_wakeup(rdp))
return;
- ACCESS_ONCE(rdp->nocb_defer_wakeup) = false;
- wake_nocb_leader(rdp, false);
- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty"));
+ ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
+ ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
+ wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
+}
+
+void __init rcu_init_nohz(void)
+{
+ int cpu;
+ bool need_rcu_nocb_mask = true;
+ struct rcu_state *rsp;
+
+#ifdef CONFIG_RCU_NOCB_CPU_NONE
+ need_rcu_nocb_mask = false;
+#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
+
+#if defined(CONFIG_NO_HZ_FULL)
+ if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
+ need_rcu_nocb_mask = true;
+#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+
+ if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
+ if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
+ pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
+ return;
+ }
+ have_rcu_nocb_mask = true;
+ }
+ if (!have_rcu_nocb_mask)
+ return;
+
+#ifdef CONFIG_RCU_NOCB_CPU_ZERO
+ pr_info("\tOffload RCU callbacks from CPU 0\n");
+ cpumask_set_cpu(0, rcu_nocb_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
+#ifdef CONFIG_RCU_NOCB_CPU_ALL
+ pr_info("\tOffload RCU callbacks from all CPUs\n");
+ cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
+#if defined(CONFIG_NO_HZ_FULL)
+ if (tick_nohz_full_running)
+ cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
+#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+
+ if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
+ pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
+ cpumask_and(rcu_nocb_mask, cpu_possible_mask,
+ rcu_nocb_mask);
+ }
+ cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
+ pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
+ if (rcu_nocb_poll)
+ pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
+
+ for_each_rcu_flavor(rsp) {
+ for_each_cpu(cpu, rcu_nocb_mask) {
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+
+ /*
+ * If there are early callbacks, they will need
+ * to be moved to the nocb lists.
+ */
+ WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
+ &rdp->nxtlist &&
+ rdp->nxttail[RCU_NEXT_TAIL] != NULL);
+ init_nocb_callback_list(rdp);
+ }
+ rcu_organize_nocb_kthreads(rsp);
+ }
}
/* Initialize per-rcu_data variables for no-CBs CPUs. */
@@ -2459,15 +2516,85 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
}
+/*
+ * If the specified CPU is a no-CBs CPU that does not already have its
+ * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are
+ * brought online out of order, this can require re-organizing the
+ * leader-follower relationships.
+ */
+static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
+{
+ struct rcu_data *rdp;
+ struct rcu_data *rdp_last;
+ struct rcu_data *rdp_old_leader;
+ struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu);
+ struct task_struct *t;
+
+ /*
+ * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
+ * then nothing to do.
+ */
+ if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread)
+ return;
+
+ /* If we didn't spawn the leader first, reorganize! */
+ rdp_old_leader = rdp_spawn->nocb_leader;
+ if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) {
+ rdp_last = NULL;
+ rdp = rdp_old_leader;
+ do {
+ rdp->nocb_leader = rdp_spawn;
+ if (rdp_last && rdp != rdp_spawn)
+ rdp_last->nocb_next_follower = rdp;
+ rdp_last = rdp;
+ rdp = rdp->nocb_next_follower;
+ rdp_last->nocb_next_follower = NULL;
+ } while (rdp);
+ rdp_spawn->nocb_next_follower = rdp_old_leader;
+ }
+
+ /* Spawn the kthread for this CPU and RCU flavor. */
+ t = kthread_run(rcu_nocb_kthread, rdp_spawn,
+ "rcuo%c/%d", rsp->abbr, cpu);
+ BUG_ON(IS_ERR(t));
+ ACCESS_ONCE(rdp_spawn->nocb_kthread) = t;
+}
+
+/*
+ * If the specified CPU is a no-CBs CPU that does not already have its
+ * rcuo kthreads, spawn them.
+ */
+static void rcu_spawn_all_nocb_kthreads(int cpu)
+{
+ struct rcu_state *rsp;
+
+ if (rcu_scheduler_fully_active)
+ for_each_rcu_flavor(rsp)
+ rcu_spawn_one_nocb_kthread(rsp, cpu);
+}
+
+/*
+ * Once the scheduler is running, spawn rcuo kthreads for all online
+ * no-CBs CPUs. This assumes that the early_initcall()s happen before
+ * non-boot CPUs come online -- if this changes, we will need to add
+ * some mutual exclusion.
+ */
+static void __init rcu_spawn_nocb_kthreads(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ rcu_spawn_all_nocb_kthreads(cpu);
+}
+
/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */
static int rcu_nocb_leader_stride = -1;
module_param(rcu_nocb_leader_stride, int, 0444);
/*
- * Create a kthread for each RCU flavor for each no-CBs CPU.
- * Also initialize leader-follower relationships.
+ * Initialize leader-follower relationships for all no-CBs CPU.
*/
-static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
+static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
{
int cpu;
int ls = rcu_nocb_leader_stride;
@@ -2475,14 +2602,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
struct rcu_data *rdp;
struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */
struct rcu_data *rdp_prev = NULL;
- struct task_struct *t;
- if (rcu_nocb_mask == NULL)
+ if (!have_rcu_nocb_mask)
return;
-#if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL)
- if (tick_nohz_full_running)
- cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
-#endif /* #if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL) */
if (ls == -1) {
ls = int_sqrt(nr_cpu_ids);
rcu_nocb_leader_stride = ls;
@@ -2505,21 +2627,15 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
rdp_prev->nocb_next_follower = rdp;
}
rdp_prev = rdp;
-
- /* Spawn the kthread for this CPU. */
- t = kthread_run(rcu_nocb_kthread, rdp,
- "rcuo%c/%d", rsp->abbr, cpu);
- BUG_ON(IS_ERR(t));
- ACCESS_ONCE(rdp->nocb_kthread) = t;
}
}
/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
static bool init_nocb_callback_list(struct rcu_data *rdp)
{
- if (rcu_nocb_mask == NULL ||
- !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
+ if (!rcu_is_nocb_cpu(rdp->cpu))
return false;
+
rdp->nxttail[RCU_NEXT_TAIL] = NULL;
return true;
}
@@ -2541,21 +2657,21 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
bool lazy, unsigned long flags)
{
- return 0;
+ return false;
}
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
struct rcu_data *rdp,
unsigned long flags)
{
- return 0;
+ return false;
}
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
}
-static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
{
return false;
}
@@ -2564,7 +2680,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
}
-static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
+static void rcu_spawn_all_nocb_kthreads(int cpu)
+{
+}
+
+static void __init rcu_spawn_nocb_kthreads(void)
{
}
@@ -2595,16 +2715,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-/*
- * Define RCU flavor that holds sysidle state. This needs to be the
- * most active flavor of RCU.
- */
-#ifdef CONFIG_PREEMPT_RCU
-static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
-#else /* #ifdef CONFIG_PREEMPT_RCU */
-static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
-#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
-
static int full_sysidle_state; /* Current system-idle state. */
#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */
#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */
@@ -2622,6 +2732,10 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
{
unsigned long j;
+ /* If there are no nohz_full= CPUs, no need to track this. */
+ if (!tick_nohz_full_enabled())
+ return;
+
/* Adjust nesting, check for fully idle. */
if (irq) {
rdtp->dynticks_idle_nesting--;
@@ -2687,6 +2801,10 @@ void rcu_sysidle_force_exit(void)
*/
static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
{
+ /* If there are no nohz_full= CPUs, no need to track this. */
+ if (!tick_nohz_full_enabled())
+ return;
+
/* Adjust nesting, check for already non-idle. */
if (irq) {
rdtp->dynticks_idle_nesting++;
@@ -2741,12 +2859,16 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
unsigned long j;
struct rcu_dynticks *rdtp = rdp->dynticks;
+ /* If there are no nohz_full= CPUs, don't check system-wide idleness. */
+ if (!tick_nohz_full_enabled())
+ return;
+
/*
* If some other CPU has already reported non-idle, if this is
* not the flavor of RCU that tracks sysidle state, or if this
* is an offline or the timekeeping CPU, nothing to do.
*/
- if (!*isidle || rdp->rsp != rcu_sysidle_state ||
+ if (!*isidle || rdp->rsp != rcu_state_p ||
cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
return;
if (rcu_gp_in_progress(rdp->rsp))
@@ -2772,7 +2894,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
*/
static bool is_sysidle_rcu_state(struct rcu_state *rsp)
{
- return rsp == rcu_sysidle_state;
+ return rsp == rcu_state_p;
}
/*
@@ -2850,7 +2972,7 @@ static void rcu_sysidle_cancel(void)
static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
unsigned long maxj, bool gpkt)
{
- if (rsp != rcu_sysidle_state)
+ if (rsp != rcu_state_p)
return; /* Wrong flavor, ignore. */
if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
return; /* Running state machine from timekeeping CPU. */
@@ -2867,6 +2989,10 @@ static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
unsigned long maxj)
{
+ /* If there are no nohz_full= CPUs, no need to track this. */
+ if (!tick_nohz_full_enabled())
+ return;
+
rcu_sysidle_report(rsp, isidle, maxj, true);
}
@@ -2893,7 +3019,8 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
/*
* Check to see if the system is fully idle, other than the timekeeping CPU.
- * The caller must have disabled interrupts.
+ * The caller must have disabled interrupts. This is not intended to be
+ * called unless tick_nohz_full_enabled().
*/
bool rcu_sys_is_idle(void)
{
@@ -2919,13 +3046,12 @@ bool rcu_sys_is_idle(void)
/* Scan all the CPUs looking for nonidle CPUs. */
for_each_possible_cpu(cpu) {
- rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu);
+ rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
if (!isidle)
break;
}
- rcu_sysidle_report(rcu_sysidle_state,
- isidle, maxj, false);
+ rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
oldrss = rss;
rss = ACCESS_ONCE(full_sysidle_state);
}
@@ -2952,7 +3078,7 @@ bool rcu_sys_is_idle(void)
* provided by the memory allocator.
*/
if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
- !rcu_gp_in_progress(rcu_sysidle_state) &&
+ !rcu_gp_in_progress(rcu_state_p) &&
!rsh.inuse && xchg(&rsh.inuse, 1) == 0)
call_rcu(&rsh.rh, rcu_sysidle_cb);
return false;
@@ -3036,3 +3162,19 @@ static void rcu_bind_gp_kthread(void)
housekeeping_affine(current);
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
}
+
+/* Record the current task on dyntick-idle entry. */
+static void rcu_dynticks_task_enter(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
+
+/* Record no current task on dyntick-idle exit. */
+static void rcu_dynticks_task_exit(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}