From 01b4c39901e087ceebae2733857248de81476bd8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 24 Jul 2019 15:22:59 +0200 Subject: nohz: Add TICK_DEP_BIT_RCU If a nohz_full CPU is looping in the kernel, the scheduling-clock tick might nevertheless remain disabled. In !PREEMPT kernels, this can prevent RCU's attempts to enlist the aid of that CPU's executions of cond_resched(), which can in turn result in an arbitrarily delayed grace period and thus an OOM. RCU therefore needs a way to enable a holdout nohz_full CPU's scheduler-clock interrupt. This commit therefore provides a new TICK_DEP_BIT_RCU value which RCU can pass to tick_dep_set_cpu() and friends to force on the scheduler-clock interrupt for a specified CPU or task. In some cases, rcutorture needs to turn on the scheduler-clock tick, so this commit also exports the relevant symbols to GPL-licensed modules. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/tick.h | 7 ++++++- include/trace/events/timer.h | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index f92a10b5e112..39eb44564058 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -108,7 +108,8 @@ enum tick_dep_bits { TICK_DEP_BIT_POSIX_TIMER = 0, TICK_DEP_BIT_PERF_EVENTS = 1, TICK_DEP_BIT_SCHED = 2, - TICK_DEP_BIT_CLOCK_UNSTABLE = 3 + TICK_DEP_BIT_CLOCK_UNSTABLE = 3, + TICK_DEP_BIT_RCU = 4 }; #define TICK_DEP_MASK_NONE 0 @@ -116,6 +117,7 @@ enum tick_dep_bits { #define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS) #define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) #define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) +#define TICK_DEP_MASK_RCU (1 << TICK_DEP_BIT_RCU) #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; @@ -268,6 +270,9 @@ static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } +static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } + static inline void tick_dep_set(enum tick_dep_bits bit) { } static inline void tick_dep_clear(enum tick_dep_bits bit) { } static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index b7a904825e7d..295517f109d7 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -367,7 +367,8 @@ TRACE_EVENT(itimer_expire, tick_dep_name(POSIX_TIMER) \ tick_dep_name(PERF_EVENTS) \ tick_dep_name(SCHED) \ - tick_dep_name_end(CLOCK_UNSTABLE) + tick_dep_name(CLOCK_UNSTABLE) \ + tick_dep_name_end(RCU) #undef tick_dep_name #undef tick_dep_mask_name -- cgit v1.2.3-59-g8ed1b From 366237e7b0833faa2d8da7a8d7d7da8c3ca802e5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 10 Jul 2019 08:01:01 -0700 Subject: stop_machine: Provide RCU quiescent state in multi_cpu_stop() When multi_cpu_stop() loops waiting for other tasks, it can trigger an RCU CPU stall warning. This can be misleading because what is instead needed is information on whatever task is blocking multi_cpu_stop(). This commit therefore inserts an RCU quiescent state into the multi_cpu_stop() function's waitloop. Signed-off-by: Paul E. McKenney --- include/linux/rcutree.h | 1 + kernel/rcu/tree.c | 2 +- kernel/stop_machine.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 18b1ed9864b0..c5147de885ec 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,6 +37,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); +void rcu_momentary_dyntick_idle(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 238f93b4b0a4..a5c296d202ae 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -364,7 +364,7 @@ bool rcu_eqs_special_set(int cpu) * * The caller must have disabled interrupts and must not be idle. */ -static void __maybe_unused rcu_momentary_dyntick_idle(void) +void rcu_momentary_dyntick_idle(void) { int special; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index c7031a22aa7b..34c4f117d8c7 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -233,6 +233,7 @@ static int multi_cpu_stop(void *data) */ touch_nmi_watchdog(); } + rcu_momentary_dyntick_idle(); } while (curstate != MULTI_STOP_EXIT); local_irq_restore(flags); -- cgit v1.2.3-59-g8ed1b From 79ba7ff5a9925f5c170f51ed7a96d1475eb6c27f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Aug 2019 13:17:35 -0700 Subject: rcutorture: Emulate dyntick aspect of userspace nohz_full sojourn During an actual call_rcu() flood, there would be frequent trips to userspace (in-kernel call_rcu() floods must be otherwise housebroken). Userspace execution on nohz_full CPUs implies an RCU dyntick idle/not-idle transition pair, so this commit adds emulation of that pair. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 1 + kernel/rcu/rcutorture.c | 11 +++++++++++ kernel/rcu/tree.c | 1 + 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9bf1dfe7781f..37b6f0c2b79d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -84,6 +84,7 @@ static inline void rcu_scheduler_starting(void) { } #endif /* #else #ifndef CONFIG_SRCU */ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_is_watching(void) { return true; } +static inline void rcu_momentary_dyntick_idle(void) { } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ab61f5c1353b..49ad88765ed2 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1761,6 +1761,11 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void) kfree(rfcp); freed++; rcu_torture_fwd_prog_cond_resched(freed); + if (tick_nohz_full_enabled()) { + local_irq_save(flags); + rcu_momentary_dyntick_idle(); + local_irq_restore(flags); + } } return freed; } @@ -1835,6 +1840,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries) static void rcu_torture_fwd_prog_cr(void) { unsigned long cver; + unsigned long flags; unsigned long gps; int i; long n_launders; @@ -1894,6 +1900,11 @@ static void rcu_torture_fwd_prog_cr(void) } cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs); + if (tick_nohz_full_enabled()) { + local_irq_save(flags); + rcu_momentary_dyntick_idle(); + local_irq_restore(flags); + } } stoppedat = jiffies; n_launders_cb_snap = READ_ONCE(n_launders_cb); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7c67ea561b36..66354ef776aa 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -375,6 +375,7 @@ void rcu_momentary_dyntick_idle(void) WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); rcu_preempt_deferred_qs(current); } +EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); /** * rcu_is_cpu_rrupt_from_idle - see if interrupted from idle -- cgit v1.2.3-59-g8ed1b From 8e6af017f4b1da9cdd2b55ce83853df8e167b4d3 Mon Sep 17 00:00:00 2001 From: Ethan Hansen <1ethanhansen@gmail.com> Date: Fri, 2 Aug 2019 13:37:58 -0700 Subject: rcu: Remove unused function hlist_bl_del_init_rcu() The function hlist_bl_del_init_rcu() is declared in rculist_bl.h, but never used. This commit therefore removes it. Signed-off-by: Ethan Hansen <1ethanhansen@gmail.com> Signed-off-by: Paul E. McKenney --- include/linux/rculist_bl.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index 66e73ec1aa99..0b952d06eb0b 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -24,34 +24,6 @@ static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } -/** - * hlist_bl_del_init_rcu - deletes entry from hash list with re-initialization - * @n: the element to delete from the hash list. - * - * Note: hlist_bl_unhashed() on the node returns true after this. It is - * useful for RCU based read lockfree traversal if the writer side - * must know if the list entry is still hashed or already unhashed. - * - * In particular, it means that we can not poison the forward pointers - * that may still be used for walking the hash list and we can only - * zero the pprev pointer so list_unhashed() will return true after - * this. - * - * The caller must take whatever precautions are necessary (such as - * holding appropriate locks) to avoid racing with another - * list-mutation primitive, such as hlist_bl_add_head_rcu() or - * hlist_bl_del_rcu(), running on this same list. However, it is - * perfectly legal to run concurrently with the _rcu list-traversal - * primitives, such as hlist_bl_for_each_entry_rcu(). - */ -static inline void hlist_bl_del_init_rcu(struct hlist_bl_node *n) -{ - if (!hlist_bl_unhashed(n)) { - __hlist_bl_del(n); - n->pprev = NULL; - } -} - /** * hlist_bl_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. -- cgit v1.2.3-59-g8ed1b From 7eb54685c63cc9185a48d0d9c1ad25a34d4e1da0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Aug 2019 16:55:21 -0700 Subject: rcu: Remove obsolete descriptions for rcu_barrier tracepoint Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 694bd040cf51..afa898532ab8 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -713,8 +713,6 @@ TRACE_EVENT_RCU(rcu_torture_read, * "Begin": rcu_barrier() started. * "EarlyExit": rcu_barrier() piggybacked, thus early exit. * "Inc1": rcu_barrier() piggyback check counter incremented. - * "OfflineNoCB": rcu_barrier() found callback on never-online CPU - * "OnlineNoCB": rcu_barrier() found online no-CBs CPU. * "OnlineQ": rcu_barrier() found online CPU with callbacks. * "OnlineNQ": rcu_barrier() found online CPU, no callbacks. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. -- cgit v1.2.3-59-g8ed1b From d01f86206864e429839f6a4aeb90064f0c043ed9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 21 Aug 2019 10:29:06 -0700 Subject: rcu: Update descriptions for rcu_nocb_wake tracepoint Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index afa898532ab8..4609f2ef7767 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -258,20 +258,27 @@ TRACE_EVENT_RCU(rcu_exp_funnel_lock, * the number of the offloaded CPU are extracted. The third and final * argument is a string as follows: * - * "WakeEmpty": Wake rcuo kthread, first CB to empty list. - * "WakeEmptyIsDeferred": Wake rcuo kthread later, first CB to empty list. - * "WakeOvf": Wake rcuo kthread, CB list is huge. - * "WakeOvfIsDeferred": Wake rcuo kthread later, CB list is huge. - * "WakeNot": Don't wake rcuo kthread. - * "WakeNotPoll": Don't wake rcuo kthread because it is polling. - * "DeferredWake": Carried out the "IsDeferred" wakeup. - * "Poll": Start of new polling cycle for rcu_nocb_poll. - * "Sleep": Sleep waiting for GP for !rcu_nocb_poll. - * "CBSleep": Sleep waiting for CBs for !rcu_nocb_poll. - * "WokeEmpty": rcuo kthread woke to find empty list. - * "WokeNonEmpty": rcuo kthread woke to find non-empty list. - * "WaitQueue": Enqueue partially done, timed wait for it to complete. - * "WokeQueue": Partial enqueue now complete. + * "AlreadyAwake": The to-be-awakened rcuo kthread is already awake. + * "Bypass": rcuo GP kthread sees non-empty ->nocb_bypass. + * "CBSleep": rcuo CB kthread sleeping waiting for CBs. + * "Check": rcuo GP kthread checking specified CPU for work. + * "DeferredWake": Timer expired or polled check, time to wake. + * "DoWake": The to-be-awakened rcuo kthread needs to be awakened. + * "EndSleep": Done waiting for GP for !rcu_nocb_poll. + * "FirstBQ": New CB to empty ->nocb_bypass (->cblist maybe non-empty). + * "FirstBQnoWake": FirstBQ plus rcuo kthread need not be awakened. + * "FirstBQwake": FirstBQ plus rcuo kthread must be awakened. + * "FirstQ": New CB to empty ->cblist (->nocb_bypass maybe non-empty). + * "NeedWaitGP": rcuo GP kthread must wait on a grace period. + * "Poll": Start of new polling cycle for rcu_nocb_poll. + * "Sleep": Sleep waiting for GP for !rcu_nocb_poll. + * "Timer": Deferred-wake timer expired. + * "WakeEmptyIsDeferred": Wake rcuo kthread later, first CB to empty list. + * "WakeEmpty": Wake rcuo kthread, first CB to empty list. + * "WakeNot": Don't wake rcuo kthread. + * "WakeNotPoll": Don't wake rcuo kthread because it is polling. + * "WakeOvfIsDeferred": Wake rcuo kthread later, CB list is huge. + * "WokeEmpty": rcuo CB kthread woke to find empty list. */ TRACE_EVENT_RCU(rcu_nocb_wake, -- cgit v1.2.3-59-g8ed1b From 7cc0fffde6e4ff76be20d41a3577012fe584a559 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 21 Aug 2019 10:34:25 -0700 Subject: rcu: Update descriptions for rcu_future_grace_period tracepoint Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 4609f2ef7767..66122602bd08 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -93,16 +93,16 @@ TRACE_EVENT_RCU(rcu_grace_period, * the data from the rcu_node structure, other than rcuname, which comes * from the rcu_state structure, and event, which is one of the following: * - * "Startleaf": Request a grace period based on leaf-node data. + * "Cleanup": Clean up rcu_node structure after previous GP. + * "CleanupMore": Clean up, and another GP is needed. + * "EndWait": Complete wait. + * "NoGPkthread": The RCU grace-period kthread has not yet started. * "Prestarted": Someone beat us to the request * "Startedleaf": Leaf node marked for future GP. * "Startedleafroot": All nodes from leaf to root marked for future GP. * "Startedroot": Requested a nocb grace period based on root-node data. - * "NoGPkthread": The RCU grace-period kthread has not yet started. + * "Startleaf": Request a grace period based on leaf-node data. * "StartWait": Start waiting for the requested grace period. - * "EndWait": Complete wait. - * "Cleanup": Clean up rcu_node structure after previous GP. - * "CleanupMore": Clean up, and another GP is needed. */ TRACE_EVENT_RCU(rcu_future_grace_period, -- cgit v1.2.3-59-g8ed1b From a63fc6b75cca984c71f095282e0227a390ba88f3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Sep 2019 15:05:11 -0700 Subject: rcu: Upgrade rcu_swap_protected() to rcu_replace_pointer() Although the rcu_swap_protected() macro follows the example of swap(), the interactions with RCU make its update of its argument somewhat counter-intuitive. This commit therefore introduces an rcu_replace_pointer() that returns the old value of the RCU pointer instead of doing the argument update. Once all the uses of rcu_swap_protected() are updated to instead use rcu_replace_pointer(), rcu_swap_protected() will be removed. Link: https://lore.kernel.org/lkml/CAHk-=wiAsJLw1egFEE=Z7-GGtM6wcvtyytXZA1+BHqta4gg6Hw@mail.gmail.com/ Reported-by: Linus Torvalds [ paulmck: From rcu_replace() to rcu_replace_pointer() per Ingo Molnar. ] Signed-off-by: Paul E. McKenney Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Shane M Seymour Cc: Martin K. Petersen --- include/linux/rcupdate.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 75a2eded7aa2..185dd9736863 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -382,6 +382,24 @@ do { \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ } while (0) +/** + * rcu_replace_pointer() - replace an RCU pointer, returning its old value + * @rcu_ptr: RCU pointer, whose old value is returned + * @ptr: regular pointer + * @c: the lockdep conditions under which the dereference will take place + * + * Perform a replacement, where @rcu_ptr is an RCU-annotated + * pointer and @c is the lockdep argument that is passed to the + * rcu_dereference_protected() call used to read that pointer. The old + * value of @rcu_ptr is returned, and @rcu_ptr is set to @ptr. + */ +#define rcu_replace_pointer(rcu_ptr, ptr, c) \ +({ \ + typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ + rcu_assign_pointer((rcu_ptr), (ptr)); \ + __tmp; \ +}) + /** * rcu_swap_protected() - swap an RCU and a regular pointer * @rcu_ptr: RCU pointer -- cgit v1.2.3-59-g8ed1b