From 6cf539a87a61a4fbc43f625267dbcbcf283872ed Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 9 Oct 2019 17:57:43 +0200 Subject: rcu: Fix data-race due to atomic_t copy-by-value This fixes a data-race where `atomic_t dynticks` is copied by value. The copy is performed non-atomically, resulting in a data-race if `dynticks` is updated concurrently. This data-race was found with KCSAN: ================================================================== BUG: KCSAN: data-race in dyntick_save_progress_counter / rcu_irq_enter write to 0xffff989dbdbe98e0 of 4 bytes by task 10 on cpu 3: atomic_add_return include/asm-generic/atomic-instrumented.h:78 [inline] rcu_dynticks_snap kernel/rcu/tree.c:310 [inline] dyntick_save_progress_counter+0x43/0x1b0 kernel/rcu/tree.c:984 force_qs_rnp+0x183/0x200 kernel/rcu/tree.c:2286 rcu_gp_fqs kernel/rcu/tree.c:1601 [inline] rcu_gp_fqs_loop+0x71/0x880 kernel/rcu/tree.c:1653 rcu_gp_kthread+0x22c/0x3b0 kernel/rcu/tree.c:1799 kthread+0x1b5/0x200 kernel/kthread.c:255 read to 0xffff989dbdbe98e0 of 4 bytes by task 154 on cpu 7: rcu_nmi_enter_common kernel/rcu/tree.c:828 [inline] rcu_irq_enter+0xda/0x240 kernel/rcu/tree.c:870 irq_enter+0x5/0x50 kernel/softirq.c:347 Reported by Kernel Concurrency Sanitizer on: CPU: 7 PID: 154 Comm: kworker/7:1H Not tainted 5.3.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Workqueue: kblockd blk_mq_run_work_fn ================================================================== Signed-off-by: Marco Elver Cc: Paul E. McKenney Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: Ingo Molnar Cc: Dmitry Vyukov Cc: rcu@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 66122602bd08..697e2c0624dc 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -449,7 +449,7 @@ TRACE_EVENT_RCU(rcu_fqs, */ TRACE_EVENT_RCU(rcu_dyntick, - TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks), + TP_PROTO(const char *polarity, long oldnesting, long newnesting, int dynticks), TP_ARGS(polarity, oldnesting, newnesting, dynticks), @@ -464,7 +464,7 @@ TRACE_EVENT_RCU(rcu_dyntick, __entry->polarity = polarity; __entry->oldnesting = oldnesting; __entry->newnesting = newnesting; - __entry->dynticks = atomic_read(&dynticks); + __entry->dynticks = dynticks; ), TP_printk("%s %lx %lx %#3x", __entry->polarity, -- cgit v1.2.3-59-g8ed1b From b3e627d3d5092a87fc9b9e37e341610cfecfbfdc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Oct 2019 02:55:57 +0000 Subject: rcu: Make PREEMPT_RCU be a modifier to TREE_RCU Currently PREEMPT_RCU and TREE_RCU are mutually exclusive Kconfig options. But PREEMPT_RCU actually specifies a kind of TREE_RCU, namely a preemptible TREE_RCU. This commit therefore makes PREEMPT_RCU be a modifer to the TREE_RCU Kconfig option. This has the benefit of simplifying several of the #if expressions that formerly needed to check both, but now need only check one or the other. Signed-off-by: Lai Jiangshan Signed-off-by: Lai Jiangshan Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/trace/events/rcu.h | 4 ++-- kernel/rcu/Kconfig | 13 +++++++------ kernel/rcu/Makefile | 1 - kernel/rcu/rcu.h | 2 +- kernel/rcu/update.c | 2 +- kernel/sysctl.c | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/trace') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0b7506330c87..70a41cd8f58d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -167,7 +167,7 @@ do { \ * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) #include #elif defined(CONFIG_TINY_RCU) #include @@ -601,7 +601,7 @@ do { \ * read-side critical section that would block in a !PREEMPT kernel. * But if you want the full story, read on! * - * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), + * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), * it is illegal to block while in an RCU read-side critical section. * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION * kernel builds, RCU read-side critical sections may be preempted, diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 66122602bd08..85019cf4ed6c 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -41,7 +41,7 @@ TRACE_EVENT(rcu_utilization, TP_printk("%s", __entry->s) ); -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) /* * Tracepoint for grace-period events. Takes a string identifying the @@ -432,7 +432,7 @@ TRACE_EVENT_RCU(rcu_fqs, __entry->cpu, __entry->qsevent) ); -#endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) */ +#endif /* #if defined(CONFIG_TREE_RCU) */ /* * Tracepoint for dyntick-idle entry/exit events. These take a string diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 7644eda17d62..0303934e6ef0 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -7,7 +7,7 @@ menu "RCU Subsystem" config TREE_RCU bool - default y if !PREEMPTION && SMP + default y if SMP help This option selects the RCU implementation that is designed for very large SMP system with hundreds or @@ -17,6 +17,7 @@ config TREE_RCU config PREEMPT_RCU bool default y if PREEMPTION + select TREE_RCU help This option selects the RCU implementation that is designed for very large SMP systems with hundreds or @@ -78,7 +79,7 @@ config TASKS_RCU user-mode execution as quiescent states. config RCU_STALL_COMMON - def_bool ( TREE_RCU || PREEMPT_RCU ) + def_bool TREE_RCU help This option enables RCU CPU stall code that is common between the TINY and TREE variants of RCU. The purpose is to allow @@ -86,13 +87,13 @@ config RCU_STALL_COMMON making these warnings mandatory for the tree variants. config RCU_NEED_SEGCBLIST - def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU ) + def_bool ( TREE_RCU || TREE_SRCU ) config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT + depends on TREE_RCU && RCU_EXPERT default 64 if 64BIT default 32 if !64BIT help @@ -112,7 +113,7 @@ config RCU_FANOUT_LEAF int "Tree-based hierarchical RCU leaf-level fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT + depends on TREE_RCU && RCU_EXPERT default 16 help This option controls the leaf-level fanout of hierarchical @@ -187,7 +188,7 @@ config RCU_BOOST_DELAY config RCU_NOCB_CPU bool "Offload RCU callback processing from boot-selected CPUs" - depends on TREE_RCU || PREEMPT_RCU + depends on TREE_RCU depends on RCU_EXPERT || NO_HZ_FULL default n help diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 020e8b6a644b..82d5fba48b2f 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -9,6 +9,5 @@ obj-$(CONFIG_TINY_SRCU) += srcutiny.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o obj-$(CONFIG_TREE_RCU) += tree.o -obj-$(CONFIG_PREEMPT_RCU) += tree.o obj-$(CONFIG_TINY_RCU) += tiny.o obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index ab504fbc76ca..eabafde2349e 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -454,7 +454,7 @@ enum rcutorture_type { INVALID_RCU_FLAVOR }; -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, unsigned long *gp_seq); void do_trace_rcu_torture_read(const char *rcutorturename, diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 1861103662db..34a7452b25fd 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -435,7 +435,7 @@ struct debug_obj_descr rcuhead_debug_descr = { EXPORT_SYMBOL_GPL(rcuhead_debug_descr); #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_RCU_TRACE) void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp, unsigned long secs, unsigned long c_old, unsigned long c) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 70665934d53e..d396aaaf19a3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1268,7 +1268,7 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_do_static_key, }, #endif -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) { .procname = "panic_on_rcu_stall", .data = &sysctl_panic_on_rcu_stall, -- cgit v1.2.3-59-g8ed1b From 77a40f97030b27b3fc1640a3ed203870f0817f57 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 30 Aug 2019 12:36:32 -0400 Subject: rcu: Remove kfree_rcu() special casing and lazy-callback handling This commit removes kfree_rcu() special-casing and the lazy-callback handling from Tree RCU. It moves some of this special casing to Tiny RCU, the removal of which will be the subject of later commits. This results in a nice negative delta. Suggested-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) [ paulmck: Add slab.h #include, thanks to kbuild test robot . ] Signed-off-by: Paul E. McKenney --- Documentation/RCU/stallwarn.txt | 11 +++------- include/linux/rcu_segcblist.h | 2 -- include/trace/events/rcu.h | 32 +++++++++++---------------- kernel/rcu/rcu.h | 27 ----------------------- kernel/rcu/rcu_segcblist.c | 25 +++------------------ kernel/rcu/rcu_segcblist.h | 25 ++------------------- kernel/rcu/srcutree.c | 4 ++-- kernel/rcu/tiny.c | 28 +++++++++++++++++++++++- kernel/rcu/tree.c | 40 +++++++++++++++++++++++----------- kernel/rcu/tree.h | 1 - kernel/rcu/tree_plugin.h | 48 ++++++++++------------------------------- kernel/rcu/tree_stall.h | 6 ++---- 12 files changed, 90 insertions(+), 159 deletions(-) (limited to 'include/trace') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index f48f4621ccbc..a360a8796710 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -225,18 +225,13 @@ an estimate of the total number of RCU callbacks queued across all CPUs In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed for each CPU: - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 Nonlazy posted: ..D + 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1 The "last_accelerate:" prints the low-order 16 bits (in hex) of the jiffies counter when this CPU last invoked rcu_try_advance_all_cbs() from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from -rcu_prepare_for_idle(). The "Nonlazy posted:" indicates lazy-callback -status, so that an "l" indicates that all callbacks were lazy at the start -of the last idle period and an "L" indicates that there are currently -no non-lazy callbacks (in both cases, "." is printed otherwise, as -shown above) and "D" indicates that dyntick-idle processing is enabled -("." is printed otherwise, for example, if disabled via the "nohz=" -kernel boot parameter). +rcu_prepare_for_idle(). "dyntick_enabled: 1" indicates that dyntick-idle +processing is enabled. If the grace period ends just as the stall warning starts printing, there will be a spurious stall-warning message, which will include diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 646759042333..b36afe7b22c9 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -22,7 +22,6 @@ struct rcu_cblist { struct rcu_head *head; struct rcu_head **tail; long len; - long len_lazy; }; #define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } @@ -73,7 +72,6 @@ struct rcu_segcblist { #else long len; #endif - long len_lazy; u8 enabled; u8 offloaded; }; diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 66122602bd08..4ab16fcda895 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -481,16 +481,14 @@ TRACE_EVENT_RCU(rcu_dyntick, */ TRACE_EVENT_RCU(rcu_callback, - TP_PROTO(const char *rcuname, struct rcu_head *rhp, long qlen_lazy, - long qlen), + TP_PROTO(const char *rcuname, struct rcu_head *rhp, long qlen), - TP_ARGS(rcuname, rhp, qlen_lazy, qlen), + TP_ARGS(rcuname, rhp, qlen), TP_STRUCT__entry( __field(const char *, rcuname) __field(void *, rhp) __field(void *, func) - __field(long, qlen_lazy) __field(long, qlen) ), @@ -498,13 +496,12 @@ TRACE_EVENT_RCU(rcu_callback, __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->func = rhp->func; - __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; ), - TP_printk("%s rhp=%p func=%ps %ld/%ld", + TP_printk("%s rhp=%p func=%ps %ld", __entry->rcuname, __entry->rhp, __entry->func, - __entry->qlen_lazy, __entry->qlen) + __entry->qlen) ); /* @@ -518,15 +515,14 @@ TRACE_EVENT_RCU(rcu_callback, TRACE_EVENT_RCU(rcu_kfree_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset, - long qlen_lazy, long qlen), + long qlen), - TP_ARGS(rcuname, rhp, offset, qlen_lazy, qlen), + TP_ARGS(rcuname, rhp, offset, qlen), TP_STRUCT__entry( __field(const char *, rcuname) __field(void *, rhp) __field(unsigned long, offset) - __field(long, qlen_lazy) __field(long, qlen) ), @@ -534,13 +530,12 @@ TRACE_EVENT_RCU(rcu_kfree_callback, __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->offset = offset; - __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; ), - TP_printk("%s rhp=%p func=%ld %ld/%ld", + TP_printk("%s rhp=%p func=%ld %ld", __entry->rcuname, __entry->rhp, __entry->offset, - __entry->qlen_lazy, __entry->qlen) + __entry->qlen) ); /* @@ -552,27 +547,24 @@ TRACE_EVENT_RCU(rcu_kfree_callback, */ TRACE_EVENT_RCU(rcu_batch_start, - TP_PROTO(const char *rcuname, long qlen_lazy, long qlen, long blimit), + TP_PROTO(const char *rcuname, long qlen, long blimit), - TP_ARGS(rcuname, qlen_lazy, qlen, blimit), + TP_ARGS(rcuname, qlen, blimit), TP_STRUCT__entry( __field(const char *, rcuname) - __field(long, qlen_lazy) __field(long, qlen) __field(long, blimit) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; __entry->blimit = blimit; ), - TP_printk("%s CBs=%ld/%ld bl=%ld", - __entry->rcuname, __entry->qlen_lazy, __entry->qlen, - __entry->blimit) + TP_printk("%s CBs=%ld bl=%ld", + __entry->rcuname, __entry->qlen, __entry->blimit) ); /* diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index ab504fbc76ca..c30a1f7dbd15 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -198,33 +198,6 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -void kfree(const void *); - -/* - * Reclaim the specified callback, either by invoking it (non-lazy case) - * or freeing it directly (lazy case). Return true if lazy, false otherwise. - */ -static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) -{ - rcu_callback_t f; - unsigned long offset = (unsigned long)head->func; - - rcu_lock_acquire(&rcu_callback_map); - if (__is_kfree_rcu_offset(offset)) { - trace_rcu_invoke_kfree_callback(rn, head, offset); - kfree((void *)head - offset); - rcu_lock_release(&rcu_callback_map); - return true; - } else { - trace_rcu_invoke_callback(rn, head); - f = head->func; - WRITE_ONCE(head->func, (rcu_callback_t)0L); - f(head); - rcu_lock_release(&rcu_callback_map); - return false; - } -} - #ifdef CONFIG_RCU_STALL_COMMON extern int rcu_cpu_stall_ftrace_dump; diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index cbc87b804db9..5f4fd3b8777c 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -20,14 +20,10 @@ void rcu_cblist_init(struct rcu_cblist *rclp) rclp->head = NULL; rclp->tail = &rclp->head; rclp->len = 0; - rclp->len_lazy = 0; } /* * Enqueue an rcu_head structure onto the specified callback list. - * This function assumes that the callback is non-lazy because it - * is intended for use by no-CBs CPUs, which do not distinguish - * between lazy and non-lazy RCU callbacks. */ void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp) { @@ -54,7 +50,6 @@ void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp, else drclp->tail = &drclp->head; drclp->len = srclp->len; - drclp->len_lazy = srclp->len_lazy; if (!rhp) { rcu_cblist_init(srclp); } else { @@ -62,16 +57,12 @@ void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp, srclp->head = rhp; srclp->tail = &rhp->next; WRITE_ONCE(srclp->len, 1); - srclp->len_lazy = 0; } } /* * Dequeue the oldest rcu_head structure from the specified callback - * list. This function assumes that the callback is non-lazy, but - * the caller can later invoke rcu_cblist_dequeued_lazy() if it - * finds otherwise (and if it cares about laziness). This allows - * different users to have different ways of determining laziness. + * list. */ struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) { @@ -161,7 +152,6 @@ void rcu_segcblist_init(struct rcu_segcblist *rsclp) for (i = 0; i < RCU_CBLIST_NSEGS; i++) rsclp->tails[i] = &rsclp->head; rcu_segcblist_set_len(rsclp, 0); - rsclp->len_lazy = 0; rsclp->enabled = 1; } @@ -173,7 +163,6 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) { WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); - WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); rsclp->enabled = 0; } @@ -253,11 +242,9 @@ bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp) * absolutely not OK for it to ever miss posting a callback. */ void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, - struct rcu_head *rhp, bool lazy) + struct rcu_head *rhp) { rcu_segcblist_inc_len(rsclp); - if (lazy) - rsclp->len_lazy++; smp_mb(); /* Ensure counts are updated before callback is enqueued. */ rhp->next = NULL; WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp); @@ -275,15 +262,13 @@ void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, * period. You have been warned. */ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, - struct rcu_head *rhp, bool lazy) + struct rcu_head *rhp) { int i; if (rcu_segcblist_n_cbs(rsclp) == 0) return false; rcu_segcblist_inc_len(rsclp); - if (lazy) - rsclp->len_lazy++; smp_mb(); /* Ensure counts are updated before callback is entrained. */ rhp->next = NULL; for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) @@ -307,8 +292,6 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, struct rcu_cblist *rclp) { - rclp->len_lazy += rsclp->len_lazy; - rsclp->len_lazy = 0; rclp->len = rcu_segcblist_xchg_len(rsclp, 0); } @@ -361,9 +344,7 @@ void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, struct rcu_cblist *rclp) { - rsclp->len_lazy += rclp->len_lazy; rcu_segcblist_add_len(rsclp, rclp->len); - rclp->len_lazy = 0; rclp->len = 0; } diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index 815c2fdd3fcc..5c293afc07b8 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -15,15 +15,6 @@ static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) return READ_ONCE(rclp->len); } -/* - * Account for the fact that a previously dequeued callback turned out - * to be marked as lazy. - */ -static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) -{ - rclp->len_lazy--; -} - void rcu_cblist_init(struct rcu_cblist *rclp); void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp); void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp, @@ -59,18 +50,6 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) #endif } -/* Return number of lazy callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) -{ - return rsclp->len_lazy; -} - -/* Return number of lazy callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) -{ - return rcu_segcblist_n_cbs(rsclp) - rsclp->len_lazy; -} - /* * Is the specified rcu_segcblist enabled, for example, not corresponding * to an offline CPU? @@ -106,9 +85,9 @@ struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp); struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp); bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp); void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, - struct rcu_head *rhp, bool lazy); + struct rcu_head *rhp); bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, - struct rcu_head *rhp, bool lazy); + struct rcu_head *rhp); void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, struct rcu_cblist *rclp); void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 5dffade2d7cd..d0a9d5b69087 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -853,7 +853,7 @@ static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, local_irq_save(flags); sdp = this_cpu_ptr(ssp->sda); spin_lock_rcu_node(sdp); - rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); + rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_gp_seq)); s = rcu_seq_snap(&ssp->srcu_gp_seq); @@ -1052,7 +1052,7 @@ void srcu_barrier(struct srcu_struct *ssp) sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); if (!rcu_segcblist_entrain(&sdp->srcu_cblist, - &sdp->srcu_barrier_head, 0)) { + &sdp->srcu_barrier_head)) { debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&ssp->srcu_barrier_cpu_cnt); } diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 477b4eb44af5..dd572ce7c747 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "rcu.h" @@ -73,6 +74,31 @@ void rcu_sched_clock_irq(int user) } } +/* + * Reclaim the specified callback, either by invoking it for non-kfree cases or + * freeing it directly (for kfree). Return true if kfreeing, false otherwise. + */ +static inline bool rcu_reclaim_tiny(struct rcu_head *head) +{ + rcu_callback_t f; + unsigned long offset = (unsigned long)head->func; + + rcu_lock_acquire(&rcu_callback_map); + if (__is_kfree_rcu_offset(offset)) { + trace_rcu_invoke_kfree_callback("", head, offset); + kfree((void *)head - offset); + rcu_lock_release(&rcu_callback_map); + return true; + } + + trace_rcu_invoke_callback("", head); + f = head->func; + WRITE_ONCE(head->func, (rcu_callback_t)0L); + f(head); + rcu_lock_release(&rcu_callback_map); + return false; +} + /* Invoke the RCU callbacks whose grace period has elapsed. */ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) { @@ -100,7 +126,7 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused prefetch(next); debug_rcu_head_unqueue(list); local_bh_disable(); - __rcu_reclaim("", list); + rcu_reclaim_tiny(list); local_bh_enable(); list = next; } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0512221cd84b..a8dd612098bf 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include "../time/tick-internal.h" @@ -2146,7 +2147,6 @@ static void rcu_do_batch(struct rcu_data *rdp) /* If no callbacks are ready, just return. */ if (!rcu_segcblist_ready_cbs(&rdp->cblist)) { trace_rcu_batch_start(rcu_state.name, - rcu_segcblist_n_lazy_cbs(&rdp->cblist), rcu_segcblist_n_cbs(&rdp->cblist), 0); trace_rcu_batch_end(rcu_state.name, 0, !rcu_segcblist_empty(&rdp->cblist), @@ -2168,7 +2168,6 @@ static void rcu_do_batch(struct rcu_data *rdp) if (unlikely(bl > 100)) tlimit = local_clock() + rcu_resched_ns; trace_rcu_batch_start(rcu_state.name, - rcu_segcblist_n_lazy_cbs(&rdp->cblist), rcu_segcblist_n_cbs(&rdp->cblist), bl); rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); if (offloaded) @@ -2179,9 +2178,19 @@ static void rcu_do_batch(struct rcu_data *rdp) tick_dep_set_task(current, TICK_DEP_BIT_RCU); rhp = rcu_cblist_dequeue(&rcl); for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) { + rcu_callback_t f; + debug_rcu_head_unqueue(rhp); - if (__rcu_reclaim(rcu_state.name, rhp)) - rcu_cblist_dequeued_lazy(&rcl); + + rcu_lock_acquire(&rcu_callback_map); + trace_rcu_invoke_callback(rcu_state.name, rhp); + + f = rhp->func; + WRITE_ONCE(rhp->func, (rcu_callback_t)0L); + f(rhp); + + rcu_lock_release(&rcu_callback_map); + /* * Stop only if limit reached and CPU has something to do. * Note: The rcl structure counts down from zero. @@ -2583,7 +2592,7 @@ static void rcu_leak_callback(struct rcu_head *rhp) * is expected to specify a CPU. */ static void -__call_rcu(struct rcu_head *head, rcu_callback_t func, bool lazy) +__call_rcu(struct rcu_head *head, rcu_callback_t func) { unsigned long flags; struct rcu_data *rdp; @@ -2618,18 +2627,17 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, bool lazy) if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); } + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) return; // Enqueued onto ->nocb_bypass, so just leave. /* If we get here, rcu_nocb_try_bypass() acquired ->nocb_lock. */ - rcu_segcblist_enqueue(&rdp->cblist, head, lazy); + rcu_segcblist_enqueue(&rdp->cblist, head); if (__is_kfree_rcu_offset((unsigned long)func)) trace_rcu_kfree_callback(rcu_state.name, head, (unsigned long)func, - rcu_segcblist_n_lazy_cbs(&rdp->cblist), rcu_segcblist_n_cbs(&rdp->cblist)); else trace_rcu_callback(rcu_state.name, head, - rcu_segcblist_n_lazy_cbs(&rdp->cblist), rcu_segcblist_n_cbs(&rdp->cblist)); /* Go handle any RCU core processing required. */ @@ -2679,7 +2687,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, bool lazy) */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { - __call_rcu(head, func, 0); + __call_rcu(head, func); } EXPORT_SYMBOL_GPL(call_rcu); @@ -2747,10 +2755,18 @@ static void kfree_rcu_work(struct work_struct *work) // List "head" is now private, so traverse locklessly. for (; head; head = next) { + unsigned long offset = (unsigned long)head->func; + next = head->next; // Potentially optimize with kfree_bulk in future. debug_rcu_head_unqueue(head); - __rcu_reclaim(rcu_state.name, head); + rcu_lock_acquire(&rcu_callback_map); + trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset); + + /* Could be possible to optimize with kfree_bulk in future */ + kfree((void *)head - offset); + + rcu_lock_release(&rcu_callback_map); cond_resched_tasks_rcu_qs(); } } @@ -2825,7 +2841,7 @@ static void kfree_rcu_monitor(struct work_struct *work) */ void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func) { - __call_rcu(head, func, 1); + __call_rcu(head, func); } EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch); @@ -3100,7 +3116,7 @@ static void rcu_barrier_func(void *unused) debug_rcu_head_queue(&rdp->barrier_head); rcu_nocb_lock(rdp); WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); - if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { + if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { atomic_inc(&rcu_state.barrier_cpu_count); } else { debug_rcu_head_unqueue(&rdp->barrier_head); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 055c31781d3a..15405420b40c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -183,7 +183,6 @@ struct rcu_data { bool rcu_urgent_qs; /* GP old need light quiescent state. */ bool rcu_forced_tick; /* Forced tick to provide QS. */ #ifdef CONFIG_RCU_FAST_NO_HZ - bool all_lazy; /* All CPU's CBs lazy at idle start? */ unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */ unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fa08d55f7040..d5334e49ccca 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1262,10 +1262,9 @@ static void rcu_prepare_for_idle(void) /* * This code is invoked when a CPU goes idle, at which point we want * to have the CPU do everything required for RCU so that it can enter - * the energy-efficient dyntick-idle mode. This is handled by a - * state machine implemented by rcu_prepare_for_idle() below. + * the energy-efficient dyntick-idle mode. * - * The following three proprocessor symbols control this state machine: + * The following preprocessor symbol controls this: * * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted * to sleep in dyntick-idle mode with RCU callbacks pending. This @@ -1274,21 +1273,15 @@ static void rcu_prepare_for_idle(void) * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your * system. And if you are -that- concerned about energy efficiency, * just power the system down and be done with it! - * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is - * permitted to sleep in dyntick-idle mode with only lazy RCU - * callbacks pending. Setting this too high can OOM your system. * - * The values below work well in practice. If future workloads require + * The value below works well in practice. If future workloads require * adjustment, they can be converted into kernel config parameters, though * making the state machine smarter might be a better option. */ #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ -#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; module_param(rcu_idle_gp_delay, int, 0644); -static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; -module_param(rcu_idle_lazy_gp_delay, int, 0644); /* * Try to advance callbacks on the current CPU, but only if it has been @@ -1327,8 +1320,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) /* * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready * to invoke. If the CPU has callbacks, try to advance them. Tell the - * caller to set the timeout based on whether or not there are non-lazy - * callbacks. + * caller about what to set the timeout. * * The caller must have disabled interrupts. */ @@ -1354,25 +1346,18 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) } rdp->last_accelerate = jiffies; - /* Request timer delay depending on laziness, and round. */ - rdp->all_lazy = !rcu_segcblist_n_nonlazy_cbs(&rdp->cblist); - if (rdp->all_lazy) { - dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; - } else { - dj = round_up(rcu_idle_gp_delay + jiffies, - rcu_idle_gp_delay) - jiffies; - } + /* Request timer and round. */ + dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies; + *nextevt = basemono + dj * TICK_NSEC; return 0; } /* - * Prepare a CPU for idle from an RCU perspective. The first major task - * is to sense whether nohz mode has been enabled or disabled via sysfs. - * The second major task is to check to see if a non-lazy callback has - * arrived at a CPU that previously had only lazy callbacks. The third - * major task is to accelerate (that is, assign grace-period numbers to) - * any recently arrived callbacks. + * Prepare a CPU for idle from an RCU perspective. The first major task is to + * sense whether nohz mode has been enabled or disabled via sysfs. The second + * major task is to accelerate (that is, assign grace-period numbers to) any + * recently arrived callbacks. * * The caller must have disabled interrupts. */ @@ -1398,17 +1383,6 @@ static void rcu_prepare_for_idle(void) if (!tne) return; - /* - * If a non-lazy callback arrived at a CPU having only lazy - * callbacks, invoke RCU core for the side-effect of recalculating - * idle duration on re-entry to idle. - */ - if (rdp->all_lazy && rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)) { - rdp->all_lazy = false; - invoke_rcu_core(); - return; - } - /* * If we have not yet accelerated this jiffy, accelerate all * callbacks on this CPU. diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index c0b8c458d8a6..806f2ddc8f74 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -263,11 +263,9 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c", + sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d", rdp->last_accelerate & 0xffff, jiffies & 0xffff, - ".l"[rdp->all_lazy], - ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], - ".D"[!!rdp->tick_nohz_enabled_snap]); + !!rdp->tick_nohz_enabled_snap); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ -- cgit v1.2.3-59-g8ed1b