From 578f5cbe7f89714a648e4a8510e3770153abcaf9 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 19 Feb 2025 07:50:43 -0500 Subject: MAINTAINERS: Update Joel's email address Update MAINTAINERS file to reflect changes to Joel's email address for upstream work. Reviewed-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96b827049501..8eb7e057b8f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1682,7 +1682,7 @@ M: Greg Kroah-Hartman M: Arve Hjønnevåg M: Todd Kjos M: Martijn Coenen -M: Joel Fernandes +M: Joel Fernandes M: Christian Brauner M: Carlos Llamas M: Suren Baghdasaryan @@ -13705,7 +13705,7 @@ M: Luc Maranget M: "Paul E. McKenney" R: Akira Yokosawa R: Daniel Lustig -R: Joel Fernandes +R: Joel Fernandes L: linux-kernel@vger.kernel.org L: linux-arch@vger.kernel.org L: lkmm@lists.linux.dev @@ -20291,7 +20291,7 @@ READ-COPY UPDATE (RCU) M: "Paul E. McKenney" M: Frederic Weisbecker (kernel/rcu/tree_nocb.h) M: Neeraj Upadhyay (kernel/rcu/tasks.h) -M: Joel Fernandes +M: Joel Fernandes M: Josh Triplett M: Boqun Feng M: Uladzislau Rezki -- cgit v1.2.3-59-g8ed1b From 31b7ce3d98a57de80e255f5124f8dbdfb4be8777 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 Jan 2025 13:42:23 -0800 Subject: rcutorture: Make srcu_lockdep.sh check kernel Kconfig The srcu_lockdep.sh currently blindly trusts the rcutorture SRCU-P scenario to build its kernel with lockdep enabled. Of course, this dependency might not be obvious to someone rebalancing SRCU scenarios. This commit therefore adds code to srcu_lockdep.sh that verifies that the .config file has lockdep enabled. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index 2db12c5cad9c..b94f6d3445c6 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -39,8 +39,9 @@ do shift done -err= nerrs=0 + +# Test lockdep's handling of deadlocks. for d in 0 1 do for t in 0 1 2 @@ -52,6 +53,12 @@ do tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 ret=$? mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi if test "$d" -ne 0 && test "$ret" -eq 0 then err=1 @@ -71,6 +78,8 @@ do done done done + +# Set up exit code. if test "$nerrs" -ne 0 then exit 1 -- cgit v1.2.3-59-g8ed1b From 75d8bf48a8bef2628243d0d76599c710412ba25b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 Jan 2025 15:59:30 -0800 Subject: rcutorture: Make srcu_lockdep.sh check reader-conflict handling Mixing different flavors of RCU readers is forbidden, for example, you should not use srcu_read_lock() and srcu_read_lock_nmisafe() on the same srcu_struct structure. There are checks for this, but these checks are not tested on a regular basis. This commit therefore adds such tests to srcu_lockdep.sh. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- .../selftests/rcutorture/bin/srcu_lockdep.sh | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index b94f6d3445c6..208be7d09a61 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -79,6 +79,37 @@ do done done +# Test lockdep-enabled testing of mixed SRCU readers. +for val in 0x1 0xf +do + err= + tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.reader_flavor=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 + ret=$? + mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi + if test "$val" -eq 0xf && test "$ret" -eq 0 + then + err=1 + echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test "$val" -eq 0x1 && test "$ret" -ne 0 + then + err=1 + echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test -n "$err" + then + grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + nerrs=$((nerrs+1)) + fi +done + # Set up exit code. if test "$nerrs" -ne 0 then -- cgit v1.2.3-59-g8ed1b From e73e5b7c1acd51fc5000dc476ceb76184b6ca253 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 5 Feb 2025 12:49:32 -0800 Subject: rcutorture: Split out beginning and end from rcu_torture_one_read() The rcu_torture_one_read() function is designed for RCU readers that are confined to a task, such that a single thread of control extends from the beginning of a given RCU read-side critical section to its end. This does not suffice for things like srcu_down_read() and srcu_up_read(), where the critical section might start at task level and end in a timer handler. This commit therefore creates separate init_rcu_torture_one_read_state(), rcu_torture_one_read_start(), and rcu_torture_one_read_end() functions, along with a rcu_torture_one_read_state structure to coordinate their actions. These will be used to create tests for srcu_down_read() and friends. One caution: The caller to rcu_torture_one_read_start() must enter the initial read-side critical section prior to the call. This enables use of non-standard primitives such as srcu_down_read() while still using the same validation code. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/rcutorture.c | 124 +++++++++++++++++++++++++++++++----------------- 1 file changed, 81 insertions(+), 43 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 4fa7772be183..ae7b68043150 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2164,53 +2164,70 @@ rcutorture_loop_extend(int *readstate, bool insoftirq, struct torture_random_sta return &rtrsp[j]; } -/* - * Do one read-side critical section, returning false if there was - * no data to read. Can be invoked both from process context and - * from a timer handler. - */ -static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) -{ - bool checkpolling = !(torture_random(trsp) & 0xfff); +struct rcu_torture_one_read_state { + bool checkpolling; unsigned long cookie; struct rcu_gp_oldstate cookie_full; - int i; unsigned long started; - unsigned long completed; - int newstate; struct rcu_torture *p; - int pipe_count; - bool preempted = false; - int readstate = 0; - struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS] = { { 0 } }; - struct rt_read_seg *rtrsp = &rtseg[0]; - struct rt_read_seg *rtrsp1; + int readstate; + struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS]; + struct rt_read_seg *rtrsp; unsigned long long ts; +}; - WARN_ON_ONCE(!rcu_is_watching()); - newstate = rcutorture_extend_mask(readstate, trsp); - rcutorture_one_extend(&readstate, newstate, myid < 0, trsp, rtrsp++); - if (checkpolling) { +static void init_rcu_torture_one_read_state(struct rcu_torture_one_read_state *rtorsp, + struct torture_random_state *trsp) +{ + memset(rtorsp, 0, sizeof(*rtorsp)); + rtorsp->checkpolling = !(torture_random(trsp) & 0xfff); + rtorsp->rtrsp = &rtorsp->rtseg[0]; +} + +/* + * Set up the first segment of a series of overlapping read-side + * critical sections. The caller must have actually initiated the + * outermost read-side critical section. + */ +static bool rcu_torture_one_read_start(struct rcu_torture_one_read_state *rtorsp, + struct torture_random_state *trsp, long myid) +{ + if (rtorsp->checkpolling) { if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - cookie = cur_ops->get_gp_state(); + rtorsp->cookie = cur_ops->get_gp_state(); if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) - cur_ops->get_gp_state_full(&cookie_full); + cur_ops->get_gp_state_full(&rtorsp->cookie_full); } - started = cur_ops->get_gp_seq(); - ts = rcu_trace_clock_local(); - p = rcu_dereference_check(rcu_torture_current, + rtorsp->started = cur_ops->get_gp_seq(); + rtorsp->ts = rcu_trace_clock_local(); + rtorsp->p = rcu_dereference_check(rcu_torture_current, !cur_ops->readlock_held || cur_ops->readlock_held()); - if (p == NULL) { + if (rtorsp->p == NULL) { /* Wait for rcu_torture_writer to get underway */ - rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp); + rcutorture_one_extend(&rtorsp->readstate, 0, myid < 0, trsp, rtorsp->rtrsp); return false; } - if (p->rtort_mbtest == 0) + if (rtorsp->p->rtort_mbtest == 0) atomic_inc(&n_rcu_torture_mberror); - rcu_torture_reader_do_mbchk(myid, p, trsp); - rtrsp = rcutorture_loop_extend(&readstate, myid < 0, trsp, rtrsp); + rcu_torture_reader_do_mbchk(myid, rtorsp->p, trsp); + return true; +} + +/* + * Complete the last segment of a series of overlapping read-side + * critical sections and check for errors. + */ +static void rcu_torture_one_read_end(struct rcu_torture_one_read_state *rtorsp, + struct torture_random_state *trsp, long myid) +{ + int i; + unsigned long completed; + int pipe_count; + bool preempted = false; + struct rt_read_seg *rtrsp1; + preempt_disable(); - pipe_count = READ_ONCE(p->rtort_pipe_count); + pipe_count = READ_ONCE(rtorsp->p->rtort_pipe_count); if (pipe_count > RCU_TORTURE_PIPE_LEN) { // Should not happen in a correct RCU implementation, // happens quite often for torture_type=busted. @@ -2218,28 +2235,28 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) } completed = cur_ops->get_gp_seq(); if (pipe_count > 1) { - do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, - ts, started, completed); + do_trace_rcu_torture_read(cur_ops->name, &rtorsp->p->rtort_rcu, + rtorsp->ts, rtorsp->started, completed); rcu_ftrace_dump(DUMP_ALL); } __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = rcutorture_seq_diff(completed, started); + completed = rcutorture_seq_diff(completed, rtorsp->started); if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; } __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); - if (checkpolling) { + if (rtorsp->checkpolling) { if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ONCE(cur_ops->poll_gp_state(cookie), + WARN_ONCE(cur_ops->poll_gp_state(rtorsp->cookie), "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", __func__, rcu_torture_writer_state_getname(), rcu_torture_writer_state, - cookie, cur_ops->get_gp_state()); + rtorsp->cookie, cur_ops->get_gp_state()); if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) - WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full), + WARN_ONCE(cur_ops->poll_gp_state_full(&rtorsp->cookie_full), "%s: Cookie check 6 failed %s(%d) online %*pbl\n", __func__, rcu_torture_writer_state_getname(), @@ -2248,21 +2265,42 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) } if (cur_ops->reader_blocked) preempted = cur_ops->reader_blocked(); - rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp); - WARN_ON_ONCE(readstate); + rcutorture_one_extend(&rtorsp->readstate, 0, myid < 0, trsp, rtorsp->rtrsp); + WARN_ON_ONCE(rtorsp->readstate); // This next splat is expected behavior if leakpointer, especially // for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels. - WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1); + WARN_ON_ONCE(leakpointer && READ_ONCE(rtorsp->p->rtort_pipe_count) > 1); /* If error or close call, record the sequence of reader protections. */ if ((pipe_count > 1 || completed > 1) && !xchg(&err_segs_recorded, 1)) { i = 0; - for (rtrsp1 = &rtseg[0]; rtrsp1 < rtrsp; rtrsp1++) + for (rtrsp1 = &rtorsp->rtseg[0]; rtrsp1 < rtorsp->rtrsp; rtrsp1++) err_segs[i++] = *rtrsp1; rt_read_nsegs = i; rt_read_preempted = preempted; } +} +/* + * Do one read-side critical section, returning false if there was + * no data to read. Can be invoked both from process context and + * from a timer handler. + */ +static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) +{ + int newstate; + struct rcu_torture_one_read_state rtors; + + WARN_ON_ONCE(!rcu_is_watching()); + init_rcu_torture_one_read_state(&rtors, trsp); + newstate = rcutorture_extend_mask(rtors.readstate, trsp); + rcutorture_one_extend(&rtors.readstate, newstate, myid < 0, trsp, rtors.rtrsp++); + if (!rcu_torture_one_read_start(&rtors, trsp, myid)) { + rcutorture_one_extend(&rtors.readstate, 0, myid < 0, trsp, rtors.rtrsp); + return false; + } + rtors.rtrsp = rcutorture_loop_extend(&rtors.readstate, myid < 0, trsp, rtors.rtrsp); + rcu_torture_one_read_end(&rtors, trsp, myid); return true; } -- cgit v1.2.3-59-g8ed1b From 9c94c5ad39182ccf8ae81a2fa4fa4e63abedcbf7 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 24 Mar 2025 04:32:56 -0400 Subject: rcu: Replace magic number with meaningful constant in rcu_seq_done_exact() The rcu_seq_done_exact() function checks if a grace period has completed by comparing sequence numbers. It includes a guard band to handle sequence number wraparound, which was previously expressed using the magic number calculation '3 * RCU_SEQ_STATE_MASK + 1'. This magic number is not immediately obvious in terms of what it represents. Instead, the reason we need this tiny guardband is because of the lag between the setting of rcu_state.gp_seq_polled and root rnp's gp_seq in rcu_gp_init(). This guardband needs to be at least 2 GPs worth of counts, to avoid recognizing the newly started GP as completed immediately, due to the following sequence which arises due to the delay between update of rcu_state.gp_seq_polled and root rnp's gp_seq: rnp->gp_seq = rcu_state.gp_seq = 0 CPU 0 CPU 1 ----- ----- // rcu_state.gp_seq = 1 rcu_seq_start(&rcu_state.gp_seq) // snap = 8 snap = rcu_seq_snap(&rcu_state.gp_seq) // Two full GP differences rcu_seq_done_exact(&rnp->gp_seq, snap) // rnp->gp_seq = 1 WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq); This can happen due to get_state_synchronize_rcu_full() sampling rcu_state.gp_seq_polled, however the poll_state_synchronize_rcu_full() sampling the root rnp's gp_seq. The delay between the update of the 2 counters occurs in rcu_gp_init() during which the counters briefly go out of sync. Make the guardband explictly 2 GPs. This improves code readability and maintainability by making the intent clearer as well. Suggested-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/rcu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index eed2951a4962..5e1ee570bb27 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -57,6 +57,9 @@ /* Low-order bit definition for polled grace-period APIs. */ #define RCU_GET_STATE_COMPLETED 0x1 +/* A complete grace period count */ +#define RCU_SEQ_GP (RCU_SEQ_STATE_MASK + 1) + extern int sysctl_sched_rt_runtime; /* @@ -162,7 +165,7 @@ static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s) { unsigned long cur_s = READ_ONCE(*sp); - return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (3 * RCU_SEQ_STATE_MASK + 1)); + return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_GP)); } /* -- cgit v1.2.3-59-g8ed1b From 4aa6e94cf90c0d7414580bd6a936600aef8b1e43 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 24 Mar 2025 09:29:26 -0400 Subject: rcu: Add warning to ensure rcu_seq_done_exact() is working The previous patch improved the rcu_seq_done_exact() function by adding a meaningful constant for the guardband. Ensure that this is working for the future by a quick check during rcu_gp_init(). Reviewed-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/tree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 659f83e71048..6b1bb85b2a56 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1798,6 +1798,7 @@ static noinline_for_stack bool rcu_gp_init(void) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(); bool start_new_poll; + unsigned long old_gp_seq; WRITE_ONCE(rcu_state.gp_activity, jiffies); raw_spin_lock_irq_rcu_node(rnp); @@ -1825,7 +1826,12 @@ static noinline_for_stack bool rcu_gp_init(void) */ start_new_poll = rcu_sr_normal_gp_init(); /* Record GP times before starting GP, hence rcu_seq_start(). */ + old_gp_seq = rcu_state.gp_seq; rcu_seq_start(&rcu_state.gp_seq); + /* Ensure that rcu_seq_done_exact() guardband doesn't give false positives. */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && + rcu_seq_done_exact(&old_gp_seq, rcu_seq_snap(&rcu_state.gp_seq))); + ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq); trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start")); rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap); -- cgit v1.2.3-59-g8ed1b From 4d949edbc4026eeb81a8f931586cfd4f1de5957e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Mar 2025 14:56:18 +0100 Subject: rcu: Comment on the extraneous delta test on rcu_seq_done_exact() The numbers used in rcu_seq_done_exact() lack some explanation behind their magic. Especially after the commit: 85aad7cc4178 ("rcu: Fix get_state_synchronize_rcu_full() GP-start detection") which reported a subtle issue where a new GP sequence snapshot was taken on the root node state while a grace period had already been started and reflected on the global state sequence but not yet on the root node sequence, making a polling user waiting on a wrong already started grace period that would ignore freshly online CPUs. The fix involved taking the snaphot on the global state sequence and waiting on the root node sequence. And since a grace period is first started on the global state and only afterward reflected on the root node, a snapshot taken on the global state sequence might be two full grace periods ahead of the root node as in the following example: rnp->gp_seq = rcu_state.gp_seq = 0 CPU 0 CPU 1 ----- ----- // rcu_state.gp_seq = 1 rcu_seq_start(&rcu_state.gp_seq) // snap = 8 snap = rcu_seq_snap(&rcu_state.gp_seq) // Two full GP differences rcu_seq_done_exact(&rnp->gp_seq, snap) // rnp->gp_seq = 1 WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq); Add a comment about those expectations and to clarify the magic within the relevant function. Note that the issue arises mainly with the use of rcu_seq_done_exact() which has a much tigher guardband (of 2 GPs) to ensure the false-negative window of the API during wraparound is limited to just 2 GPs. rcu_seq_done() does not have such strict requirements, however its large false-negative window of ULONG_MAX/2 is not ideal for the polling API. However, this also means care is needed to ensure the guardband is as large as needed to avoid the example scenario describe above which a warning added in an earlier patch does. [ Comment wordsmithing by Joel ] Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/rcu.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 5e1ee570bb27..db63f330768c 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -160,6 +160,15 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s) * Given a snapshot from rcu_seq_snap(), determine whether or not a * full update-side operation has occurred, but do not allow the * (ULONG_MAX / 2) safety-factor/guard-band. + * + * The token returned by get_state_synchronize_rcu_full() is based on + * rcu_state.gp_seq but it is tested in poll_state_synchronize_rcu_full() + * against the root rnp->gp_seq. Since rcu_seq_start() is first called + * on rcu_state.gp_seq and only later reflected on the root rnp->gp_seq, + * it is possible that rcu_seq_snap(rcu_state.gp_seq) returns 2 full grace + * periods ahead of the root rnp->gp_seq. To prevent false-positives with the + * full polling API that a wrap around instantly completed the GP, when nothing + * like that happened, adjust for the 2 GPs in the ULONG_CMP_LT(). */ static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s) { -- cgit v1.2.3-59-g8ed1b From f50ad4b73e1bdf7fb40566650fa11bcb438ad5ce Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 13 Feb 2025 11:47:27 -0500 Subject: srcu: Use rcu_seq_done_exact() for polling API poll_state_synchronize_srcu() uses rcu_seq_done() unlike poll_state_synchronize_rcu() which uses rcu_seq_done_exact(). The rcu_seq_done_exact() makes more sense for polling API, as with this API, there is a higher chance that there is a significant delay between the get_state..() and poll_state..() calls since a cookie can be stored and reused at a later time. During such a delay, if the gp_seq counter progresses more than ULONG_MAX/2 distance, then poll_state..() may return false for a long time unwantedly. Fix by using the more accurate rcu_seq_done_exact() API which is exactly what straight RCU's polling does. It may make sense, as future work, to add debug code here as well, where we compare a physical timestamp between get_state..() and poll_state() calls and yell if significant time has past but the grace period has still not progressed. Reviewed-by: Neeraj Upadhyay Reviewed-by: Paul E. McKenney Reviewed-by: Kent Overstreet Cc: Kent Overstreet Signed-off-by: Joel Fernandes --- kernel/rcu/srcutree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 9a59b071501b..48047260697e 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1589,7 +1589,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) { if (cookie != SRCU_GET_STATE_COMPLETED && - !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie)) + !rcu_seq_done_exact(&ssp->srcu_sup->srcu_gp_seq, cookie)) return false; // Ensure that the end of the SRCU grace period happens before // any subsequent code that the caller might execute. -- cgit v1.2.3-59-g8ed1b From a3204f778cf7e37c7344404768398b4f9d43a368 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Feb 2025 05:28:03 -0800 Subject: rcutorture: Make torture.sh --do-rt use CONFIG_PREEMPT_RT The torture.sh --do-rt command-line parameter is intended to mimic -rt kernels. Now that CONFIG_PREEMPT_RT is upstream, this commit makes this mimicking more precise. Note that testing of RCU priority boosting is disabled in favor of forward-progress testing of RCU callbacks. If it turns out to be possible to make kernels built with CONFIG_PREEMPT_RT=y to tolerate testing of both, both will be enabled. [ paulmck: Apply Sebastian Siewior feedback. ] Signed-off-by: Paul E. McKenney Cc: Sebastian Andrzej Siewior Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/torture.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 0447c4a00cc4..b64b356f55ff 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -448,13 +448,17 @@ fi if test "$do_rt" = "yes" then - # With all post-boot grace periods forced to normal. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1" - torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + # In both runs, disable testing of RCU priority boosting because + # -rt doesn't like its interaction with testing of callback + # flooding. + + # With all post-boot grace periods forced to normal (default for PREEMPT_RT). + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcutorture.preempt_duration=0" + torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_RCU_NOCB_CPU=y" --trust-make # With all post-boot grace periods forced to expedited. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1" - torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcupdate.rcu_normal_after_boot=0 rcupdate.rcu_expedited=1 rcutorture.preempt_duration=0" + torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_NOCB_CPU=y" --trust-make fi if test "$do_srcu_lockdep" = "yes" -- cgit v1.2.3-59-g8ed1b From 79b265a2e75d0745dcac5a7d5128fc8596ede502 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Fri, 16 May 2025 19:20:24 +0800 Subject: MAINTAINERS: Update Zqiang's email address This patch updates Zqiang's email address to qiang.zhang@linux.dev. Signed-off-by: Zqiang Acked-by: Joel Fernandes Signed-off-by: Joel Fernandes --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8eb7e057b8f9..901a54a633f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20298,7 +20298,7 @@ M: Uladzislau Rezki R: Steven Rostedt R: Mathieu Desnoyers R: Lai Jiangshan -R: Zqiang +R: Zqiang L: rcu@vger.kernel.org S: Supported W: http://www.rdrop.com/users/paulmck/RCU/ -- cgit v1.2.3-59-g8ed1b From 0999f6156013107aecc5b968386b5945d42ba177 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Dec 2024 18:07:11 +0100 Subject: rcu: Remove swake_up_one_online() bandaid It's now ok to perform a wake-up from an offline CPU because the resulting armed scheduler bandwidth hrtimers are now correctly targeted by hrtimer infrastructure. Remove the obsolete hackerry. Link: https://lore.kernel.org/all/20241231170712.149394-3-frederic@kernel.org/ Reviewed-by: Usama Arif Reviewed-by: Joel Fernandes Signed-off-by: Frederic Weisbecker Signed-off-by: Joel Fernandes --- kernel/rcu/tree.c | 34 +--------------------------------- kernel/rcu/tree_exp.h | 2 +- kernel/rcu/tree_nocb.h | 2 +- 3 files changed, 3 insertions(+), 35 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 659f83e71048..1b8eee06183c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1060,38 +1060,6 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } -static void swake_up_one_online_ipi(void *arg) -{ - struct swait_queue_head *wqh = arg; - - swake_up_one(wqh); -} - -static void swake_up_one_online(struct swait_queue_head *wqh) -{ - int cpu = get_cpu(); - - /* - * If called from rcutree_report_cpu_starting(), wake up - * is dangerous that late in the CPU-down hotplug process. The - * scheduler might queue an ignored hrtimer. Defer the wake up - * to an online CPU instead. - */ - if (unlikely(cpu_is_offline(cpu))) { - int target; - - target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), - cpu_online_mask); - - smp_call_function_single(target, swake_up_one_online_ipi, - wqh, 0); - put_cpu(); - } else { - put_cpu(); - swake_up_one(wqh); - } -} - /* * Awaken the grace-period kthread. Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1116,7 +1084,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one_online(&rcu_state.gp_wq); + swake_up_one(&rcu_state.gp_wq); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8d4895c854c5..c36c7d5575ca 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -200,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, if (rnp->parent == NULL) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) - swake_up_one_online(&rcu_state.expedited_wq); + swake_up_one(&rcu_state.expedited_wq); break; } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index fa269d34167a..109bc2df1d9a 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -216,7 +216,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp, raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); if (needwake) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake")); - swake_up_one_online(&rdp_gp->nocb_gp_wq); + swake_up_one(&rdp_gp->nocb_gp_wq); } return needwake; -- cgit v1.2.3-59-g8ed1b From da6b85598af30e9fec34d82882d7e1e39f3da769 Mon Sep 17 00:00:00 2001 From: Yongliang Gao Date: Sun, 16 Feb 2025 16:41:09 +0800 Subject: rcu/cpu_stall_cputime: fix the hardirq count for x86 architecture When counting the number of hardirqs in the x86 architecture, it is essential to add arch_irq_stat_cpu to ensure accuracy. For example, a CPU loop within the rcu_read_lock function. Before: [ 70.910184] rcu: INFO: rcu_preempt self-detected stall on CPU [ 70.910436] rcu: 3-....: (4999 ticks this GP) idle=*** [ 70.910711] rcu: hardirqs softirqs csw/system [ 70.910870] rcu: number: 0 657 0 [ 70.911024] rcu: cputime: 0 0 2498 ==> 2498(ms) [ 70.911278] rcu: (t=5001 jiffies g=3677 q=29 ncpus=8) After: [ 68.046132] rcu: INFO: rcu_preempt self-detected stall on CPU [ 68.046354] rcu: 2-....: (4999 ticks this GP) idle=*** [ 68.046628] rcu: hardirqs softirqs csw/system [ 68.046793] rcu: number: 2498 663 0 [ 68.046951] rcu: cputime: 0 0 2496 ==> 2496(ms) [ 68.047244] rcu: (t=5000 jiffies g=3825 q=4 ncpus=8) Fixes: be42f00b73a0 ("rcu: Add RCU stall diagnosis information") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501090842.SfI6QPGS-lkp@intel.com/ Signed-off-by: Yongliang Gao Reviewed-by: Neeraj Upadhyay Link: https://lore.kernel.org/r/20250216084109.3109837-1-leonylgao@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Joel Fernandes --- kernel/rcu/tree.c | 10 +++++++--- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_stall.h | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1b8eee06183c..0b0dd9e1f057 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -801,6 +801,10 @@ static int rcu_watching_snap_save(struct rcu_data *rdp) return 0; } +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif + /* * Returns positive if the specified CPU has passed through a quiescent state * by virtue of being in or having passed through an dynticks idle state since @@ -936,9 +940,9 @@ static int rcu_watching_snap_recheck(struct rcu_data *rdp) rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); - rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu); - rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu); - rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu); + rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu); + rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu); + rsrp->nr_csw = nr_context_switches_cpu(cpu); rsrp->jiffies = jiffies; rsrp->gp_seq = rdp->gp_seq; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a9a811d9d7a3..1bba2225e744 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -168,7 +168,7 @@ struct rcu_snap_record { u64 cputime_irq; /* Accumulated cputime of hard irqs */ u64 cputime_softirq;/* Accumulated cputime of soft irqs */ u64 cputime_system; /* Accumulated cputime of kernel tasks */ - unsigned long nr_hardirqs; /* Accumulated number of hard irqs */ + u64 nr_hardirqs; /* Accumulated number of hard irqs */ unsigned int nr_softirqs; /* Accumulated number of soft irqs */ unsigned long long nr_csw; /* Accumulated number of task switches */ unsigned long jiffies; /* Track jiffies value */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 925fcdad5dea..56b21219442b 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -435,8 +435,8 @@ static void print_cpu_stat_info(int cpu) rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); pr_err("\t hardirqs softirqs csw/system\n"); - pr_err("\t number: %8ld %10d %12lld\n", - kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs, + pr_err("\t number: %8lld %10d %12lld\n", + kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs, kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, nr_context_switches_cpu(cpu) - rsrp->nr_csw); pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n", -- cgit v1.2.3-59-g8ed1b From 9520371e3daa75cd508b262ac16a732d3ffc9a30 Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Wed, 19 Mar 2025 10:26:40 +0800 Subject: rust: sync: rcu: Mark Guard methods as inline Currently the implementation of "Guard" methods are basically wrappers around rcu's function within kernel. Building the kernel with llvm 18.1.8 on x86_64 machine will generate the following symbols: $ nm vmlinux | grep ' _R'.*Guard | rustfilt ffffffff817b6c90 T ::new ffffffff817b6cb0 T ::unlock ffffffff817b6cd0 T ::drop ffffffff817b6c90 T ::default These Rust symbols are basically wrappers around functions "rcu_read_lock" and "rcu_read_unlock". Marking them as inline can reduce the generation of these symbols, and saves the size of code generation for 132 bytes. $ ./scripts/bloat-o-meter vmlinux_old vmlinux_new (Output is demangled for readability) add/remove: 0/10 grow/shrink: 0/1 up/down: 0/-132 (-132) Function old new delta rust_driver_pci::SampleDriver::probe 1041 1034 -7 kernel::sync::rcu::Guard::default 9 - -9 kernel::sync::rcu::Guard::drop 9 - -9 kernel::sync::rcu::read_lock 9 - -9 kernel::sync::rcu::Guard::unlock 9 - -9 kernel::sync::rcu::Guard::new 9 - -9 __pfx__kernel::sync::rcu::Guard::default 16 - -16 __pfx__kernel::sync::rcu::Guard::drop 16 - -16 __pfx__kernel::sync::rcu::read_lock 16 - -16 __pfx__kernel::sync::rcu::Guard::unlock 16 - -16 __pfx__kernel::sync::rcu::Guard::new 16 - -16 Total: Before=23365955, After=23365823, chg -0.00% Link: https://github.com/Rust-for-Linux/linux/issues/1145 Signed-off-by: I Hsin Cheng Reviewed-by: Joel Fernandes Reviewed-by: Benno Lossin Reviewed-by: Charalampos Mitrodimas Acked-by: Miguel Ojeda Signed-off-by: Joel Fernandes --- rust/kernel/sync/rcu.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs index b51d9150ffe2..a32bef6e490b 100644 --- a/rust/kernel/sync/rcu.rs +++ b/rust/kernel/sync/rcu.rs @@ -17,6 +17,7 @@ pub struct Guard(NotThreadSafe); impl Guard { /// Acquires the RCU read side lock and returns a guard. + #[inline] pub fn new() -> Self { // SAFETY: An FFI call with no additional requirements. unsafe { bindings::rcu_read_lock() }; @@ -25,16 +26,19 @@ impl Guard { } /// Explicitly releases the RCU read side lock. + #[inline] pub fn unlock(self) {} } impl Default for Guard { + #[inline] fn default() -> Self { Self::new() } } impl Drop for Guard { + #[inline] fn drop(&mut self) { // SAFETY: By the type invariants, the RCU read side is locked, so it is ok to unlock it. unsafe { bindings::rcu_read_unlock() }; @@ -42,6 +46,7 @@ impl Drop for Guard { } /// Acquires the RCU read side lock. +#[inline] pub fn read_lock() -> Guard { Guard::new() } -- cgit v1.2.3-59-g8ed1b From b95d15980a0f2727a421392ebb50fc6338e43b02 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Dec 2024 18:07:12 +0100 Subject: Revert "rcu/nocb: Fix rcuog wake-up from offline softirq" This reverts commit f7345ccc62a4b880cf76458db5f320725f28e400. swake_up_one_online() has been removed because hrtimers can now assign a proper online target to hrtimers queued from offline CPUs. Therefore remove the related hackery. Link: https://lore.kernel.org/all/20241231170712.149394-4-frederic@kernel.org/ Reviewed-by: Usama Arif Reviewed-by: Joel Fernandes Signed-off-by: Frederic Weisbecker Signed-off-by: Joel Fernandes --- kernel/rcu/tree_nocb.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 109bc2df1d9a..1596812f7f12 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -554,19 +554,13 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, TPS("WakeLazy")); - } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) { + } else if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ rcu_nocb_unlock(rdp); wake_nocb_gp(rdp, false); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeEmpty")); } else { - /* - * Don't do the wake-up upfront on fragile paths. - * Also offline CPUs can't call swake_up_one_online() from - * (soft-)IRQs. Rely on the final deferred wake-up from - * rcutree_report_cpu_dead() - */ rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, TPS("WakeEmptyIsDeferred")); -- cgit v1.2.3-59-g8ed1b From 1708bf2cc46a3136cc5928c1027b2a3d25a41f94 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Mar 2025 16:21:50 -0700 Subject: doc: Update LWN RCU API links in whatisRCU.rst This commit adds the 2024 LWN RCU API article set. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- Documentation/RCU/whatisRCU.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index 53faeed7c190..be2eb6be16ec 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -15,6 +15,9 @@ to start learning about RCU: | 2014 Big API Table https://lwn.net/Articles/609973/ | 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/ | 2019 Big API Table https://lwn.net/Articles/777165/ +| 7. The RCU API, 2024 Edition https://lwn.net/Articles/988638/ +| 2024 Background Information https://lwn.net/Articles/988641/ +| 2024 Big API Table https://lwn.net/Articles/988666/ For those preferring video: -- cgit v1.2.3-59-g8ed1b From 79ea7f43f8d5aeefc711d4ce322e0a7fbf41de14 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Feb 2025 00:50:47 +0000 Subject: doc/RCU/listRCU: refine example code for eliminating stale data This patch adjust the example code with following two purpose: * reduce the confusion on not releasing e->lock * emphasize e is valid and not stale with e->lock held Signed-off-by: Wei Yang CC: Boqun Feng CC: Alan Huang Reviewed-by: Alan Huang Link: https://lore.kernel.org/r/20250218005047.27258-1-richard.weiyang@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Joel Fernandes --- Documentation/RCU/listRCU.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst index ed5c9d8c9afe..d8bb98623c12 100644 --- a/Documentation/RCU/listRCU.rst +++ b/Documentation/RCU/listRCU.rst @@ -334,7 +334,7 @@ If the system-call audit module were to ever need to reject stale data, one way to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the ``audit_entry`` structure, and modify audit_filter_task() as follows:: - static enum audit_state audit_filter_task(struct task_struct *tsk) + static struct audit_entry *audit_filter_task(struct task_struct *tsk, char **key) { struct audit_entry *e; enum audit_state state; @@ -346,16 +346,18 @@ to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to if (e->deleted) { spin_unlock(&e->lock); rcu_read_unlock(); - return AUDIT_BUILD_CONTEXT; + return NULL; } rcu_read_unlock(); if (state == AUDIT_STATE_RECORD) *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); - return state; + /* As long as e->lock is held, e is valid and + * its value is not stale */ + return e; } } rcu_read_unlock(); - return AUDIT_BUILD_CONTEXT; + return NULL; } The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the -- cgit v1.2.3-59-g8ed1b From bed3af437f38f9a7a8b8a7de153a421f023c6e0b Mon Sep 17 00:00:00 2001 From: Su Hui Date: Tue, 22 Apr 2025 09:51:45 +0800 Subject: rcuscale: using kcalloc() to relpace kmalloc() It's safer to using kcalloc() because it can prevent overflow problem. Reviewed-by: Paul E. McKenney Signed-off-by: Su Hui Signed-off-by: Joel Fernandes --- kernel/rcu/rcuscale.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 0f3059b1b80d..b521d0455992 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -762,7 +762,7 @@ kfree_scale_thread(void *arg) } for (i = 0; i < kfree_alloc_num; i++) { - alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL); + alloc_ptr = kcalloc(kfree_mult, sizeof(struct kfree_obj), GFP_KERNEL); if (!alloc_ptr) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 36f8e3087562bc3f55f584fb42d105dfdf6686f4 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 7 May 2025 19:26:04 +0800 Subject: rcu/nocb: Add Safe checks for access offloaded rdp For built with CONFIG_PROVE_RCU=y and CONFIG_PREEMPT_RT=y kernels, Disable BH does not change the SOFTIRQ corresponding bits in preempt_count(), but change current->softirq_disable_cnt, this resulted in the following splat: WARNING: suspicious RCU usage kernel/rcu/tree_plugin.h:36 Unsafe read of RCU_NOCB offloaded state! stack backtrace: CPU: 0 UID: 0 PID: 22 Comm: rcuc/0 Call Trace: [ 0.407907] [ 0.407910] dump_stack_lvl+0xbb/0xd0 [ 0.407917] dump_stack+0x14/0x20 [ 0.407920] lockdep_rcu_suspicious+0x133/0x210 [ 0.407932] rcu_rdp_is_offloaded+0x1c3/0x270 [ 0.407939] rcu_core+0x471/0x900 [ 0.407942] ? lockdep_hardirqs_on+0xd5/0x160 [ 0.407954] rcu_cpu_kthread+0x25f/0x870 [ 0.407959] ? __pfx_rcu_cpu_kthread+0x10/0x10 [ 0.407966] smpboot_thread_fn+0x34c/0xa50 [ 0.407970] ? trace_preempt_on+0x54/0x120 [ 0.407977] ? __pfx_smpboot_thread_fn+0x10/0x10 [ 0.407982] kthread+0x40e/0x840 [ 0.407990] ? __pfx_kthread+0x10/0x10 [ 0.407994] ? rt_spin_unlock+0x4e/0xb0 [ 0.407997] ? rt_spin_unlock+0x4e/0xb0 [ 0.408000] ? __pfx_kthread+0x10/0x10 [ 0.408006] ? __pfx_kthread+0x10/0x10 [ 0.408011] ret_from_fork+0x40/0x70 [ 0.408013] ? __pfx_kthread+0x10/0x10 [ 0.408018] ret_from_fork_asm+0x1a/0x30 [ 0.408042] Currently, triggering an rdp offloaded state change need the corresponding rdp's CPU goes offline, and at this time the rcuc kthreads has already in parking state. this means the corresponding rcuc kthreads can safely read offloaded state of rdp while it's corresponding cpu is online. This commit therefore add softirq_count() check for Preempt-RT kernels. Suggested-by: Joel Fernandes Reviewed-by: Frederic Weisbecker Signed-off-by: Zqiang Signed-off-by: Joel Fernandes --- kernel/rcu/tree_plugin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3c0bbbbb686f..0b0f56f6abc8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -29,7 +29,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) || lockdep_is_held(&rdp->nocb_lock) || lockdep_is_held(&rcu_state.nocb_mutex) || - (!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) && + ((!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) || softirq_count()) && rdp == this_cpu_ptr(&rcu_data)) || rcu_current_is_nocb_kthread(rdp)), "Unsafe read of RCU_NOCB offloaded state" -- cgit v1.2.3-59-g8ed1b From ed0d0db095dd2b3a609f07d49d1342be9fb5c49d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 9 Feb 2025 05:09:26 -0800 Subject: rcutorture: Comment invocations of tick_dep_set_task() The rcu_torture_reader() and rcu_torture_fwd_prog_cr() functions run CPU-bound for extended periods of time (tens or even hundreds of milliseconds), so they invoke tick_dep_set_task() and tick_dep_clear_task() to ensure that the scheduling-clock tick helps move grace periods forward. So why doesn't rcu_torture_fwd_prog_nr() also invoke tick_dep_set_task() and tick_dep_clear_task()? Because the point of this function is to test RCU's ability to (eventually) force grace periods forward even when the tick has been disabled during long CPU-bound kernel execution. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/rcutorture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ae7b68043150..393bb5790e0f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2345,7 +2345,7 @@ rcu_torture_reader(void *arg) set_user_nice(current, MAX_NICE); if (irqreader && cur_ops->irq_capable) timer_setup_on_stack(&t, rcu_torture_timer, 0); - tick_dep_set_task(current, TICK_DEP_BIT_RCU); + tick_dep_set_task(current, TICK_DEP_BIT_RCU); // CPU bound, so need tick. do { if (irqreader && cur_ops->irq_capable) { if (!timer_pending(&t)) @@ -3074,7 +3074,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) cver = READ_ONCE(rcu_torture_current_version); gps = cur_ops->get_gp_seq(); rfp->rcu_launder_gp_seq_start = gps; - tick_dep_set_task(current, TICK_DEP_BIT_RCU); + tick_dep_set_task(current, TICK_DEP_BIT_RCU); // CPU bound, so need tick. while (time_before(jiffies, stopat) && !shutdown_time_arrived() && !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { -- cgit v1.2.3-59-g8ed1b From b9af71a26dff591e214dcd3dca7b94c908ccb825 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Mar 2025 08:10:00 -0800 Subject: checkpatch: Deprecate srcu_read_lock_lite() and srcu_read_unlock_lite() Uses of srcu_read_lock_lite() and srcu_read_unlock_lite() are better served by the new srcu_read_lock_fast() and srcu_read_unlock_fast() APIs. As in srcu_read_lock_lite() and srcu_read_unlock_lite() would never have happened had I thought a bit harder a few months ago. Therefore, mark them deprecated. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- scripts/checkpatch.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3d22bf863eec..b3b1939ccd19 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -839,6 +839,8 @@ our %deprecated_apis = ( "kunmap" => "kunmap_local", "kmap_atomic" => "kmap_local_page", "kunmap_atomic" => "kunmap_local", + "srcu_read_lock_lite" => "srcu_read_lock_fast", + "srcu_read_unlock_lite" => "srcu_read_unlock_fast", ); #Create a search pattern for all these strings to speed up a loop below -- cgit v1.2.3-59-g8ed1b From fa11a54cf6cc21f1e18dc7ba71de662561fddc55 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 9 Mar 2025 09:00:35 -0700 Subject: torture: Add --do-{,no-}normal to torture.sh Right now, torture.sh runs normal runs unconditionally, which can be slow and thus annoying when you only want to test --kcsan or --kasan runs. This commit therefore adds a --do-normal argument so that "--kcsan --do-no-kasan --do-no-normal" runs only KCSAN runs. Note that specifying "--do-no-kasan --do-no-kcsan --do-no-normal" gets normal runs, so you should not try to use this as a synonym for --do-none. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/torture.sh | 30 ++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index b64b356f55ff..475f758f6216 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -51,6 +51,8 @@ do_scftorture=yes do_rcuscale=yes do_refscale=yes do_kvfree=yes +do_normal=yes +explicit_normal=no do_kasan=yes do_kcsan=no do_clocksourcewd=yes @@ -128,6 +130,8 @@ do do_refscale=yes do_rt=yes do_kvfree=yes + do_normal=yes + explicit_normal=no do_kasan=yes do_kcsan=yes do_clocksourcewd=yes @@ -161,11 +165,17 @@ do do_refscale=no do_rt=no do_kvfree=no + do_normal=no + explicit_normal=no do_kasan=no do_kcsan=no do_clocksourcewd=no do_srcu_lockdep=no ;; + --do-normal|--do-no-normal|--no-normal) + do_normal=`doyesno "$1" --do-normal` + explicit_normal=yes + ;; --do-rcuscale|--do-no-rcuscale|--no-rcuscale) do_rcuscale=`doyesno "$1" --do-rcuscale` ;; @@ -242,6 +252,17 @@ trap 'rm -rf $T' 0 2 echo " --- " $scriptname $args | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log +if test "$do_normal" = "no" && test "$do_kasan" = "no" && test "$do_kcsan" = "no" +then + # Match old scripts so that "--do-none --do-rcutorture" does + # normal rcutorture testing, but no KASAN or KCSAN testing. + if test $explicit_normal = yes + then + echo " --- Everything disabled, so explicit --do-normal overridden" | tee -a $T/log + fi + do_normal=yes +fi + # Calculate rcutorture defaults and apportion time if test -z "$configs_rcutorture" then @@ -332,9 +353,12 @@ function torture_set { local kcsan_kmake_tag= local flavor=$1 shift - curflavor=$flavor - torture_one "$@" - mv $T/last-resdir $T/last-resdir-nodebug || : + if test "$do_normal" = "yes" + then + curflavor=$flavor + torture_one "$@" + mv $T/last-resdir $T/last-resdir-nodebug || : + fi if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan -- cgit v1.2.3-59-g8ed1b From 7e01c4c5cff293627209bac4337b1971b2f57d4e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Mar 2025 15:54:48 -0700 Subject: torture: Add testing of RCU's Rust bindings to torture.sh This commit adds a --do-rcu-rust parameter to torture.sh, which invokes a rust_doctests_kernel kunit run. Note that kunit wants a clean source tree, so this runs "make mrproper", which might come as a surprise to some users. Should there be a --mrproper parameter to torture.sh to make the user explicitly ask for it? Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/torture.sh | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 475f758f6216..e03fdaca89b3 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -59,6 +59,7 @@ do_clocksourcewd=yes do_rt=yes do_rcutasksflavors=yes do_srcu_lockdep=yes +do_rcu_rust=no # doyesno - Helper function for yes/no arguments function doyesno () { @@ -89,6 +90,7 @@ usage () { echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" echo " --do-refscale / --do-no-refscale / --no-refscale" echo " --do-rt / --do-no-rt / --no-rt" + echo " --do-rcu-rust / --do-no-rcu-rust / --no-rcu-rust" echo " --do-scftorture / --do-no-scftorture / --no-scftorture" echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep" echo " --duration [ | h | d ]" @@ -191,6 +193,9 @@ do --do-rt|--do-no-rt|--no-rt) do_rt=`doyesno "$1" --do-rt` ;; + --do-rcu-rust|--do-no-rcu-rust|--no-rcu-rust) + do_rcu_rust=`doyesno "$1" --do-rcu-rust` + ;; --do-scftorture|--do-no-scftorture|--no-scftorture) do_scftorture=`doyesno "$1" --do-scftorture` ;; @@ -485,6 +490,46 @@ then torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_NOCB_CPU=y" --trust-make fi +if test "$do_rcu_rust" = "yes" +then + echo " --- do-rcu-rust:" Start `date` | tee -a $T/log + rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust" + mkdir -p "$rrdir" + echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out + make LLVM=1 rustavailable > $T/rustavailable.out 2>&1 + retcode=$? + echo $retcode > $rrdir/rustavailable.exitcode + cat $T/rustavailable.out | tee -a $rrdir/log >> $rrdir/rustavailable.out 2>&1 + buildphase=rustavailable + if test "$retcode" -eq 0 + then + echo " --- Running 'make mrproper' in order to run kunit." | tee -a $rrdir/log > $rrdir/mrproper.out + make mrproper > $rrdir/mrproper.out 2>&1 + retcode=$? + echo $retcode > $rrdir/mrproper.exitcode + buildphase=mrproper + fi + if test "$retcode" -eq 0 + then + echo " --- Running rust_doctests_kernel." | tee -a $rrdir/log > $rrdir/rust_doctests_kernel.out + ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --make_options CLIPPY=1 --arch arm64 --kconfig_add CONFIG_SMP=y --kconfig_add CONFIG_WERROR=y --kconfig_add CONFIG_RUST=y rust_doctests_kernel >> $rrdir/rust_doctests_kernel.out 2>&1 + # @@@ Remove "--arch arm64" in order to test on native architecture? + # @@@ Analyze $rrdir/rust_doctests_kernel.out contents? + retcode=$? + echo $retcode > $rrdir/rust_doctests_kernel.exitcode + buildphase=rust_doctests_kernel + fi + if test "$retcode" -eq 0 + then + echo "rcu-rust($retcode)" $rrdir >> $T/successes + echo Success >> $rrdir/log + else + echo "rcu-rust($retcode)" $rrdir >> $T/failures + echo " --- rcu-rust Test summary:" >> $rrdir/log + echo " --- Summary: Exit code $retcode from $buildphase, see $rrdir/$buildphase.out" >> $rrdir/log + fi +fi + if test "$do_srcu_lockdep" = "yes" then echo " --- do-srcu-lockdep:" Start `date` | tee -a $T/log -- cgit v1.2.3-59-g8ed1b From aafe12f980640c33882a1d09992560cdf11fbd6f Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 16 Feb 2025 06:52:20 -0500 Subject: rcutorture: Perform more frequent testing of ->gpwrap Currently, the ->gpwrap is not tested (at all per my testing) due to the requirement of a large delta between a CPU's rdp->gp_seq and its node's rnp->gpseq. This results in no testing of ->gpwrap being set. This patch by default adds 5 minutes of testing with ->gpwrap forced by lowering the delta between rdp->gp_seq and rnp->gp_seq to just 8 GPs. All of this is configurable, including the active time for the setting and a full testing cycle. By default, the first 25 minutes of a test will have the _default_ behavior there is right now (ULONG_MAX / 4) delta. Then for 5 minutes, we switch to a smaller delta causing 1-2 wraps in 5 minutes. I believe this is reasonable since we at least add a little bit of testing for usecases where ->gpwrap is set. [ Apply fix for Dan Carpenter's bug report on init path cleanup. ] [ Apply kernel doc warning fix from Akira Yokosawa. ] Tested-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- Documentation/admin-guide/kernel-parameters.txt | 25 ++++++++ kernel/rcu/rcu.h | 4 ++ kernel/rcu/rcutorture.c | 78 ++++++++++++++++++++++++- kernel/rcu/tree.c | 34 ++++++++++- kernel/rcu/tree.h | 1 + 5 files changed, 138 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 76e538c77e31..50d90b2542d1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5657,6 +5657,31 @@ are zero, rcutorture acts as if is interpreted they are all non-zero. + rcutorture.gpwrap_lag= [KNL] + Enable grace-period wrap lag testing. Setting + to false prevents the gpwrap lag test from + running. Default is true. + + rcutorture.gpwrap_lag_gps= [KNL] + Set the value for grace-period wrap lag during + active lag testing periods. This controls how many + grace periods differences we tolerate between + rdp and rnp's gp_seq before setting overflow flag. + The default is always set to 8. + + rcutorture.gpwrap_lag_cycle_mins= [KNL] + Set the total cycle duration for gpwrap lag + testing in minutes. This is the total time for + one complete cycle of active and inactive + testing periods. Default is 30 minutes. + + rcutorture.gpwrap_lag_active_mins= [KNL] + Set the duration for which gpwrap lag is active + within each cycle, in minutes. During this time, + the grace-period wrap lag will be set to the + value specified by gpwrap_lag_gps. Default is + 5 minutes. + rcutorture.irqreader= [KNL] Run RCU readers from irq handlers, or, more accurately, from a timer handler. Not all RCU diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index eed2951a4962..516b26024a37 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename, unsigned long c_old, unsigned long c); void rcu_gp_set_torture_wait(int duration); +void rcu_set_gpwrap_lag(unsigned long lag); +int rcu_get_gpwrap_count(int cpu); #else static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq) { @@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename, do { } while (0) #endif static inline void rcu_gp_set_torture_wait(int duration) { } +static inline void rcu_set_gpwrap_lag(unsigned long lag) { } +static inline int rcu_get_gpwrap_count(int cpu) { return 0; } #endif unsigned long long rcutorture_gather_gp_seqs(void); void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 393bb5790e0f..b5db1ac32a3d 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -115,6 +115,10 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads"); torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing"); torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable"); +torture_param(bool, gpwrap_lag, true, "Enable grace-period wrap lag testing"); +torture_param(int, gpwrap_lag_gps, 8, "Value to set for set_gpwrap_lag during an active testing period."); +torture_param(int, gpwrap_lag_cycle_mins, 30, "Total cycle duration for gpwrap lag testing (in minutes)"); +torture_param(int, gpwrap_lag_active_mins, 5, "Duration for which gpwrap lag is active within each cycle (in minutes)"); torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable"); torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)"); torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable"); @@ -413,6 +417,8 @@ struct rcu_torture_ops { bool (*reader_blocked)(void); unsigned long long (*gather_gp_seqs)(void); void (*format_gp_seqs)(unsigned long long seqs, char *cp, size_t len); + void (*set_gpwrap_lag)(unsigned long lag); + int (*get_gpwrap_count)(int cpu); long cbflood_max; int irq_capable; int can_boost; @@ -619,6 +625,8 @@ static struct rcu_torture_ops rcu_ops = { : NULL, .gather_gp_seqs = rcutorture_gather_gp_seqs, .format_gp_seqs = rcutorture_format_gp_seqs, + .set_gpwrap_lag = rcu_set_gpwrap_lag, + .get_gpwrap_count = rcu_get_gpwrap_count, .irq_capable = 1, .can_boost = IS_ENABLED(CONFIG_RCU_BOOST), .extendables = RCUTORTURE_MAX_EXTEND, @@ -2432,6 +2440,7 @@ rcu_torture_stats_print(void) int i; long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; + long n_gpwraps = 0; struct rcu_torture *rtcp; static unsigned long rtcv_snap = ULONG_MAX; static bool splatted; @@ -2442,6 +2451,8 @@ rcu_torture_stats_print(void) pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]); batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]); } + if (cur_ops->get_gpwrap_count) + n_gpwraps += cur_ops->get_gpwrap_count(cpu); } for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) { if (pipesummary[i] != 0) @@ -2473,8 +2484,9 @@ rcu_torture_stats_print(void) data_race(n_barrier_attempts), data_race(n_rcu_torture_barrier_error)); pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic. - pr_cont("nocb-toggles: %ld:%ld\n", + pr_cont("nocb-toggles: %ld:%ld ", atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload)); + pr_cont("gpwraps: %ld\n", n_gpwraps); pr_alert("%s%s ", torture_type, TORTURE_FLAG); if (atomic_read(&n_rcu_torture_mberror) || @@ -3645,6 +3657,57 @@ static int rcu_torture_preempt(void *unused) static enum cpuhp_state rcutor_hp; +static struct hrtimer gpwrap_lag_timer; +static bool gpwrap_lag_active; + +/* Timer handler for toggling RCU grace-period sequence overflow test lag value */ +static enum hrtimer_restart rcu_gpwrap_lag_timer(struct hrtimer *timer) +{ + ktime_t next_delay; + + if (gpwrap_lag_active) { + pr_alert("rcu-torture: Disabling gpwrap lag (value=0)\n"); + cur_ops->set_gpwrap_lag(0); + gpwrap_lag_active = false; + next_delay = ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0); + } else { + pr_alert("rcu-torture: Enabling gpwrap lag (value=%d)\n", gpwrap_lag_gps); + cur_ops->set_gpwrap_lag(gpwrap_lag_gps); + gpwrap_lag_active = true; + next_delay = ktime_set(gpwrap_lag_active_mins * 60, 0); + } + + if (torture_must_stop_irq()) + return HRTIMER_NORESTART; + + hrtimer_forward_now(timer, next_delay); + return HRTIMER_RESTART; +} + +static int rcu_gpwrap_lag_init(void) +{ + if (!gpwrap_lag) + return 0; + + if (gpwrap_lag_cycle_mins <= 0 || gpwrap_lag_active_mins <= 0) { + pr_alert("rcu-torture: lag timing parameters must be positive\n"); + return -EINVAL; + } + + hrtimer_setup(&gpwrap_lag_timer, rcu_gpwrap_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + gpwrap_lag_active = false; + hrtimer_start(&gpwrap_lag_timer, + ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0), HRTIMER_MODE_REL); + + return 0; +} + +static void rcu_gpwrap_lag_cleanup(void) +{ + hrtimer_cancel(&gpwrap_lag_timer); + cur_ops->set_gpwrap_lag(0); + gpwrap_lag_active = false; +} static void rcu_torture_cleanup(void) { @@ -3814,6 +3877,9 @@ rcu_torture_cleanup(void) torture_cleanup_end(); if (cur_ops->gp_slow_unregister) cur_ops->gp_slow_unregister(NULL); + + if (gpwrap_lag && cur_ops->set_gpwrap_lag) + rcu_gpwrap_lag_cleanup(); } static void rcu_torture_leak_cb(struct rcu_head *rhp) @@ -4310,9 +4376,17 @@ rcu_torture_init(void) } if (object_debug) rcu_test_debug_objects(); - torture_init_end(); + if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister)) cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay); + + if (gpwrap_lag && cur_ops->set_gpwrap_lag) { + firsterr = rcu_gpwrap_lag_init(); + if (torture_init_error(firsterr)) + goto unwind; + } + + torture_init_end(); return 0; unwind: diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 659f83e71048..4f36a52d3783 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *); static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .gpwrap = true, }; + +int rcu_get_gpwrap_count(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + return READ_ONCE(rdp->gpwrap_count); +} +EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count); + static struct rcu_state rcu_state = { .level = { &rcu_state.node[0] }, .gp_state = RCU_GP_IDLE, @@ -757,6 +766,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t) smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true); } +static unsigned long seq_gpwrap_lag = ULONG_MAX / 4; + +/** + * rcu_set_gpwrap_lag - Set RCU GP sequence overflow lag value. + * @lag_gps: Set overflow lag to this many grace period worth of counters + * which is used by rcutorture to quickly force a gpwrap situation. + * @lag_gps = 0 means we reset it back to the boot-time value. + */ +void rcu_set_gpwrap_lag(unsigned long lag_gps) +{ + unsigned long lag_seq_count; + + lag_seq_count = (lag_gps == 0) + ? ULONG_MAX / 4 + : lag_gps << RCU_SEQ_CTR_SHIFT; + WRITE_ONCE(seq_gpwrap_lag, lag_seq_count); +} +EXPORT_SYMBOL_GPL(rcu_set_gpwrap_lag); + /* * When trying to report a quiescent state on behalf of some other CPU, * it is our responsibility to check for and handle potential overflow @@ -767,9 +795,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t) static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp) { raw_lockdep_assert_held_rcu_node(rnp); - if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4, - rnp->gp_seq)) + if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_gpwrap_lag, + rnp->gp_seq)) { WRITE_ONCE(rdp->gpwrap, true); + WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1); + } if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq)) rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a9a811d9d7a3..63bea388c243 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -183,6 +183,7 @@ struct rcu_data { bool core_needs_qs; /* Core waits for quiescent state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible ->gp_seq wrap. */ + unsigned int gpwrap_count; /* Count of GP sequence wrap. */ bool cpu_started; /* RCU watching this onlining CPU. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ -- cgit v1.2.3-59-g8ed1b From d72e6c0bce15468445b1668869287011d2bab882 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 May 2025 16:42:28 -0700 Subject: torture: Check for "Call trace:" as well as "Call Trace:" Different architectures capitalize their splats differently. Who knew? This commit therefore checks for both arm64 "Call trace:" and x86 "Call Trace:". Reported-by: Joel Fernandes Closes: https://lore.kernel.org/all/553c33d8-2b51-4772-8aef-97b0163bc78e@nvidia.com/ Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/console-badness.sh | 2 +- tools/testing/selftests/rcutorture/bin/parse-console.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index aad51e7c0183..991fb11306eb 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney -grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Call trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index b07c11cf6929..21e6ba3615f6 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -148,7 +148,7 @@ then summary="$summary KCSAN: $n_kcsan" fi fi - n_calltrace=`grep -c 'Call Trace:' $file` + n_calltrace=`grep -Ec 'Call Trace:|Call trace:' $file` if test "$n_calltrace" -ne 0 then summary="$summary Call Traces: $n_calltrace" -- cgit v1.2.3-59-g8ed1b From 4b5eb4b6b22486dff5dcc9bb9985c4ed769593f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 May 2025 16:42:29 -0700 Subject: rcutorture: Reduce TREE01 CPU overcommit The TREE01.boot nr_cpus kernel boot parameter has been set to 43 for more than seven years, but it can cause RCU CPU stall warnings on arm64, most of the time involving the stop-machine subsystem. This should not be too surprising, given that this causes 43 vCPUs to spin with interrupts disabled when there are only eight physical CPUs. The point of this CPU overcommit is to test the ability of expedited RCU grace period initialization to handle races with incoming CPUs that have never previously been online. But limiting to 17 CPUs instead of 43 allows time for this code to be exercised, and eliminates (or at least greatly reduces) the incidence of RCU CPU stall warnings on arm64. So this commit therefore sets nr_cpus=17 in TREE01.boot. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index 40af3df0f397..1cc5b47dde28 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1,4 +1,4 @@ -maxcpus=8 nr_cpus=43 +maxcpus=8 nr_cpus=17 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 -- cgit v1.2.3-59-g8ed1b From 9ffc09de883d262bf5b40dbdc8b34ce11c58ba0d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 May 2025 16:42:30 -0700 Subject: rcutorture: Remove MAXSMP and CPUMASK_OFFSTACK from TREE01 Back in the day, rcutorture was about the only thing that tested off-stack CPU masks, but now any arm64 system with more than 256 CPUs tests it full time. In fact, it is necessary to hack the kernel to prevent such a system from testing off-stack CPU masks. This means that there is no longer much point in rcutorture going out of its way to test this. And given the differences in how CPUMASK_OFFSTACK is enabled in x86 and arm64, rcutorture would need to go out of its way. This commit therefore removes CONFIG_CPUMASK_OFFSTACK=y (and the CONFIG_MAXSMP=y required to enable it on x86) from TREE01. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 8ae41d5f81a3..54b1600c7eb5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -8,8 +8,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y -CONFIG_MAXSMP=y -CONFIG_CPUMASK_OFFSTACK=y CONFIG_RCU_NOCB_CPU=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n -- cgit v1.2.3-59-g8ed1b From cbb44d9c45e91ec132a48812bde06a6e608ea6e1 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 10 Apr 2025 10:51:53 -0400 Subject: rcutorture: Fix issue with re-using old images on ARM64 On ARM64, when running with --configs '36*SRCU-P', I noticed that only 1 instance instead of 36 for starting. Fix it by checking for Image files, instead of bzImage which ARM does not seem to have. With this I see all 36 instances running at the same time in the batch. Tested-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index ad79784e552d..957800c9ffba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -73,7 +73,7 @@ config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \ cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` -if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux +if test "$base_resdir" != "$resdir" && (test -f $base_resdir/bzImage || test -f $base_resdir/Image) && test -f $base_resdir/vmlinux then # Rerunning previous test, so use that test's kernel. QEMU="`identify_qemu $base_resdir/vmlinux`" -- cgit v1.2.3-59-g8ed1b