From dfd458a95d78ce31855fe06bbfde4f4fe60c40db Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)"
Date: Fri, 8 Mar 2024 18:34:04 +0100
Subject: rcu: Add data structures for synchronize_rcu()

The synchronize_rcu() call is going to be reworked, so this patch adds
the dedicated fields it will need to the rcu_state structure.

Reviewed-by: Paul E. McKenney
Signed-off-by: Uladzislau Rezki (Sony)
---
 kernel/rcu/tree.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'kernel/rcu/tree.h')

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index df48160b3136..b942b9437438 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -315,6 +315,13 @@ do { \
 	__set_current_state(TASK_RUNNING); \
 } while (0)
 
+#define SR_NORMAL_GP_WAIT_HEAD_MAX 5
+
+struct sr_wait_node {
+	atomic_t inuse;
+	struct llist_node node;
+};
+
 /*
  * RCU global state, including node hierarchy. This hierarchy is
  * represented in "heap" form in a dense array. The root (first level)
@@ -400,6 +407,13 @@ struct rcu_state {
 						/* Synchronize offline with */
 						/* GP pre-initialization. */
 	int nocb_is_setup;			/* nocb is setup from boot */
+
+	/* synchronize_rcu() part. */
+	struct llist_head srs_next;	/* request a GP users. */
+	struct llist_node *srs_wait_tail; /* wait for GP users. */
+	struct llist_node *srs_done_tail; /* ready for GP users. */
+	struct sr_wait_node srs_wait_nodes[SR_NORMAL_GP_WAIT_HEAD_MAX];
+	struct work_struct srs_cleanup_work;
 };
 
 /* Values for rcu_state structure's gp_flags field. */
--
cgit v1.2.3-59-g8ed1b
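Before the rework itself lands, it helps to see how the fields above fit together: srs_next is a lock-free list onto which every synchronize_rcu() caller pushes a request, the pre-allocated sr_wait_node entries get interleaved into that same list as batch delimiters (the atomic inuse flag marks which of the SR_NORMAL_GP_WAIT_HEAD_MAX heads are in flight), and srs_cleanup_work performs deferred processing. As orientation only, here is a minimal sketch of the caller side; struct sr_request and sr_request_gp() are hypothetical names invented for this note, not the series' actual code.

#include <linux/llist.h>
#include <linux/completion.h>

/* Hypothetical per-caller request node, for illustration only. */
struct sr_request {
	struct llist_node node;		/* chains onto rcu_state.srs_next */
	struct completion done;		/* completed once a full GP has elapsed */
};

/* Sketch: a blocking caller enqueues itself lock-free and sleeps. */
static void sr_request_gp(struct llist_head *srs_next)
{
	struct sr_request req;

	init_completion(&req.done);
	llist_add(&req.node, srs_next);	/* safe against concurrent callers */
	wait_for_completion(&req.done);	/* woken by GP kthread or worker */
}

The llist primitives matter here: llist_add() is a cmpxchg loop, so any number of CPUs can request a grace period without taking a lock, and the single consumer can detach the whole batch at once.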
From ae2b217ab542d0db0ca1a6de4f442201a1982f00 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 8 Mar 2024 11:15:01 -0800
Subject: rcu: Make hotplug operations track GP state, not flags

Currently, there are rcu_data structure fields named ->rcu_onl_gp_flags
and ->rcu_ofl_gp_flags that track the rcu_state.gp_flags field at the
time of the corresponding CPU's last online or offline operation,
respectively. However, this information is not particularly useful. It
would be better to instead track the grace-period state kept in
rcu_state.gp_state. This would also be consistent with the
initialization in rcu_boot_init_percpu_data(), which is to
RCU_GP_CLEANED (an rcu_state.gp_state value), and also with the
diagnostics in rcu_implicit_dynticks_qs(), whose format is consistent
with an integer, not a bitmask.

This commit therefore makes this change and renames the fields to
->rcu_onl_gp_state and ->rcu_ofl_gp_state, respectively.

Signed-off-by: Paul E. McKenney
Signed-off-by: Uladzislau Rezki (Sony)
---
 kernel/rcu/tree.c        | 12 ++++++------
 kernel/rcu/tree.h        |  4 ++--
 kernel/rcu/tree_plugin.h |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'kernel/rcu/tree.h')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7149b2d5cdd6..306f55b81d10 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -841,8 +841,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 			__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
 		pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
 			__func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)],
-			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
-			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
+			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_state,
+			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_state);
 		return 1; /* Break things loose after complaining. */
 	}
 
@@ -4420,9 +4420,9 @@ rcu_boot_init_percpu_data(int cpu)
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)));
 	rdp->barrier_seq_snap = rcu_state.barrier_sequence;
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
-	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
+	rdp->rcu_ofl_gp_state = RCU_GP_CLEANED;
 	rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
-	rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
+	rdp->rcu_onl_gp_state = RCU_GP_CLEANED;
 	rdp->last_sched_clock = jiffies;
 	rdp->cpu = cpu;
 	rcu_boot_init_nocb_percpu_data(rdp);
@@ -4682,7 +4682,7 @@ void rcutree_report_cpu_starting(unsigned int cpu)
 	ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
 	rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
 	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
-	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
+	rdp->rcu_onl_gp_state = READ_ONCE(rcu_state.gp_state);
 
 	/* An incoming CPU should never be blocking a grace period. */
 	if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
@@ -4733,7 +4733,7 @@ void rcutree_report_cpu_dead(void)
 	arch_spin_lock(&rcu_state.ofl_lock);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
 	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
-	rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
+	rdp->rcu_ofl_gp_state = READ_ONCE(rcu_state.gp_state);
 	if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
 		/* Report quiescent state -before- changing ->qsmaskinitnext! */
 		rcu_disable_urgency_upon_qs(rdp);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index df48160b3136..ff4d8b60554b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -273,9 +273,9 @@ struct rcu_data {
 	bool rcu_iw_pending;		/* Is ->rcu_iw pending? */
 	unsigned long rcu_iw_gp_seq;	/* ->gp_seq associated with ->rcu_iw. */
 	unsigned long rcu_ofl_gp_seq;	/* ->gp_seq at last offline. */
-	short rcu_ofl_gp_flags;		/* ->gp_flags at last offline. */
+	short rcu_ofl_gp_state;		/* ->gp_state at last offline. */
 	unsigned long rcu_onl_gp_seq;	/* ->gp_seq at last online. */
-	short rcu_onl_gp_flags;		/* ->gp_flags at last online. */
+	short rcu_onl_gp_state;		/* ->gp_state at last online. */
 	unsigned long last_fqs_resched;	/* Time of last rcu_resched(). */
 	unsigned long last_sched_clock;	/* Jiffies of last rcu_sched_clock_irq(). */
 	struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 36a8b5dbf5b5..340bbefe5f65 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -805,8 +805,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 		rdp = per_cpu_ptr(&rcu_data, cpu);
 		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
 			cpu, ".o"[rcu_rdp_cpu_online(rdp)],
-			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
-			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
+			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_state,
+			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_state);
 	}
 }
--
cgit v1.2.3-59-g8ed1b
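The distinction that commit draws is easier to see next to the two kinds of values involved. The constants below are an abridged sketch in the style of kernel/rcu/tree.h from this era; treat the exact numbers and comment wording as orientation rather than quoted source. gp_flags is a bitmask of outstanding requests made to the GP kthread, while gp_state records the single phase that kthread currently occupies, which is why a plain integer in the diagnostics, and an initialization to RCU_GP_CLEANED, only make sense for the latter.

/* rcu_state.gp_flags: a bitmask of requests to the GP kthread. */
#define RCU_GP_FLAG_INIT 0x1	/* Need grace-period initialization. */
#define RCU_GP_FLAG_FQS  0x2	/* Need quiescent-state forcing. */
#define RCU_GP_FLAG_OVLD 0x4	/* Experiencing callback overload. */

/* rcu_state.gp_state: the one phase the GP kthread is in. */
#define RCU_GP_IDLE	 0	/* No grace period in progress. */
#define RCU_GP_WAIT_GPS	 1	/* Waiting for grace-period start. */
#define RCU_GP_DONE_GPS	 2	/* Wait for GP start is done. */
#define RCU_GP_ONOFF	 3	/* GP initialization hotplug work. */
#define RCU_GP_INIT	 4	/* GP initialization proper. */
#define RCU_GP_WAIT_FQS	 5	/* Waiting for force-quiescent-state time. */
#define RCU_GP_DOING_FQS 6	/* FQS wait is done. */
#define RCU_GP_CLEANUP	 7	/* GP cleanup started. */
#define RCU_GP_CLEANED	 8	/* GP cleanup complete. */

A hotplug-time snapshot of gp_state therefore tells a post-mortem reader exactly where the GP kthread stood when the CPU came or went; a snapshot of gp_flags would only say which requests happened to be pending at that instant.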
From 462df2f543ae360e79fcaa1b498d2a1a0c2a5b63 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)"
Date: Fri, 8 Mar 2024 18:34:07 +0100
Subject: rcu: Support direct wake-up of synchronize_rcu() users

This patch introduces a small enhancement that lets the gp-kthread wake
up synchronize_rcu() callers directly, as soon as the grace period
completes. The number of users woken this way is capped by a hard-coded
threshold; any remaining users are deferred to the main cleanup worker.

Link: https://lore.kernel.org/lkml/Zd0ZtNu+Rt0qXkfS@lothringen/
Reviewed-by: Paul E. McKenney
Signed-off-by: Uladzislau Rezki (Sony)
---
 kernel/rcu/tree.c | 24 +++++++++++++++++++++++-
 kernel/rcu/tree.h |  6 ++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'kernel/rcu/tree.h')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2e1c5be6d64b..2a270abade4d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1645,7 +1645,8 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
  */
 static void rcu_sr_normal_gp_cleanup(void)
 {
-	struct llist_node *wait_tail;
+	struct llist_node *wait_tail, *next, *rcu;
+	int done = 0;
 
 	wait_tail = rcu_state.srs_wait_tail;
 	if (wait_tail == NULL)
@@ -1653,11 +1654,32 @@ static void rcu_sr_normal_gp_cleanup(void)
 
 	rcu_state.srs_wait_tail = NULL;
 	ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_wait_tail);
+	WARN_ON_ONCE(!rcu_sr_is_wait_head(wait_tail));
+
+	/*
+	 * Process (a) and (d) cases. See an illustration.
+	 */
+	llist_for_each_safe(rcu, next, wait_tail->next) {
+		if (rcu_sr_is_wait_head(rcu))
+			break;
+
+		rcu_sr_normal_complete(rcu);
+		// It can be last, update a next on this step.
+		wait_tail->next = next;
+
+		if (++done == SR_MAX_USERS_WAKE_FROM_GP)
+			break;
+	}
 
 	// concurrent sr_normal_gp_cleanup work might observe this update.
 	smp_store_release(&rcu_state.srs_done_tail, wait_tail);
 	ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
 
+	/*
+	 * We schedule a work in order to perform a final processing
+	 * of outstanding users(if still left) and releasing wait-heads
+	 * added by rcu_sr_normal_gp_init() call.
+	 */
 	schedule_work(&rcu_state.srs_cleanup_work);
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b942b9437438..2832787cee1d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -315,6 +315,12 @@ do { \
 	__set_current_state(TASK_RUNNING); \
 } while (0)
 
+/*
+ * A max threshold for synchronize_rcu() users which are
+ * awaken directly by the rcu_gp_kthread(). Left part is
+ * deferred to the main worker.
+ */
+#define SR_MAX_USERS_WAKE_FROM_GP 5
 #define SR_NORMAL_GP_WAIT_HEAD_MAX 5
 
 struct sr_wait_node {
--
cgit v1.2.3-59-g8ed1b
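Read together, the series splits post-grace-period wake-up work in two: a short, bounded burst done inline by the GP kthread and an unbounded remainder handed to a workqueue. The sketch below condenses that control flow; rcu_sr_is_wait_head() and rcu_sr_normal_complete() are helpers introduced elsewhere in this series, and the body is a simplification of rcu_sr_normal_gp_cleanup() above, not a verbatim copy.

/*
 * Condensed flow: complete up to SR_MAX_USERS_WAKE_FROM_GP waiters
 * inline, then publish the remainder for the cleanup worker.
 */
static void sr_cleanup_sketch(struct llist_node *wait_tail)
{
	struct llist_node *rcu, *next;
	int done = 0;

	llist_for_each_safe(rcu, next, wait_tail->next) {
		if (rcu_sr_is_wait_head(rcu))
			break;			/* an older batch starts here */

		rcu_sr_normal_complete(rcu);	/* direct wake-up by GP kthread */
		wait_tail->next = next;		/* unlink the woken user */

		if (++done == SR_MAX_USERS_WAKE_FROM_GP)
			break;			/* bound GP-kthread latency */
	}

	/* Leftover users and the wait-heads go to the deferred worker. */
	smp_store_release(&rcu_state.srs_done_tail, wait_tail);
	schedule_work(&rcu_state.srs_cleanup_work);
}

The cap is the design point: however many callers pile onto synchronize_rcu(), the GP kthread spends at most a constant amount of time waking them before moving on to the next grace period, and the workqueue absorbs the rest.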