From e63c887cfed2077b2db29f27024d0a9f88151c40 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Mar 2015 12:56:43 -0800 Subject: rcu: Convert from rcu_preempt_state to *rcu_state_p It would be good to move more code from #ifdef to IS_ENABLED(), but that does not work if the body of the IS_ENABLED() "if" statement references a variable (such as rcu_preempt_state) that does not exist if the IS_ENABLED() Kconfig variable is not set. This commit therefore substitutes *rcu_state_p for all uses of rcu_preempt_state in kernel/rcu/tree_preempt.h, which should enable elimination of a few #ifdefs. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 58b1ebdc4387..9a04764dd239 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -150,7 +150,7 @@ static void rcu_preempt_note_context_switch(void) !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. */ - rdp = this_cpu_ptr(rcu_preempt_state.rda); + rdp = this_cpu_ptr(rcu_state_p->rda); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); @@ -353,8 +353,7 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->grplo, rnp->grphi, !!rnp->gp_tasks); - rcu_report_unblock_qs_rnp(&rcu_preempt_state, - rnp, flags); + rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags); } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); } @@ -370,7 +369,7 @@ void rcu_read_unlock_special(struct task_struct *t) * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) - rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); + rcu_report_exp_rnp(rcu_state_p, rnp, true); } else { local_irq_restore(flags); } @@ -500,7 +499,7 @@ static void rcu_preempt_check_callbacks(void) static void rcu_preempt_do_callbacks(void) { - rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data)); + rcu_do_batch(rcu_state_p, this_cpu_ptr(&rcu_preempt_data)); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -510,7 +509,7 @@ static void rcu_preempt_do_callbacks(void) */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_preempt_state, -1, 0); + __call_rcu(head, func, rcu_state_p, -1, 0); } EXPORT_SYMBOL_GPL(call_rcu); @@ -711,7 +710,7 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) void synchronize_rcu_expedited(void) { struct rcu_node *rnp; - struct rcu_state *rsp = &rcu_preempt_state; + struct rcu_state *rsp = rcu_state_p; unsigned long snap; int trycount = 0; @@ -798,7 +797,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state); + _rcu_barrier(rcu_state_p); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -807,7 +806,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); */ static void __init __rcu_init_preempt(void) { - rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); + rcu_init_one(rcu_state_p, &rcu_preempt_data); } /* @@ -1172,7 +1171,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct sched_param sp; struct task_struct *t; - if (&rcu_preempt_state != rsp) + if (rcu_state_p != rsp) return 0; if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) -- cgit v1.2.3-59-g8ed1b From 727b705baf7d091a9bc5494d7f1d9699b6932531 Mon Sep 17 00:00:00 2001 From: "Paul E. 
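
The constraint is worth making concrete: the body of an IS_ENABLED() "if" must still parse and type-check in every configuration; it is discarded only later, as dead code. A minimal stand-alone sketch of why the pointer indirection helps (toy macros here, not the kernel's):

	#include <stdio.h>

	#define CONFIG_FOO 0			/* pretend the Kconfig option is off */
	#define IS_ENABLED(option) (option)	/* toy stand-in for the kernel macro */

	#if CONFIG_FOO
	static int foo_state;			/* exists only when the option is on */
	#endif
	static int default_state;
	static int *const foo_state_p = &default_state;	/* declared in all configs */

	int main(void)
	{
		if (IS_ENABLED(CONFIG_FOO)) {
			/* printf("%d\n", foo_state); would fail to compile when
			   CONFIG_FOO is 0: the branch is dead but must parse. */
			printf("%d\n", *foo_state_p);	/* compiles either way */
		}
		return 0;
	}

This is the role rcu_state_p plays: it is declared in every configuration, whereas rcu_preempt_state exists only under CONFIG_PREEMPT_RCU.
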
McKenney" Date: Tue, 3 Mar 2015 14:49:26 -0800 Subject: rcu: Eliminate a few RCU_BOOST #ifdefs in favor of IS_ENABLED() This commit removes a few RCU_BOOST #ifdefs, replacing them with IS_ENABLED()-protected return statements. This relies on the optimizer to remove any resulting dead code. There are several other RCU_BOOST #ifdefs, however these rely on some per-CPU variables that are available only under RCU_BOOST. These might be converted later, if the simplification proves to outweigh the increase in memory footprint. One hoped-for advantage is more easily locating compiler errors in obscure combinations of Kconfig parameters. Signed-off-by: Paul E. McKenney Cc: --- kernel/rcu/tree.h | 2 -- kernel/rcu/tree_plugin.h | 35 ++++++++++++++++++++--------------- 2 files changed, 20 insertions(+), 17 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a69d3dab2ec4..dd5ce4034875 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -170,7 +170,6 @@ struct rcu_node { /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there can cannot be any such task. */ -#ifdef CONFIG_RCU_BOOST struct list_head *boost_tasks; /* Pointer to first task that needs to be */ /* priority boosted, or NULL if no priority */ @@ -208,7 +207,6 @@ struct rcu_node { unsigned long n_balk_nos; /* Refused to boost: not sure why, though. */ /* This can happen due to race conditions. */ -#endif /* #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_NOCB_CPU wait_queue_head_t nocb_gp_wq[2]; /* Place for rcu_nocb_kthread() to wait GP. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 9a04764dd239..8f8142778684 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -43,7 +43,17 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); -#endif /* #ifdef CONFIG_RCU_BOOST */ +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST, + * all uses are in dead code. Provide a definition to keep the compiler + * happy, but add WARN_ON_ONCE() to complain if used in the wrong place. + * This probably needs to be excluded from -rt builds. + */ +#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. 
*/ @@ -180,10 +190,9 @@ static void rcu_preempt_note_context_switch(void) if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); rnp->gp_tasks = &t->rcu_node_entry; -#ifdef CONFIG_RCU_BOOST - if (rnp->boost_tasks != NULL) + if (IS_ENABLED(CONFIG_RCU_BOOST) && + rnp->boost_tasks != NULL) rnp->boost_tasks = rnp->gp_tasks; -#endif /* #ifdef CONFIG_RCU_BOOST */ } else { list_add(&t->rcu_node_entry, &rnp->blkd_tasks); if (rnp->qsmask & rdp->grpmask) @@ -263,9 +272,7 @@ void rcu_read_unlock_special(struct task_struct *t) bool empty_exp_now; unsigned long flags; struct list_head *np; -#ifdef CONFIG_RCU_BOOST bool drop_boost_mutex = false; -#endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; union rcu_special special; @@ -331,12 +338,12 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->gp_tasks = np; if (&t->rcu_node_entry == rnp->exp_tasks) rnp->exp_tasks = np; -#ifdef CONFIG_RCU_BOOST - if (&t->rcu_node_entry == rnp->boost_tasks) - rnp->boost_tasks = np; - /* Snapshot ->boost_mtx ownership with rcu_node lock held. */ - drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; -#endif /* #ifdef CONFIG_RCU_BOOST */ + if (IS_ENABLED(CONFIG_RCU_BOOST)) { + if (&t->rcu_node_entry == rnp->boost_tasks) + rnp->boost_tasks = np; + /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ + drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; + } /* * If this was the last task on the current list, and if @@ -358,11 +365,9 @@ void rcu_read_unlock_special(struct task_struct *t) raw_spin_unlock_irqrestore(&rnp->lock, flags); } -#ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (drop_boost_mutex) + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) rt_mutex_unlock(&rnp->boost_mtx); -#endif /* #ifdef CONFIG_RCU_BOOST */ /* * If this was the last task on the expedited lists, -- cgit v1.2.3-59-g8ed1b From b28a7c016618e5e32e0703e3dd111dbba02715ff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Mar 2015 07:39:27 -0800 Subject: rcu: Tell the compiler that rcu_state_p is immutable This commit adds a "const" tag to the declarations of rcu_state_p, which should allow the compiler to generate better code and also to catch erroneous assignments to this variable. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0628df155970..f031700514dd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -110,7 +110,7 @@ struct rcu_state sname##_state = { \ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); -static struct rcu_state *rcu_state_p; +static struct rcu_state *const rcu_state_p; LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. 
*/ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 8f8142778684..18b057adc21a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -100,7 +100,7 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_PREEMPT_RCU RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); -static struct rcu_state *rcu_state_p = &rcu_preempt_state; +static struct rcu_state *const rcu_state_p = &rcu_preempt_state; static int rcu_preempted_readers_exp(struct rcu_node *rnp); static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, @@ -834,7 +834,7 @@ void exit_rcu(void) #else /* #ifdef CONFIG_PREEMPT_RCU */ -static struct rcu_state *rcu_state_p = &rcu_sched_state; +static struct rcu_state *const rcu_state_p = &rcu_sched_state; /* * Tell them what RCU they are running. -- cgit v1.2.3-59-g8ed1b From 2927a689e8ad5c12d6300b41e873d2b7957bc0e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Mar 2015 07:53:04 -0800 Subject: rcu: Create an immutable rcu_data_p pointer to default rcu_data structure This commit creates an immutable rcu_data_p pointer that references rcu_preempt_data for TREE_PREEMPT_RCU builds and that references rcu_sched_data for TREE_RCU builds. This rcu_data_p pointer will enable more code to move from #ifdef to IS_ENABLED(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + kernel/rcu/tree_plugin.h | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f031700514dd..213f644d6fb1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -111,6 +111,7 @@ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); static struct rcu_state *const rcu_state_p; +static struct rcu_data __percpu *const rcu_data_p; LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 18b057adc21a..5c0122f09ed0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -101,6 +101,7 @@ static void __init rcu_bootup_announce_oddness(void) RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); static struct rcu_state *const rcu_state_p = &rcu_preempt_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; static int rcu_preempted_readers_exp(struct rcu_node *rnp); static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, @@ -126,11 +127,11 @@ static void __init rcu_bootup_announce(void) */ static void rcu_preempt_qs(void) { - if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + if (!__this_cpu_read(rcu_data_p->passed_quiesce)) { trace_rcu_grace_period(TPS("rcu_preempt"), - __this_cpu_read(rcu_preempt_data.gpnum), + __this_cpu_read(rcu_data_p->gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + __this_cpu_write(rcu_data_p->passed_quiesce, 1); barrier(); /* Coordinate with rcu_preempt_check_callbacks(). 
*/ current->rcu_read_unlock_special.b.need_qs = false; } @@ -495,8 +496,8 @@ static void rcu_preempt_check_callbacks(void) return; } if (t->rcu_read_lock_nesting > 0 && - __this_cpu_read(rcu_preempt_data.qs_pending) && - !__this_cpu_read(rcu_preempt_data.passed_quiesce)) + __this_cpu_read(rcu_data_p->qs_pending) && + !__this_cpu_read(rcu_data_p->passed_quiesce)) t->rcu_read_unlock_special.b.need_qs = true; } @@ -504,7 +505,7 @@ static void rcu_preempt_check_callbacks(void) static void rcu_preempt_do_callbacks(void) { - rcu_do_batch(rcu_state_p, this_cpu_ptr(&rcu_preempt_data)); + rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -811,7 +812,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); */ static void __init __rcu_init_preempt(void) { - rcu_init_one(rcu_state_p, &rcu_preempt_data); + rcu_init_one(rcu_state_p, rcu_data_p); } /* @@ -835,6 +836,7 @@ void exit_rcu(void) #else /* #ifdef CONFIG_PREEMPT_RCU */ static struct rcu_state *const rcu_state_p = &rcu_sched_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data; /* * Tell them what RCU they are running. -- cgit v1.2.3-59-g8ed1b From 3382adbc1bb8c80ea512243acf6059564287620b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Mar 2015 15:41:24 -0800 Subject: rcu: Eliminate a few CONFIG_RCU_NOCB_CPU_ALL #ifdefs This commit converts several CONFIG_RCU_NOCB_CPU_ALL #ifdefs to instead use IS_ENABLED(). This change should help avoid hiding code from compiler diagnostics. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutree.h | 2 -- kernel/rcu/tree_plugin.h | 22 ++++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'kernel/rcu') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..5ec20bc4af76 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1153,13 +1153,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) +#ifdef CONFIG_TINY_RCU static inline int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; return 0; } -#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ +#endif /* #ifdef CONFIG_TINY_RCU */ #if defined(CONFIG_RCU_NOCB_CPU_ALL) static inline bool rcu_is_nocb_cpu(int cpu) { return true; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..0bd400b02430 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -31,9 +31,7 @@ #define __LINUX_RCUTREE_H void rcu_note_context_switch(void); -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *delta_jiffies); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ void rcu_cpu_stall_reset(void); /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5c0122f09ed0..0730bfcf65db 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1372,13 +1372,12 @@ static void rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(NULL); + return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) + ? 
0 : rcu_cpu_has_callbacks(NULL); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up * @@ -1485,11 +1484,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * * The caller must have disabled interrupts. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *dj) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { + *dj = ULONG_MAX; + return 0; + } + /* Snapshot to detect later posting of non-lazy callback. */ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; @@ -1516,7 +1519,6 @@ int rcu_needs_cpu(unsigned long *dj) } return 0; } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Prepare a CPU for idle from an RCU perspective. The first major task @@ -1530,7 +1532,6 @@ int rcu_needs_cpu(unsigned long *dj) */ static void rcu_prepare_for_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL bool needwake; struct rcu_data *rdp; struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1538,6 +1539,9 @@ static void rcu_prepare_for_idle(void) struct rcu_state *rsp; int tne; + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) + return; + /* Handle nohz enablement switches conservatively. */ tne = READ_ONCE(tick_nohz_active); if (tne != rdtp->tick_nohz_enabled_snap) { @@ -1585,7 +1589,6 @@ static void rcu_prepare_for_idle(void) if (needwake) rcu_gp_kthread_wake(rsp); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1595,12 +1598,11 @@ static void rcu_prepare_for_idle(void) */ static void rcu_cleanup_after_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL - if (rcu_is_nocb_cpu(smp_processor_id())) + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || + rcu_is_nocb_cpu(smp_processor_id())) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* -- cgit v1.2.3-59-g8ed1b From cd73ca21cd2bb3711b8d80ba74c90d37ef15fe4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Mar 2015 11:53:52 -0700 Subject: rcu: Force wakeup of rcu_gp_kthread at grace-period end The rcu_gp_kthread_wake() function refuses to do a wakeup unless at least one of the ->gp_flags bits is set, which normally will not be the case when the last quiescent state is reported. This results in up to a 3-jiffy delay given default Kconfig settings. This commit therefore has rcu_report_qs_rsp() set RCU_GP_FLAG_FQS before invoking rcu_gp_kthread_wake() in order to force a more immediate wakeup at grace-period end, thus reducing grace-period latencies. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 213f644d6fb1..7d369b829598 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2139,6 +2139,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); rcu_gp_kthread_wake(rsp); } -- cgit v1.2.3-59-g8ed1b From 5ce035fb7df413a72b6b956d4aa212a866f3b565 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 30 Mar 2015 16:46:16 -0700 Subject: rcu: tree_plugin: Use bool function return values of true/false not 1/0 Use the normal return values for bool functions. Signed-off-by: Joe Perches Signed-off-by: Paul E. 
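
For context on the wakeup filter mentioned in the force-wakeup commit above, the guard in rcu_gp_kthread_wake() has roughly this shape (a paraphrase of the behavior the commit describes, not the verbatim function):

	static void rcu_gp_kthread_wake(struct rcu_state *rsp)
	{
		/* No kthread yet, or no flag bits set: suppress the wakeup. */
		if (!rsp->gp_kthread || !READ_ONCE(rsp->gp_flags))
			return;
		wake_up(&rsp->gp_wq);
	}

Setting RCU_GP_FLAG_FQS just before the call makes the ->gp_flags test succeed, so the grace-period kthread wakes immediately rather than at its next timed wakeup.
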
McKenney --- kernel/rcu/tree_plugin.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0730bfcf65db..f8af20273868 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -3056,9 +3056,9 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) if (tick_nohz_full_cpu(smp_processor_id()) && (!rcu_gp_in_progress(rsp) || ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ))) - return 1; + return true; #endif /* #ifdef CONFIG_NO_HZ_FULL */ - return 0; + return false; } /* -- cgit v1.2.3-59-g8ed1b From 82efed06d5e370f1526ec93ff4c2c2496542f615 Mon Sep 17 00:00:00 2001 From: Patrick Daly Date: Tue, 7 Apr 2015 15:12:07 -0700 Subject: rcu: Fix missing task information during rcu-preempt stall The first item list_for_each_entry_continue(alist) iterates over is alist->next, rather than alist itself. Consequently, rcu_print_detail_task_stall_rnp() skips the task referenced by gp_tasks. Use gp_tasks->prev as the argument to list_for_each_entry_continue() instead. Signed-off-by: Patrick Daly Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f8af20273868..853c7b8ea833 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -395,7 +395,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) sched_show_task(t); @@ -452,7 +452,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp) if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; rcu_print_task_stall_begin(rnp); - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { pr_cont(" P%d", t->pid); -- cgit v1.2.3-59-g8ed1b From 81e701e4376232b2779f52f15e3b7413131bd8e4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Apr 2015 11:02:25 -0700 Subject: rcu: Add more debug info on "kthread starved" RCU CPU stall warnings This commit adds grace number and command-flags information to the "kthread starved" message that is sometimes printed out as part of RCU CPU stall warnings. This message is caused by the corresponding RCU grace-period kthread not having run for at least two seconds, and this added information can be helpful when debugging. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7d369b829598..52f064ac7b49 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1136,8 +1136,9 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) j = jiffies; gpa = READ_ONCE(rsp->gp_activity); if (j - gpa > 2 * HZ) - pr_err("%s kthread starved for %ld jiffies!\n", - rsp->name, j - gpa); + pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n", + rsp->name, j - gpa, + rsp->gpnum, rsp->completed, rsp->gp_flags); } /* -- cgit v1.2.3-59-g8ed1b From 30ff1533b8f75255bdf02bc3361f1c558138f471 Mon Sep 17 00:00:00 2001 From: "Paul E. 
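
The off-by-one fixed in the stall-warning commit above is easy to reproduce outside the kernel: list_for_each_entry_continue() starts at the entry after the one passed in. A stand-alone demonstration with simplified, non-kernel types:

	#include <stdio.h>
	#include <stddef.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct list_head { struct list_head *next, *prev; };
	struct task { int pid; struct list_head rcu_node_entry; };

	/* Same shape as the kernel macro (specialized to struct task):
	   iteration begins at pos->member.next, skipping pos itself. */
	#define list_for_each_entry_continue(pos, head, member)		\
		for (pos = container_of(pos->member.next, struct task, member); \
		     &pos->member != (head);					 \
		     pos = container_of(pos->member.next, struct task, member))

	static void list_add_tail(struct list_head *item, struct list_head *head)
	{
		item->prev = head->prev;
		item->next = head;
		head->prev->next = item;
		head->prev = item;
	}

	int main(void)
	{
		struct list_head blkd = { &blkd, &blkd };	/* rnp->blkd_tasks */
		struct task a = { 1 }, b = { 2 }, c = { 3 };
		struct list_head *gp_tasks = &b.rcu_node_entry;	/* rnp->gp_tasks */
		struct task *t;

		list_add_tail(&a.rcu_node_entry, &blkd);
		list_add_tail(&b.rcu_node_entry, &blkd);
		list_add_tail(&c.rcu_node_entry, &blkd);

		/* Old code: starts at gp_tasks, so P2 is never printed. */
		t = container_of(gp_tasks, struct task, rcu_node_entry);
		list_for_each_entry_continue(t, &blkd, rcu_node_entry)
			printf("buggy: P%d\n", t->pid);		/* P3 only */

		/* Fixed code: starts at gp_tasks->prev, including gp_tasks. */
		t = container_of(gp_tasks->prev, struct task, rcu_node_entry);
		list_for_each_entry_continue(t, &blkd, rcu_node_entry)
			printf("fixed: P%d\n", t->pid);		/* P2, P3 */
		return 0;
	}
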
McKenney" Date: Fri, 1 May 2015 13:01:38 -0700 Subject: rcu: Make synchronize_sched_expedited() call wait_rcu_gp() Currently, synchronize_sched_expedited() will call synchronize_sched() if there is danger of counter wrap. But if configuration says to always do expedited grace periods, synchronize_sched() will just call synchronize_sched_expedited() right back again. In theory, the old expedited operations will complete, the counters will get back in synch, and the recursion will end. But we could easily run out of stack long before that time. This commit therefore makes synchronize_sched_expedited() invoke the underlying wait_rcu_gp(call_rcu_sched) instead of synchronize_sched(), the same as all the other calls out from synchronize_sched_expedited(). This bug was introduced by commit 1924bcb02597 (Avoid counter wrap in synchronize_sched_expedited()). Reported-by: Rik van Riel Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 52f064ac7b49..f02830c85ec2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3290,7 +3290,7 @@ void synchronize_sched_expedited(void) if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start), (ulong)atomic_long_read(&rsp->expedited_done) + ULONG_MAX / 8)) { - synchronize_sched(); + wait_rcu_gp(call_rcu_sched); atomic_long_inc(&rsp->expedited_wrap); return; } -- cgit v1.2.3-59-g8ed1b From c92fb05795f57463cb763a82f9053d294f77ea87 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Tue, 5 May 2015 21:57:06 +0800 Subject: rcu: Make rcu_*_data variables static rcu_bh_data, rcu_sched_data and rcu_preempt_data are never used outside kernel/rcu/tree.c and thus can be made static. Doing so fixes a section mismatch warning reported by clang when building LLVMLinux with -Wsection, because these variables were declared in .data..percpu and defined in .data..percpu..shared_aligned since commit 11bbb235c26f ("rcu: Use DEFINE_PER_CPU_SHARED_ALIGNED for rcu_data"). Signed-off-by: Nicolas Iooss Reviewed-by: Josh Triplett Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 2 +- kernel/rcu/tree.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f02830c85ec2..6efb0b66a30d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -91,7 +91,7 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ DEFINE_RCU_TPS(sname) \ -DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ struct rcu_state sname##_state = { \ .level = { &sname##_state.node[0] }, \ .rda = &sname##_data, \ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index dd5ce4034875..8079c5b22a8f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -517,14 +517,11 @@ extern struct list_head rcu_struct_flavors; * RCU implementation internal declarations: */ extern struct rcu_state rcu_sched_state; -DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); extern struct rcu_state rcu_bh_state; -DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); #ifdef CONFIG_PREEMPT_RCU extern struct rcu_state rcu_preempt_state; -DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_RCU_BOOST -- cgit v1.2.3-59-g8ed1b From 82072c4fcf095ce03a05860365c157c8bb58945b Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 11 May 2015 18:12:27 +0200 Subject: rcu: Change function declaration to bool rcu_cpu_has_callbacks() is declared int. The current declaration was introduced in commit c0f4dfd4f90f (rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks). But it actually returns bool, and as the function description states (" * Return true if the specified CPU has any callback...."), it should be a bool, as all three call sites currently treat it as bool. Type-checking coccinelle spatches are being used to locate type mismatches between function signatures and return values; in this case, this produced: ./kernel/rcu/tree.c:3538 WARNING: return of wrong type int != bool. Patch was compile-tested with x86_64_defconfig (implies CONFIG_TREE_RCU=y) Patch is against 4.1-rc3 (localversion-next is -next-20150511) and fixes Signed-off-by: Nicholas Mc Guire Reviewed-by: Josh Triplett Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6efb0b66a30d..7b9dd4f62569 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3496,7 +3496,7 @@ static int rcu_pending(void) * non-NULL, store an indication of whether all callbacks are lazy. * (If there are no callbacks, all of them are deemed to be lazy.) */ -static int __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) +static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) { bool al = true; bool hc = false; -- cgit v1.2.3-59-g8ed1b From ea46351cea79f54729d8546e5bd7f091a2e6484b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Mar 2015 14:05:26 -0800 Subject: rcu: Eliminate HOTPLUG_CPU #ifdef in favor of IS_ENABLED() This commit removes a HOTPLUG_CPU #ifdef, replacing it with IS_ENABLED()-protected return statements. This relies on the optimizer to remove any resulting dead code. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0628df155970..f2e888c8ec5a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2335,8 +2335,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) rcu_report_qs_rdp(rdp->cpu, rsp, rdp); } -#ifdef CONFIG_HOTPLUG_CPU - /* * Send the specified CPU's RCU callbacks to the orphanage. The * specified CPU must be offline, and the caller must hold the @@ -2347,7 +2345,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* No-CBs CPUs do not have orphanable callbacks. */ - if (rcu_is_nocb_cpu(rdp->cpu)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu)) return; /* @@ -2406,7 +2404,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); /* No-CBs CPUs are handled specially. */ - if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || + rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) return; /* Do the accounting first. */ @@ -2453,6 +2452,9 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); RCU_TRACE(struct rcu_node *rnp = rdp->mynode); + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + RCU_TRACE(mask = rdp->grpmask); trace_rcu_grace_period(rsp->name, rnp->gpnum + 1 - !!(rnp->qsmask & mask), @@ -2481,7 +2483,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) long mask; struct rcu_node *rnp = rnp_leaf; - if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || + rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) return; for (;;) { mask = rnp->grpmask; @@ -2512,6 +2515,9 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ mask = rdp->grpmask; raw_spin_lock_irqsave(&rnp->lock, flags); @@ -2533,6 +2539,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + /* Adjust any no-longer-needed kthreads. */ rcu_boost_kthread_setaffinity(rnp, -1); @@ -2547,26 +2556,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) cpu, rdp->qlen, rdp->nxtlist); } -#else /* #ifdef CONFIG_HOTPLUG_CPU */ - -static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) -{ -} - -static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) -{ -} - -static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) -{ -} - -static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) -{ -} - -#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ - /* * Invoke any RCU callbacks that have made it to the end of their grace * period. Thottle as specified by rdp->blimit. -- cgit v1.2.3-59-g8ed1b From 0a0ba1c93f8a0ff28bacec0d1d018081e762e2f0 Mon Sep 17 00:00:00 2001 From: "Paul E. 
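
The transformation used throughout the HOTPLUG_CPU commit above is the same each time: fold the #ifdef into the function so that one copy is compiled, and thus type-checked, in every configuration. In sketch form (generic name; IS_ENABLED() is the real macro from <linux/kconfig.h>):

	/* Before: two copies, one of them always hidden from the compiler. */
	#ifdef CONFIG_HOTPLUG_CPU
	static void cleanup(int cpu) { /* real work */ }
	#else
	static void cleanup(int cpu) { }
	#endif

	/* After: one copy; the body is dead code when CONFIG_HOTPLUG_CPU=n
	   and the optimizer discards it. */
	static void cleanup(int cpu)
	{
		if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
			return;
		/* real work */
	}
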
McKenney" Date: Sun, 8 Mar 2015 14:20:30 -0700 Subject: rcu: Adjust ->lock acquisition for tasks no longer migrating Tasks are no longer migrated away from a given rcu_node structure when all CPUs corresponding to that rcu_node structure have gone offline. This means that rcu_read_unlock_special() no longer needs to loop retrying rcu_node ->lock acquisition because the current task is guaranteed to stay put. This commit takes a small and paranoid step towards relying on this guarantee by placing a WARN_ON_ONCE() just after the early exit from the lock-acquisition loop. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 58b1ebdc4387..c8340e929eb4 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -307,9 +307,11 @@ void rcu_read_unlock_special(struct task_struct *t) t->rcu_read_unlock_special.b.blocked = false; /* - * Remove this task from the list it blocked on. The - * task can migrate while we acquire the lock, but at - * most one time. So at most two passes through loop. + * Remove this task from the list it blocked on. The task + * now remains queued on the rcu_node corresponding to + * the CPU it first blocked on, so the first attempt to + * acquire the task's rcu_node's ->lock will succeed. + * Keep the loop and add a WARN_ON() out of sheer paranoia. */ for (;;) { rnp = t->rcu_blocked_node; @@ -317,6 +319,7 @@ void rcu_read_unlock_special(struct task_struct *t) smp_mb__after_unlock_lock(); if (rnp == t->rcu_blocked_node) break; + WARN_ON_ONCE(1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); -- cgit v1.2.3-59-g8ed1b From c5b5539506f86469dca08310657ca93bbb6c00a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Mar 2015 16:58:41 -0700 Subject: rcu: Remove dead code from force_qs_rnp() Because force_qs_rnp() is invoked only from the force-quiescent-state code which runs only in the context of the grace-period kthread, a grace period must always be in progress throughout force_qs_rnp()'s execution. This commit therefore removes the rcu_gp_in_progress() check and the associated dead code. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f2e888c8ec5a..e338a12c3a1b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2720,10 +2720,6 @@ static void force_qs_rnp(struct rcu_state *rsp, mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if (!rcu_gp_in_progress(rsp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } if (rnp->qsmask == 0) { if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || -- cgit v1.2.3-59-g8ed1b From cce7f1fc015a98ca9263bd5730c00258bc214e53 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Mar 2015 17:00:56 -0700 Subject: rcu: Remove redundant offline check Because offline CPUs are propagated up the rcu_node tree's ->qsmaskinit bits just before each grace period starts, the ->qsmaskinit bit cannot be clear when the corresponding ->qsmask bit is set. Furthermore, this condition used to correspond to a CPU that was on its way offline, and making RCU's notion of an offline CPU more precise has eliminated this situation. This commit therefore removes the now-redundant offline check from force_qs_rnp(). 
Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e338a12c3a1b..a1df68fce545 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2749,8 +2749,6 @@ static void force_qs_rnp(struct rcu_state *rsp, bit = 1; for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { if ((rnp->qsmask & bit) != 0) { - if ((rnp->qsmaskinit & bit) == 0) - *isidle = false; /* Pending hotplug. */ if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) mask |= bit; } -- cgit v1.2.3-59-g8ed1b From a738eec6c6082f48cbcf0157fd9f550e286ea04b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Mar 2015 14:53:29 -0700 Subject: rcu: Correctly initialize ->rcu_qs_ctr_snap at online time The rcu_data structure's ->rcu_qs_ctr_snap field is initialized at CPU-online time from the current CPU's element of the per-CPU rcu_qs_ctr variable. Unfortunately, this is at CPU_UP_PREPARE time, so has nothing to do with the CPU being onlined. This commit therefore initializes this variable from the incoming CPU's element of rcu_qs_ctr. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a1df68fce545..d198a33d54bd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3763,7 +3763,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; rdp->passed_quiesce = false; - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); rdp->qs_pending = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore(&rnp->lock, flags); -- cgit v1.2.3-59-g8ed1b From eab128e8305f2bc4c91406031aab26d86fecced6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 15 Apr 2015 12:08:22 -0700 Subject: rcu: Modulate grace-period slow init to normalize delay Currently, the larger the gp_init_delay boot parameter, the slower rcutorture will sequence through grace periods. This commit avoids this issue by decreasing the probability of slowing initialization of a given grace period as the degree of slowness increases. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0628df155970..c34422d92aa9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -169,7 +169,17 @@ module_param(gp_init_delay, int, 0644); #else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ static const int gp_init_delay; #endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ -#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ + +/* + * Number of grace periods between delays, normalized by the duration of + * the delay. The longer the the delay, the more the grace periods between + * each delay. The reason for this normalization is that it means that, + * for non-zero delays, the overall slowdown of grace periods is constant + * regardless of the duration of the delay. This arrangement balances + * the need for long delays to increase some race probabilities with the + * need for fast grace periods to increase other race probabilities. + */ +#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. 
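
A worked example of the normalization: write D for the delay in jiffies and let K = rcu_num_nodes * PER_RCU_NODE_PERIOD. The test !(rsp->gpnum % (K * D)) fires once every K * D grace periods, and during such a grace period the initialization loop sleeps D jiffies at each of the rcu_num_nodes rcu_node structures, roughly rcu_num_nodes * D jiffies in all. Averaged over grace periods, that is rcu_num_nodes * D / (rcu_num_nodes * PER_RCU_NODE_PERIOD * D) = 1/PER_RCU_NODE_PERIOD of a jiffy per grace period, independent of D, which is the constant overall slowdown the comment claims.
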
*/ /* * Track the rcutorture test sequence number and the update version @@ -1848,7 +1858,8 @@ static int rcu_gp_init(struct rcu_state *rsp) cond_resched_rcu_qs(); WRITE_ONCE(rsp->gp_activity, jiffies); if (gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) + !(rsp->gpnum % + (rcu_num_nodes * PER_RCU_NODE_PERIOD * gp_init_delay))) schedule_timeout_uninterruptible(gp_init_delay); } -- cgit v1.2.3-59-g8ed1b From 3eaaaf6cd6d7fbaf552cc543ccb93d7da81f43ec Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Mar 2015 16:51:17 -0700 Subject: rcu: Shut up spurious gcc uninitialized-variable warning Because gcc doesn't realize that rcu_num_lvls must be strictly greater than zero, some versions give a spurious warning about levelcnt[0] being uninitialized in rcu_init_one(). This commit updates the condition on the pre-existing panic() in order to educate gcc on this point. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c34422d92aa9..9b076b284695 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3982,9 +3982,9 @@ static void __init rcu_init_one(struct rcu_state *rsp, BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ - /* Silence gcc 4.8 warning about array index out of range. */ - if (rcu_num_lvls > RCU_NUM_LVLS) - panic("rcu_init_one: rcu_num_lvls overflow"); + /* Silence gcc 4.8 false positive about array index out of range. */ + if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS) + panic("rcu_init_one: rcu_num_lvls out of range"); /* Initialize the level-tracking arrays. */ -- cgit v1.2.3-59-g8ed1b From 0f41c0ddadfb3d5baffe62351c380e2881aacd58 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Mar 2015 18:33:20 -0700 Subject: rcu: Provide diagnostic option to slow down grace-period scans Grace-period scans of the rcu_node combining tree normally proceed quite quickly, so that it is very difficult to reproduce races against them. This commit therefore allows grace-period pre-initialization and cleanup to be artificially slowed down, increasing race-reproduction probability. A pair of pairs of new Kconfig parameters are provided, RCU_TORTURE_TEST_SLOW_PREINIT to enable the slowing down of propagating CPU-hotplug changes up the combining tree along with RCU_TORTURE_TEST_SLOW_PREINIT_DELAY to specify the delay in jiffies, and RCU_TORTURE_TEST_SLOW_CLEANUP to enable the slowing down of the end-of-grace-period cleanup scan along with RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY to specify the delay in jiffies. Boot-time parameters named rcutree.gp_preinit_delay and rcutree.gp_cleanup_delay allow these delays to be specified at boot time. Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 16 ++++++- kernel/rcu/tree.c | 29 ++++++++++-- lib/Kconfig.debug | 54 +++++++++++++++++++++- .../selftests/rcutorture/configs/rcu/CFcommon | 2 + 4 files changed, 93 insertions(+), 8 deletions(-) (limited to 'kernel/rcu') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 61ab1628a057..10a4fb80c033 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2992,11 +2992,23 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Set maximum number of finished RCU callbacks to process in one batch. 
+ rcutree.gp_cleanup_delay= [KNL] + Set the number of jiffies to delay each step of + RCU grace-period cleanup. This only has effect + when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set. + rcutree.gp_init_delay= [KNL] Set the number of jiffies to delay each step of RCU grace-period initialization. This only has - effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT is - set. + effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT + is set. + + rcutree.gp_preinit_delay= [KNL] + Set the number of jiffies to delay each step of + RCU grace-period pre-initialization, that is, + the propagation of recent CPU-hotplug changes up + the rcu_node combining tree. This only has effect + when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set. rcutree.rcu_fanout_leaf= [KNL] Increase the number of CPUs assigned to each diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9b076b284695..2f3cb5513ca3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -163,6 +163,14 @@ static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; module_param(kthread_prio, int, 0644); /* Delay in jiffies for grace-period initialization delays, debug only. */ + +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT +static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY; +module_param(gp_preinit_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ +static const int gp_preinit_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ + #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); @@ -170,6 +178,13 @@ module_param(gp_init_delay, int, 0644); static const int gp_init_delay; #endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP +static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY; +module_param(gp_cleanup_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ +static const int gp_cleanup_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ + /* * Number of grace periods between delays, normalized by the duration of * the delay. The longer the the delay, the more the grace periods between @@ -1742,6 +1757,13 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) rcu_gp_kthread_wake(rsp); } +static void rcu_gp_slow(struct rcu_state *rsp, int delay) +{ + if (delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) + schedule_timeout_uninterruptible(delay); +} + /* * Initialize a new grace period. Return 0 if no grace period required. */ @@ -1784,6 +1806,7 @@ static int rcu_gp_init(struct rcu_state *rsp) * will handle subsequent offline CPUs. */ rcu_for_each_leaf_node(rsp, rnp) { + rcu_gp_slow(rsp, gp_preinit_delay); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); if (rnp->qsmaskinit == rnp->qsmaskinitnext && @@ -1840,6 +1863,7 @@ static int rcu_gp_init(struct rcu_state *rsp) * process finishes, because this kthread handles both. 
*/ rcu_for_each_node_breadth_first(rsp, rnp) { + rcu_gp_slow(rsp, gp_init_delay); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); rdp = this_cpu_ptr(rsp->rda); @@ -1857,10 +1881,6 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); WRITE_ONCE(rsp->gp_activity, jiffies); - if (gp_init_delay > 0 && - !(rsp->gpnum % - (rcu_num_nodes * PER_RCU_NODE_PERIOD * gp_init_delay))) - schedule_timeout_uninterruptible(gp_init_delay); } return 1; @@ -1955,6 +1975,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); WRITE_ONCE(rsp->gp_activity, jiffies); + rcu_gp_slow(rsp, gp_cleanup_delay); } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba2b0c87e65b..e1af93ae246b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1261,12 +1261,38 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. +config RCU_TORTURE_TEST_SLOW_PREINIT + bool "Slow down RCU grace-period pre-initialization to expose races" + depends on RCU_TORTURE_TEST + help + This option delays grace-period pre-initialization (the + propagation of CPU-hotplug changes up the rcu_node combining + tree) for a few jiffies between initializing each pair of + consecutive rcu_node structures. This helps to expose races + involving grace-period pre-initialization, in other words, it + makes your kernel less stable. It can also greatly increase + grace-period latency, especially on systems with large numbers + of CPUs. This is useful when torture-testing RCU, but in + almost no other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY + int "How much to slow down RCU grace-period pre-initialization" + range 0 5 + default 3 + depends on RCU_TORTURE_TEST_SLOW_PREINIT + help + This option specifies the number of jiffies to wait between + each rcu_node structure pre-initialization step. + config RCU_TORTURE_TEST_SLOW_INIT bool "Slow down RCU grace-period initialization to expose races" depends on RCU_TORTURE_TEST help - This option makes grace-period initialization block for a - few jiffies between initializing each pair of consecutive + This option delays grace-period initialization for a few + jiffies between initializing each pair of consecutive rcu_node structures. This helps to expose races involving grace-period initialization, in other words, it makes your kernel less stable. It can also greatly increase grace-period @@ -1286,6 +1312,30 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY This option specifies the number of jiffies to wait between each rcu_node structure initialization. +config RCU_TORTURE_TEST_SLOW_CLEANUP + bool "Slow down RCU grace-period cleanup to expose races" + depends on RCU_TORTURE_TEST + help + This option delays grace-period cleanup for a few jiffies + between cleaning up each pair of consecutive rcu_node + structures. This helps to expose races involving grace-period + cleanup, in other words, it makes your kernel less stable. + It can also greatly increase grace-period latency, especially + on systems with large numbers of CPUs. This is useful when + torture-testing RCU, but in almost no other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. 
+ +config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY + int "How much to slow down RCU grace-period cleanup" + range 0 5 + default 3 + depends on RCU_TORTURE_TEST_SLOW_CLEANUP + help + This option specifies the number of jiffies to wait between + each rcu_node structure cleanup operation. + config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" depends on RCU_STALL_COMMON diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index 49701218dc62..f824b4c9d9d9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -1,3 +1,5 @@ CONFIG_RCU_TORTURE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y +CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y -- cgit v1.2.3-59-g8ed1b From 7fa270010e0ddd3693381431f373b3e3135b0695 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Apr 2015 10:27:15 -0700 Subject: rcu: Convert CONFIG_RCU_FANOUT_EXACT to boot parameter The CONFIG_RCU_FANOUT_EXACT Kconfig parameter is used primarily (and perhaps only) by rcutorture to verify that RCU works correctly in specific rcu_node combining-tree configurations. It therefore does not make much sense have this as a question to people attempting to configure their kernels. So this commit creates an rcutree.rcu_fanout_exact= boot parameter that rcutorture can use, and eliminates the original CONFIG_RCU_FANOUT_EXACT Kconfig parameter. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Reviewed-by: Pranith Kumar --- Documentation/kernel-parameters.txt | 6 ++++++ init/Kconfig | 14 -------------- kernel/rcu/tree.c | 7 +++++-- kernel/rcu/tree_plugin.h | 2 +- 4 files changed, 12 insertions(+), 17 deletions(-) (limited to 'kernel/rcu') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 10a4fb80c033..f5582dcdf80d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3010,6 +3010,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the rcu_node combining tree. This only has effect when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set. + rcutree.rcu_fanout_exact= [KNL] + Disable autobalancing of the rcu_node combining + tree. This is used by rcutorture, and might + possibly be useful for architectures having high + cache-to-cache transfer latencies. + rcutree.rcu_fanout_leaf= [KNL] Increase the number of CPUs assigned to each leaf rcu_node structure. Useful for very large diff --git a/init/Kconfig b/init/Kconfig index 927210810189..0ec82362cfc0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -611,20 +611,6 @@ config RCU_FANOUT_LEAF Take the default if unsure. -config RCU_FANOUT_EXACT - bool "Disable tree-based hierarchical RCU auto-balancing" - depends on TREE_RCU || PREEMPT_RCU - default n - help - This option forces use of the exact RCU_FANOUT value specified, - regardless of imbalances in the hierarchy. This is useful for - testing RCU itself, and might one day be useful on systems with - strong NUMA behavior. - - Without RCU_FANOUT_EXACT, the code will balance the hierarchy. - - Say N if unsure. 
- config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" depends on NO_HZ_COMMON && SMP diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2f3cb5513ca3..b49c474e1fff 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -113,6 +113,9 @@ RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); static struct rcu_state *rcu_state_p; LIST_HEAD(rcu_struct_flavors); +/* Control rcu_node-tree auto-balancing at boot time. */ +static bool rcu_fanout_exact; +module_param(rcu_fanout_exact, bool, 0444); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; module_param(rcu_fanout_leaf, int, 0444); @@ -3956,13 +3959,13 @@ void rcu_scheduler_starting(void) /* * Compute the per-level fanout, either using the exact fanout specified - * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. + * or balancing the tree, depending on the rcu_fanout_exact boot parameter. */ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) { + if (rcu_fanout_exact) { rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; for (i = rcu_num_lvls - 2; i >= 0; i--) rsp->levelspread[i] = CONFIG_RCU_FANOUT; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 58b1ebdc4387..eb460ec747ef 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -64,7 +64,7 @@ static void __init rcu_bootup_announce_oddness(void) (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", CONFIG_RCU_FANOUT); - if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) + if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); -- cgit v1.2.3-59-g8ed1b From a3dc2948cec80f20a89e9b646d0c01b121e48e02 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Apr 2015 11:40:50 -0700 Subject: rcu: Enable diagnostic dump of rcu_node combining tree The purpose of this commit is to make it easier to verify that RCU's combining tree is set up correctly, which is useful to have when making changes in how that tree is initialized. Signed-off-by: Paul E. McKenney Reviewed-by: Pranith Kumar [ paulmck: Fold fix found by Fengguang's 0-day test robot. ] --- Documentation/kernel-parameters.txt | 5 +++++ kernel/rcu/tree.c | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'kernel/rcu') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f5582dcdf80d..a1cb88d9864e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2992,6 +2992,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Set maximum number of finished RCU callbacks to process in one batch. + rcutree.dump_tree= [KNL] + Dump the structure of the rcu_node combining tree + out at early boot. This is used for diagnostic + purposes, to verify correct tree setup. + rcutree.gp_cleanup_delay= [KNL] Set the number of jiffies to delay each step of RCU grace-period cleanup. 
This only has effect diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b49c474e1fff..1bc14c670641 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -113,6 +113,9 @@ RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); static struct rcu_state *rcu_state_p; LIST_HEAD(rcu_struct_flavors); +/* Dump rcu_node combining tree at boot to verify correct setup. */ +static bool dump_tree; +module_param(dump_tree, bool, 0444); /* Control rcu_node-tree auto-balancing at boot time. */ static bool rcu_fanout_exact; module_param(rcu_fanout_exact, bool, 0444); @@ -4144,6 +4147,28 @@ static void __init rcu_init_geometry(void) rcu_num_nodes -= n; } +/* + * Dump out the structure of the rcu_node combining tree associated + * with the rcu_state structure referenced by rsp. + */ +static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp) +{ + int level = 0; + struct rcu_node *rnp; + + pr_info("rcu_node tree layout dump\n"); + pr_info(" "); + rcu_for_each_node_breadth_first(rsp, rnp) { + if (rnp->level != level) { + pr_cont("\n"); + pr_info(" "); + level = rnp->level; + } + pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum); + } + pr_cont("\n"); +} + void __init rcu_init(void) { int cpu; @@ -4154,6 +4179,8 @@ void __init rcu_init(void) rcu_init_geometry(); rcu_init_one(&rcu_bh_state, &rcu_bh_data); rcu_init_one(&rcu_sched_state, &rcu_sched_data); + if (dump_tree) + rcu_dump_rcu_node_tree(&rcu_sched_state); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); -- cgit v1.2.3-59-g8ed1b From 05c5df31afd1092ca6322094d22aff6351fa67fe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Apr 2015 14:27:43 -0700 Subject: rcu: Make RCU able to tolerate undefined CONFIG_RCU_FANOUT This commit introduces an RCU_FANOUT C-preprocessor macro so that RCU will build even when CONFIG_RCU_FANOUT is undefined. The RCU_FANOUT macro is set to the value of CONFIG_RCU_FANOUT when defined, otherwise it is set to 32 for 32-bit systems and 64 for 64-bit systems. This commit then makes CONFIG_RCU_FANOUT depend on CONFIG_RCU_EXPERT, so that Kconfig users won't be asked about CONFIG_RCU_FANOUT unless they want to be. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
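
To make the dump format concrete: given the pr_cont("%d:%d ^%d ") above, each entry is grplo:grphi ^grpnum and each output line is one level of the tree. For a two-level tree covering 64 CPUs with a leaf fanout of 16, the boot-time dump would look something like this (illustrative, not captured from a real boot):

	rcu_node tree layout dump
	 0:63 ^0
	 0:15 ^0 16:31 ^1 32:47 ^2 48:63 ^3
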
McKenney Reviewed-by: Pranith Kumar --- init/Kconfig | 2 +- kernel/rcu/tree.c | 4 ++-- kernel/rcu/tree.h | 18 +++++++++++++++--- kernel/rcu/tree_plugin.h | 6 +++--- 4 files changed, 21 insertions(+), 9 deletions(-) (limited to 'kernel/rcu') diff --git a/init/Kconfig b/init/Kconfig index ac5386937d37..fd2d4fb517ca 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -583,7 +583,7 @@ config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on TREE_RCU || PREEMPT_RCU + depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT default 64 if 64BIT default 32 if !64BIT help diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1bc14c670641..ba3f8d59d948 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3971,7 +3971,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) if (rcu_fanout_exact) { rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; for (i = rcu_num_lvls - 2; i >= 0; i--) - rsp->levelspread[i] = CONFIG_RCU_FANOUT; + rsp->levelspread[i] = RCU_FANOUT; } else { int ccur; int cprv; @@ -4111,7 +4111,7 @@ static void __init rcu_init_geometry(void) rcu_capacity[0] = 1; rcu_capacity[1] = rcu_fanout_leaf; for (i = 2; i <= MAX_RCU_LVLS; i++) - rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT; /* * The boot-time rcu_fanout_leaf parameter is only permitted diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a69d3dab2ec4..ac3020fff028 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -35,11 +35,23 @@ * In practice, this did work well going from three levels to four. * Of course, your mileage may vary. */ + #define MAX_RCU_LVLS 4 + +#ifdef CONFIG_RCU_FANOUT +#define RCU_FANOUT CONFIG_RCU_FANOUT +#else /* #ifdef CONFIG_RCU_FANOUT */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT 64 +# else +# define RCU_FANOUT 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT */ + #define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) -#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) +#define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT) +#define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT) +#define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT_1 # define RCU_NUM_LVLS 1 diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index eb460ec747ef..d7e505970f24 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -60,10 +60,10 @@ static void __init rcu_bootup_announce_oddness(void) { if (IS_ENABLED(CONFIG_RCU_TRACE)) pr_info("\tRCU debugfs-based tracing is enabled.\n"); - if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || - (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) + if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || + (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", - CONFIG_RCU_FANOUT); + RCU_FANOUT); if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) -- cgit v1.2.3-59-g8ed1b From 47d631af58bb9b2f2dd3d0da8c98a79a5e75c738 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Apr 2015 09:12:13 -0700 Subject: rcu: Make RCU able to tolerate undefined CONFIG_RCU_FANOUT_LEAF This commit introduces an RCU_FANOUT_LEAF C-preprocessor macro so that RCU will build even when CONFIG_RCU_FANOUT_LEAF is undefined. 
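
The indirection is needed because a Kconfig symbol that is hidden (here, behind the new RCU_EXPERT dependency) is simply not defined at all. A compact illustration of the two failure modes (sketch, not kernel code):

	#if CONFIG_RCU_FANOUT_LEAF > 16		/* silently evaluates as 0 when undefined! */
	#endif
	int n = CONFIG_RCU_FANOUT_LEAF;		/* hard build error when undefined */
	int m = RCU_FANOUT_LEAF;		/* fine: the macro supplies 64 or 32 */
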
The RCU_FANOUT_LEAF macro is set to the value of CONFIG_RCU_FANOUT_LEAF when defined, otherwise it is set to 32 for 32-bit systems and 64 for 64-bit systems. This commit then makes CONFIG_RCU_FANOUT_LEAF depend on CONFIG_RCU_EXPERT, so that Kconfig users won't be asked about CONFIG_RCU_FANOUT_LEAF unless they want to be. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Reviewed-by: Pranith Kumar --- init/Kconfig | 2 +- kernel/rcu/tree.c | 8 ++++---- kernel/rcu/tree.h | 12 +++++++++++- kernel/rcu/tree_plugin.h | 6 +++--- 4 files changed, 19 insertions(+), 9 deletions(-) (limited to 'kernel/rcu') diff --git a/init/Kconfig b/init/Kconfig index fd2d4fb517ca..78176001f73b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -603,7 +603,7 @@ config RCU_FANOUT_LEAF int "Tree-based hierarchical RCU leaf-level fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on TREE_RCU || PREEMPT_RCU + depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT default 16 help This option controls the leaf-level fanout of hierarchical diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ba3f8d59d948..1edd11298224 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -119,8 +119,8 @@ module_param(dump_tree, bool, 0444); /* Control rcu_node-tree auto-balancing at boot time. */ static bool rcu_fanout_exact; module_param(rcu_fanout_exact, bool, 0444); -/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ -static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */ +static int rcu_fanout_leaf = RCU_FANOUT_LEAF; module_param(rcu_fanout_leaf, int, 0444); int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ @@ -4097,7 +4097,7 @@ static void __init rcu_init_geometry(void) jiffies_till_next_fqs = d; /* If the compile-time values are accurate, just leave. */ - if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && + if (rcu_fanout_leaf == RCU_FANOUT_LEAF && nr_cpu_ids == NR_CPUS) return; pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n", @@ -4121,7 +4121,7 @@ static void __init rcu_init_geometry(void) * the configured number of CPUs. Complain and fall back to the * compile-time values if these limits are exceeded. 
*/ - if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || + if (rcu_fanout_leaf < RCU_FANOUT_LEAF || rcu_fanout_leaf > sizeof(unsigned long) * 8 || n > rcu_capacity[MAX_RCU_LVLS]) { WARN_ON(1); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ac3020fff028..7d949c186302 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -48,7 +48,17 @@ # endif #endif /* #else #ifdef CONFIG_RCU_FANOUT */ -#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) +#ifdef CONFIG_RCU_FANOUT_LEAF +#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF +#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT_LEAF 64 +# else +# define RCU_FANOUT_LEAF 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ + +#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) #define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT) #define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT) #define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index d7e505970f24..713503853841 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -76,10 +76,10 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tAdditional per-CPU info printed with stalls.\n"); if (NUM_RCU_LVL_4 != 0) pr_info("\tFour-level hierarchy is enabled.\n"); - if (CONFIG_RCU_FANOUT_LEAF != 16) + if (RCU_FANOUT_LEAF != 16) pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", - CONFIG_RCU_FANOUT_LEAF); - if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) + RCU_FANOUT_LEAF); + if (rcu_fanout_leaf != RCU_FANOUT_LEAF) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); -- cgit v1.2.3-59-g8ed1b From 26730f55c2842b4ee06a5307d58265db7dd26065 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Apr 2015 09:22:14 -0700 Subject: rcu: Make RCU able to tolerate undefined CONFIG_RCU_KTHREAD_PRIO This commit updates the initialization of the kthread_prio boot parameter so that RCU will build even when CONFIG_RCU_KTHREAD_PRIO is undefined. The kthread_prio boot parameter is set to CONFIG_RCU_KTHREAD_PRIO if that is defined, otherwise to 1 if CONFIG_RCU_BOOST is defined and to zero otherwise. This commit then makes CONFIG_RCU_KTHREAD_PRIO depend on CONFIG_RCU_EXPERT, so that Kconfig users won't be asked about CONFIG_RCU_KTHREAD_PRIO unless they want to be. Reported-by: Linus Torvalds Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Reviewed-by: Pranith Kumar --- init/Kconfig | 1 + kernel/rcu/tree.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'kernel/rcu') diff --git a/init/Kconfig b/init/Kconfig index 78176001f73b..af2c93c4a105 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -671,6 +671,7 @@ config RCU_KTHREAD_PRIO range 0 99 if !RCU_BOOST default 1 if RCU_BOOST default 0 if !RCU_BOOST + depends on RCU_EXPERT help This option specifies the SCHED_FIFO priority value that will be assigned to the rcuc/n and rcub/n threads and is also the value diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1edd11298224..0e9ce1272971 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -165,7 +165,11 @@ static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); /* rcuc/rcub kthread realtime priority */ +#ifdef CONFIG_RCU_KTHREAD_PRIO static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; +#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */ +static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 
1 : 0; +#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */ module_param(kthread_prio, int, 0644); /* Delay in jiffies for grace-period initialization delays, debug only. */ -- cgit v1.2.3-59-g8ed1b From 1ce46ee597bc36fde6984e91aecc2d662a754199 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 May 2015 23:04:22 -0700 Subject: rcu: Conditionally compile RCU's eqs warnings This commit applies some warning-omission micro-optimizations to RCU's various extended-quiescent-state functions, which are on the kernel/user hotpath for CONFIG_NO_HZ_FULL=y. Reported-by: Rik van Riel Reported-by: Mike Galbraith Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 23 +++++++++++++++-------- lib/Kconfig.debug | 11 +++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0e9ce1272971..991fa5c5dc5e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -620,7 +620,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user) struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); - if (!user && !is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); @@ -639,7 +640,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user) smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); smp_mb__after_atomic(); /* Force ordering with next sojourn. */ - WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + atomic_read(&rdtp->dynticks) & 0x1); rcu_dynticks_task_enter(); /* @@ -665,7 +667,8 @@ static void rcu_eqs_enter(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + (oldval & DYNTICK_TASK_NEST_MASK) == 0); if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) { rdtp->dynticks_nesting = 0; rcu_eqs_enter_common(oldval, user); @@ -738,7 +741,8 @@ void rcu_irq_exit(void) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; - WARN_ON_ONCE(rdtp->dynticks_nesting < 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting < 0); if (rdtp->dynticks_nesting) trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); else @@ -763,10 +767,12 @@ static void rcu_eqs_exit_common(long long oldval, int user) atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic(); /* See above.
*/ - WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !(atomic_read(&rdtp->dynticks) & 0x1)); rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); - if (!user && !is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); @@ -790,7 +796,7 @@ static void rcu_eqs_exit(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE(oldval < 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); if (oldval & DYNTICK_TASK_NEST_MASK) { rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; } else { @@ -863,7 +869,8 @@ void rcu_irq_enter(void) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; - WARN_ON_ONCE(rdtp->dynticks_nesting == 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting == 0); if (oldval) trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); else diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c4e1cf04cf57..b908048f8d6a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1373,6 +1373,17 @@ config RCU_TRACE Say Y here if you want to enable RCU tracing. Say N if you are unsure. +config RCU_EQS_DEBUG + bool "Use this when adding any sort of NO_HZ support to your arch" + depends on DEBUG_KERNEL + help + This option provides consistency checks in RCU's handling of + NO_HZ. These checks have proven quite helpful in detecting + bugs in arch-specific NO_HZ code. + + Say N here if you need ultimate kernel/user switch latencies. + Say Y if you are unsure. + endmenu # "RCU Debugging" config DEBUG_BLOCK_EXT_DEVT -- cgit v1.2.3-59-g8ed1b From 51952bc633064311410b041fad38da1614f4539e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Apr 2015 11:15:30 -0700 Subject: rcu: Further shrink Tiny RCU by making empty functions static inlines The Tiny RCU counterparts to rcu_idle_enter(), rcu_idle_exit(), rcu_irq_enter(), and rcu_irq_exit() are empty functions, but each has EXPORT_SYMBOL_GPL(), which needlessly consumes extra memory, especially in kernels built with module support. This commit therefore moves these functions to static inlines in rcutiny.h, removing the need for exports. This won't affect the size of the tiniest kernels, which are likely built without module support, but might help semi-tiny kernels that do include module support. Signed-off-by: Paul E.
McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 4 ---- include/linux/rcutiny.h | 16 ++++++++++++++++ include/linux/rcutree.h | 5 +++++ kernel/rcu/tiny.c | 33 --------------------------------- 4 files changed, 21 insertions(+), 37 deletions(-) (limited to 'kernel/rcu') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..1b3d7bcb3a6c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -292,10 +292,6 @@ void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); struct notifier_block; -void rcu_idle_enter(void); -void rcu_idle_exit(void); -void rcu_irq_enter(void); -void rcu_irq_exit(void); int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 937edaeb150d..3df6c1ec4e25 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -159,6 +159,22 @@ static inline void rcu_cpu_stall_reset(void) { } +static inline void rcu_idle_enter(void) +{ +} + +static inline void rcu_idle_exit(void) +{ +} + +static inline void rcu_irq_enter(void) +{ +} + +static inline void rcu_irq_exit(void) +{ +} + static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..f22d83f49e56 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -93,6 +93,11 @@ void rcu_force_quiescent_state(void); void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); +void rcu_idle_enter(void); +void rcu_idle_exit(void); +void rcu_irq_enter(void); +void rcu_irq_exit(void); + void exit_rcu(void); void rcu_scheduler_starting(void); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 069742d61c68..a501b4ab9b1c 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -49,39 +49,6 @@ static void __call_rcu(struct rcu_head *head, #include "tiny_plugin.h" -/* - * Enter idle, which is an extended quiescent state if we have fully - * entered that mode. - */ -void rcu_idle_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_enter); - -/* - * Exit an interrupt handler towards idle. - */ -void rcu_irq_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_exit); - -/* - * Exit idle, so that we are no longer in an extended quiescent state. - */ -void rcu_idle_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_exit); - -/* - * Enter an interrupt handler, moving away from idle. - */ -void rcu_irq_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_enter); - #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* -- cgit v1.2.3-59-g8ed1b From 6e91f8cb138625be96070b778d9ba71ce520ea7e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 May 2015 11:13:05 -0700 Subject: rcu: Correctly handle non-empty Tiny RCU callback list with none ready If, at the time __rcu_process_callbacks() is invoked, there are callbacks in Tiny RCU's callback list, but none of them are ready to be invoked, the current list-management code will knit the non-ready callbacks out of the list. This can result in hangs and possibly worse. This commit therefore inserts a check for there being no callbacks that can be invoked immediately. This bug is unlikely to occur -- you have to get a new callback after rcu_sched_qs() or rcu_bh_qs() has been called, but before we get to __rcu_process_callbacks(). It was detected by the addition of RCU-bh testing to rcutorture, which in turn was instigated by Iftekhar Ahmed's mutation testing.
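The list manipulation at issue can be modeled in a few lines of self-contained C; the types below are simplified stand-ins for the kernel's struct rcu_ctrlblk and struct rcu_head, not the real thing:

#include <stddef.h>

struct cb {
	struct cb *next;
};

struct ctrlblk {
	struct cb *rcucblist;	/* Head of the callback list. */
	struct cb **donetail;	/* Past the last ready callback; equals
				 * &rcucblist when none are ready. */
};

/* Detach the ready-to-invoke callbacks, mirroring the fixed
 * __rcu_process_callbacks() logic. */
static struct cb *detach_ready(struct ctrlblk *rcp)
{
	struct cb *list;

	if (rcp->donetail == &rcp->rcucblist)
		return NULL;			/* The added check: nothing ready. */
	list = rcp->rcucblist;
	rcp->rcucblist = *rcp->donetail;	/* Non-ready callbacks stay queued. */
	*rcp->donetail = NULL;			/* Terminate the detached segment. */
	rcp->donetail = &rcp->rcucblist;
	return list;
}

Without the early return, the donetail == &rcucblist case would set rcucblist to NULL while handing back the entire list, detaching callbacks that are not yet ready for invocation.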
Although this bug was made much more likely by 915e8a4fe45e (rcu: Remove fastpath from __rcu_process_callbacks()), this did not cause the bug, but rather made it much more probable. That said, it takes more than 40 hours of rcutorture testing, on average, for this bug to appear, so this fix cannot be considered an emergency. Signed-off-by: Paul E. McKenney Cc: Reviewed-by: Josh Triplett --- kernel/rcu/tiny.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index a501b4ab9b1c..591af0cb7b9f 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -137,6 +137,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); + if (rcp->donetail == &rcp->rcucblist) { + /* No callbacks ready, so just leave. */ + local_irq_restore(flags); + return; + } RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; -- cgit v1.2.3-59-g8ed1b From 3838cc1850ccd09f93e729e9047ec1995026f83e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Mar 2015 13:55:48 -0700 Subject: rcutorture: Allow negative values of nreaders to oversubscribe By default, with rcutorture.nreaders equal to -1, rcutorture provisions N-1 reader kthreads, where N is the number of CPUs. This avoids rcutorture-induced stalls, but also avoids heavier levels of torture. This commit therefore allows negative values of rcutorture.nreaders to specify larger numbers of reader kthreads, so that for example rcutorture.nreaders=-2 provisions N kthreads and rcutorture.nreaders=-5 provisions N+3 kthreads. Signed-off-by: Paul E. McKenney [ paulmck: Update documentation, as suggested by Josh Triplett. ] --- Documentation/kernel-parameters.txt | 6 +++++- kernel/rcu/rcutorture.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 61ab1628a057..04b811086dca 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3101,7 +3101,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. test, hence the "fake". rcutorture.nreaders= [KNL] - Set number of RCU readers. + Set number of RCU readers. The value -1 selects + N-1, where N is the number of CPUs. A value + "n" less than -1 selects N-n-2, where N is again + the number of CPUs. For example, -2 selects N + (the number of CPUs), -3 selects N+1, and so on. rcutorture.object_debug= [KNL] Enable debug-object double-call_rcu() testing. diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index a67ef6ff86b0..7294d605c481 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1701,7 +1701,7 @@ rcu_torture_init(void) if (nreaders >= 0) { nrealreaders = nreaders; } else { - nrealreaders = num_online_cpus() - 1; + nrealreaders = num_online_cpus() - 2 - nreaders; if (nrealreaders <= 0) nrealreaders = 1; } -- cgit v1.2.3-59-g8ed1b From 6c7ed42c81a2d9a7e0646240599552040375fa02 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Apr 2015 11:58:08 -0700 Subject: rcutorture: Replace barriers with smp_store_release() and smp_load_acquire() The rcutorture.c file uses several explicit memory barriers that can easily be converted to smp_store_release() and smp_load_acquire(), which improves maintainability and also improves performance a bit. Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- kernel/rcu/rcutorture.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 7294d605c481..90ff8dfc51e5 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -672,8 +672,8 @@ static void rcu_torture_boost_cb(struct rcu_head *head) struct rcu_boost_inflight *rbip = container_of(head, struct rcu_boost_inflight, rcu); - smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ - rbip->inflight = 0; + /* Ensure RCU-core accesses precede clearing ->inflight */ + smp_store_release(&rbip->inflight, 0); } static int rcu_torture_boost(void *arg) @@ -710,9 +710,9 @@ static int rcu_torture_boost(void *arg) call_rcu_time = jiffies; while (ULONG_CMP_LT(jiffies, endtime)) { /* If we don't have a callback in flight, post one. */ - if (!rbi.inflight) { - smp_mb(); /* RCU core before ->inflight = 1. */ - rbi.inflight = 1; + if (!smp_load_acquire(&rbi.inflight)) { + /* RCU core before ->inflight = 1. */ + smp_store_release(&rbi.inflight, 1); call_rcu(&rbi.rcu, rcu_torture_boost_cb); if (jiffies - call_rcu_time > test_boost_duration * HZ - HZ / 2) { @@ -751,11 +751,10 @@ checkwait: stutter_wait("rcu_torture_boost"); } while (!torture_must_stop()); /* Clean up and exit. */ - while (!kthread_should_stop() || rbi.inflight) { + while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) { torture_shutdown_absorb("rcu_torture_boost"); schedule_timeout_uninterruptible(1); } - smp_mb(); /* order accesses to ->inflight before stack-frame death. */ destroy_rcu_head_on_stack(&rbi.rcu); torture_kthread_stopping("rcu_torture_boost"); return 0; @@ -1413,12 +1412,15 @@ static int rcu_torture_barrier_cbs(void *arg) do { wait_event(barrier_cbs_wq[myid], (newphase = - READ_ONCE(barrier_phase)) != lastphase || + smp_load_acquire(&barrier_phase)) != lastphase || torture_must_stop()); lastphase = newphase; - smp_mb(); /* ensure barrier_phase load before ->call(). */ if (torture_must_stop()) break; + /* + * The above smp_load_acquire() ensures barrier_phase load + * is ordered before the following ->call(). + */ cur_ops->call(&rcu, rcu_torture_barrier_cbf); if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); @@ -1439,8 +1441,8 @@ static int rcu_torture_barrier(void *arg) do { atomic_set(&barrier_cbs_invoked, 0); atomic_set(&barrier_cbs_count, n_barrier_cbs); - smp_mb(); /* Ensure barrier_phase after prior assignments. */ - barrier_phase = !barrier_phase; + /* Ensure barrier_phase ordered after prior assignments. */ + smp_store_release(&barrier_phase, !barrier_phase); for (i = 0; i < n_barrier_cbs; i++) wake_up(&barrier_cbs_wq[i]); wait_event(barrier_wq, -- cgit v1.2.3-59-g8ed1b From ca1d51ed9809a99d71c23a343b3acd3fd4ad8cbe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Apr 2015 12:28:22 -0700 Subject: rcutorture: Test SRCU cleanup code path The current rcutorture testing does not do any cleanup operations. This works because the srcu_struct is statically allocated, but it does represent a memory leak of the associated dynamically allocated ->per_cpu_ref per-CPU variables. However, rcutorture currently uses a statically allocated srcu_struct, which cannot legally be passed to cleanup_srcu_struct(). Therefore, this commit adds a second form of srcu (called srcud) that dynamically allocates and frees the associated per-CPU variables.
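The enabling trick, visible in the diff below, is a single level of indirection: all SRCU operations go through a srcu_ctlp pointer that normally targets the static srcu_ctl and is redirected to the dynamically initialized srcu_ctld for srcud runs. A reduced sketch of the same shape, with illustrative types rather than the kernel's:

struct ctl { int state; };

static struct ctl ctl_static;		/* Analogue of DEFINE_STATIC_SRCU(). */
static struct ctl ctl_dynamic;		/* Initialized and torn down at run time. */
static struct ctl *ctlp = &ctl_static;	/* Every operation dereferences this. */

static void start_dynamic(void)
{
	ctl_dynamic.state = 0;		/* Stands in for init_srcu_struct(). */
	ctlp = &ctl_dynamic;		/* Redirect all users. */
}

static void stop_dynamic(void)
{
	/* Stands in for cleanup_srcu_struct(). */
	ctlp = &ctl_static;		/* Restore for any later run. */
}

Because readers and updaters only ever dereference ctlp, the static and dynamic variants can share every other operation unchanged.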
This commit also adds a ->cleanup() member to rcu_torture_ops that is invoked at the end of the test, after ->cb_barrier(). This ->cleanup() pointer is NULL for all existing tests, and thus only used for srcud. Finally, the SRCU-P torture-test configuration selects srcud instead of srcu, with SRCU-N continuing to use srcu, thereby testing both static and dynamic srcu_struct structures. Reported-by: "Ahmed, Iftekhar" Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/rcutorture.c | 77 ++++++++++++++++------ .../selftests/rcutorture/configs/rcu/SRCU-P.boot | 2 +- 2 files changed, 59 insertions(+), 20 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 90ff8dfc51e5..59e32684c23b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -241,6 +241,7 @@ rcu_torture_free(struct rcu_torture *p) struct rcu_torture_ops { int ttype; void (*init)(void); + void (*cleanup)(void); int (*readlock)(void); void (*read_delay)(struct torture_random_state *rrsp); void (*readunlock)(int idx); @@ -477,10 +478,12 @@ static struct rcu_torture_ops rcu_busted_ops = { */ DEFINE_STATIC_SRCU(srcu_ctl); +static struct srcu_struct srcu_ctld; +static struct srcu_struct *srcu_ctlp = &srcu_ctl; -static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) +static int srcu_torture_read_lock(void) __acquires(srcu_ctlp) { - return srcu_read_lock(&srcu_ctl); + return srcu_read_lock(srcu_ctlp); } static void srcu_read_delay(struct torture_random_state *rrsp) @@ -499,49 +502,49 @@ static void srcu_read_delay(struct torture_random_state *rrsp) rcu_read_delay(rrsp); } -static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) +static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp) { - srcu_read_unlock(&srcu_ctl, idx); + srcu_read_unlock(srcu_ctlp, idx); } static unsigned long srcu_torture_completed(void) { - return srcu_batches_completed(&srcu_ctl); + return srcu_batches_completed(srcu_ctlp); } static void srcu_torture_deferred_free(struct rcu_torture *rp) { - call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); + call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb); } static void srcu_torture_synchronize(void) { - synchronize_srcu(&srcu_ctl); + synchronize_srcu(srcu_ctlp); } static void srcu_torture_call(struct rcu_head *head, void (*func)(struct rcu_head *head)) { - call_srcu(&srcu_ctl, head, func); + call_srcu(srcu_ctlp, head, func); } static void srcu_torture_barrier(void) { - srcu_barrier(&srcu_ctl); + srcu_barrier(srcu_ctlp); } static void srcu_torture_stats(void) { int cpu; int idx = srcu_ctlp->completed & 0x1; pr_alert("%s%s per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { long c0, c1; - c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; - c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; + c0 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[!idx]; + c1 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[idx]; pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } pr_cont("\n"); @@ -549,7 +552,7 @@ static void srcu_torture_stats(void) static void srcu_torture_synchronize_expedited(void) { - synchronize_srcu_expedited(&srcu_ctl); + synchronize_srcu_expedited(srcu_ctlp); } static struct rcu_torture_ops srcu_ops = { @@ -569,6 +572,38 @@ static struct rcu_torture_ops srcu_ops = { .name = "srcu" }; +static void srcu_torture_init(void) +{ + rcu_sync_torture_init(); + WARN_ON(init_srcu_struct(&srcu_ctld)); + srcu_ctlp = &srcu_ctld; +} +
+static void srcu_torture_cleanup(void) +{ + cleanup_srcu_struct(&srcu_ctld); + srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ +} + +/* As above, but dynamically allocated. */ +static struct rcu_torture_ops srcud_ops = { + .ttype = SRCU_FLAVOR, + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .started = NULL, + .completed = srcu_torture_completed, + .deferred_free = srcu_torture_deferred_free, + .sync = srcu_torture_synchronize, + .exp_sync = srcu_torture_synchronize_expedited, + .call = srcu_torture_call, + .cb_barrier = srcu_torture_barrier, + .stats = srcu_torture_stats, + .name = "srcud" +}; + /* * Definitions for sched torture testing. */ @@ -1053,7 +1088,7 @@ static void rcu_torture_timer(unsigned long unused) p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); + srcu_read_lock_held(srcu_ctlp)); if (p == NULL) { /* Leave because rcu_torture_writer is not yet underway */ cur_ops->readunlock(idx); @@ -1127,7 +1162,7 @@ rcu_torture_reader(void *arg) p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); + srcu_read_lock_held(srcu_ctlp)); if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); @@ -1590,10 +1625,14 @@ rcu_torture_cleanup(void) rcutorture_booster_cleanup(i); } - /* Wait for all RCU callbacks to fire. */ - + /* + * Wait for all RCU callbacks to fire, then do flavor-specific + * cleanup operations. + */ if (cur_ops->cb_barrier != NULL) cur_ops->cb_barrier(); + if (cur_ops->cleanup != NULL) + cur_ops->cleanup(); rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ @@ -1670,8 +1709,8 @@ rcu_torture_init(void) int cpu; int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { - &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, - RCUTORTURE_TASKS_OPS + &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, + &sched_ops, RCUTORTURE_TASKS_OPS }; if (!torture_init_begin(torture_type, verbose, &torture_runnable)) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index 238bfe3bd0cc..84a7d51b7481 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -1 +1 @@ -rcutorture.torture_type=srcu +rcutorture.torture_type=srcud -- cgit v1.2.3-59-g8ed1b
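The smp_store_release()/smp_load_acquire() conversions in the rcutorture barrier patch above have a direct user-space counterpart in C11 atomics. A minimal sketch of the ->inflight handshake using standard <stdatomic.h> rather than the kernel's primitives (function names here are illustrative):

#include <stdatomic.h>

static atomic_int inflight;

/* Callback side: all accesses before this store are visible to any
 * thread that later observes inflight == 0 with an acquire load,
 * matching smp_store_release(&rbip->inflight, 0). */
static void boost_cb_done(void)
{
	atomic_store_explicit(&inflight, 0, memory_order_release);
}

/* Poller side: the acquire load orders the flag check before any
 * subsequent reuse of the callback structure, matching
 * smp_load_acquire(&rbi.inflight). */
static int boost_cb_idle(void)
{
	return !atomic_load_explicit(&inflight, memory_order_acquire);
}

As in the patch, this replaces a full barrier plus a plain access with one-directional ordering that is typically cheaper and makes the producer/consumer pairing explicit.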