aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcu
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@kernel.org>2020-09-03 09:47:42 -0700
committerPaul E. McKenney <paulmck@kernel.org>2020-09-03 09:47:42 -0700
commit7fbe67e46aab13f99d551ab04a1168a7d58cdae9 (patch)
tree8feb62912b645adcc5bccaadb0b81d0674430c3d /kernel/rcu
parentMerge branch 'scftorture.2020.08.24a' into HEAD (diff)
parentrcu: Remove unused "cpu" parameter from rcu_report_qs_rdp() (diff)
downloadlinux-dev-7fbe67e46aab13f99d551ab04a1168a7d58cdae9.tar.xz
linux-dev-7fbe67e46aab13f99d551ab04a1168a7d58cdae9.zip
Merge branch 'strictgp.2020.08.24a' into HEAD
strictgp.2020.08.24a: Strict grace periods for KASAN testing.
Diffstat (limited to 'kernel/rcu')
-rw-r--r--kernel/rcu/Kconfig8
-rw-r--r--kernel/rcu/Kconfig.debug15
-rw-r--r--kernel/rcu/tree.c73
-rw-r--r--kernel/rcu/tree.h1
-rw-r--r--kernel/rcu/tree_plugin.h32
5 files changed, 113 insertions, 16 deletions
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 0ebe15a84985..b71e21f73c40 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -135,10 +135,12 @@ config RCU_FANOUT
config RCU_FANOUT_LEAF
int "Tree-based hierarchical RCU leaf-level fanout value"
- range 2 64 if 64BIT
- range 2 32 if !64BIT
+ range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
+ range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
+ range 2 3 if RCU_STRICT_GRACE_PERIOD
depends on TREE_RCU && RCU_EXPERT
- default 16
+ default 16 if !RCU_STRICT_GRACE_PERIOD
+ default 2 if RCU_STRICT_GRACE_PERIOD
help
This option controls the leaf-level fanout of hierarchical
implementations of RCU, and allows trading off cache misses
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 5cb175df6ece..1942c1f1bb65 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -114,4 +114,19 @@ config RCU_EQS_DEBUG
Say N here if you need ultimate kernel/user switch latencies
Say Y if you are unsure
+config RCU_STRICT_GRACE_PERIOD
+ bool "Provide debug RCU implementation with short grace periods"
+ depends on DEBUG_KERNEL && RCU_EXPERT
+ default n
+ select PREEMPT_COUNT if PREEMPT=n
+ help
+ Select this option to build an RCU variant that is strict about
+ grace periods, making them as short as it can. This limits
+ scalability, destroys real-time response, degrades battery
+ lifetime and kills performance. Don't try this on large
+ machines, as in systems with more than about 10 or 20 CPUs.
+ But in conjunction with tools like KASAN, it can be helpful
+ when looking for certain types of RCU usage bugs, for example,
+ too-short RCU read-side critical sections.
+
endmenu # "RCU Debugging"
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 92450642c4d8..548404489c04 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -165,6 +165,12 @@ module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);
+// Add delay to rcu_read_unlock() for strict grace periods.
+static int rcu_unlock_delay;
+#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
+module_param(rcu_unlock_delay, int, 0444);
+#endif
+
/*
* This rcu parameter is runtime-read-only. It reflects
* a minimum allowed number of objects which can be cached
@@ -455,24 +461,25 @@ static int rcu_is_cpu_rrupt_from_idle(void)
return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}
-#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
-#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
+#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
+ // Maximum callbacks per rcu_do_batch ...
+#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
static long blimit = DEFAULT_RCU_BLIMIT;
-#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
static long qhimark = DEFAULT_RCU_QHIMARK;
-#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_QLOMARK 100 // Once only this many pending, use blimit.
static long qlowmark = DEFAULT_RCU_QLOMARK;
#define DEFAULT_RCU_QOVLD_MULT 2
#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
-static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
-static long qovld_calc = -1; /* No pre-initialization lock acquisitions! */
+static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
+static long qovld_calc = -1; // No pre-initialization lock acquisitions!
module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
module_param(qovld, long, 0444);
-static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
static int rcu_divisor = 7;
@@ -1572,6 +1579,19 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
}
/*
+ * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
+ * quiescent state. This is intended to be invoked when the CPU notices
+ * a new grace period.
+ */
+static void rcu_strict_gp_check_qs(void)
+{
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+ rcu_read_lock();
+ rcu_read_unlock();
+ }
+}
+
+/*
* Update CPU-local rcu_data state to record the beginnings and ends of
* grace periods. The caller must hold the ->lock of the leaf rcu_node
* structure corresponding to the current CPU, and must have irqs disabled.
@@ -1641,6 +1661,7 @@ static void note_gp_changes(struct rcu_data *rdp)
}
needwake = __note_gp_changes(rnp, rdp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ rcu_strict_gp_check_qs();
if (needwake)
rcu_gp_kthread_wake();
}
@@ -1679,6 +1700,15 @@ static void rcu_gp_torture_wait(void)
}
/*
+ * Handler for on_each_cpu() to invoke the target CPU's RCU core
+ * processing.
+ */
+static void rcu_strict_gp_boundary(void *unused)
+{
+ invoke_rcu_core();
+}
+
+/*
* Initialize a new grace period. Return false if no grace period required.
*/
static bool rcu_gp_init(void)
@@ -1809,6 +1839,10 @@ static bool rcu_gp_init(void)
WRITE_ONCE(rcu_state.gp_activity, jiffies);
}
+ // If strict, make all CPUs aware of new grace period.
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
+
return true;
}
@@ -2025,6 +2059,10 @@ static void rcu_gp_cleanup(void)
rcu_state.gp_flags & RCU_GP_FLAG_INIT);
}
raw_spin_unlock_irq_rcu_node(rnp);
+
+ // If strict, make all CPUs aware of the end of the old grace period.
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
}
/*
@@ -2203,7 +2241,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
* structure. This must be called from the specified CPU.
*/
static void
-rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
+rcu_report_qs_rdp(struct rcu_data *rdp)
{
unsigned long flags;
unsigned long mask;
@@ -2212,6 +2250,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
rcu_segcblist_is_offloaded(&rdp->cblist);
struct rcu_node *rnp;
+ WARN_ON_ONCE(rdp->cpu != smp_processor_id());
rnp = rdp->mynode;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
@@ -2228,8 +2267,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
return;
}
mask = rdp->grpmask;
- if (rdp->cpu == smp_processor_id())
- rdp->core_needs_qs = false;
+ rdp->core_needs_qs = false;
if ((rnp->qsmask & mask) == 0) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
@@ -2278,7 +2316,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
* Tell RCU we are done (but rcu_report_qs_rdp() will be the
* judge of that).
*/
- rcu_report_qs_rdp(rdp->cpu, rdp);
+ rcu_report_qs_rdp(rdp);
}
/*
@@ -2621,6 +2659,14 @@ void rcu_force_quiescent_state(void)
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+// Workqueue handler for an RCU reader for kernels enforcing struct RCU
+// grace periods.
+static void strict_work_handler(struct work_struct *work)
+{
+ rcu_read_lock();
+ rcu_read_unlock();
+}
+
/* Perform RCU core processing work for the current CPU. */
static __latent_entropy void rcu_core(void)
{
@@ -2665,6 +2711,10 @@ static __latent_entropy void rcu_core(void)
/* Do any needed deferred wakeups of rcuo kthreads. */
do_nocb_deferred_wakeup(rdp);
trace_rcu_utilization(TPS("End RCU core"));
+
+ // If strict GPs, schedule an RCU reader in a clean environment.
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
}
static void rcu_core_si(struct softirq_action *h)
@@ -3862,6 +3912,7 @@ rcu_boot_init_percpu_data(int cpu)
/* Set up local state, ensuring consistent view of global state. */
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
+ INIT_WORK(&rdp->strict_work, strict_work_handler);
WARN_ON_ONCE(rdp->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 309bc7f41d35..e4f66b8f7c47 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -165,6 +165,7 @@ struct rcu_data {
/* period it is aware of. */
struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
bool defer_qs_iw_pending; /* Scheduler attention pending? */
+ struct work_struct strict_work; /* Schedule readers for strict GPs. */
/* 2) batch handling */
struct rcu_segcblist cblist; /* Segmented callback list, with */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index cb1e8c8befb9..fd8a52e9a887 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -36,6 +36,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
if (IS_ENABLED(CONFIG_PROVE_RCU))
pr_info("\tRCU lockdep checking is enabled.\n");
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+ pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
if (RCU_NUM_LVLS >= 4)
pr_info("\tFour(or more)-level hierarchy is enabled.\n");
if (RCU_FANOUT_LEAF != 16)
@@ -374,6 +376,8 @@ void __rcu_read_lock(void)
rcu_preempt_read_enter();
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
+ WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
@@ -455,8 +459,14 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
return;
}
t->rcu_read_unlock_special.s = 0;
- if (special.b.need_qs)
- rcu_qs();
+ if (special.b.need_qs) {
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+ rcu_report_qs_rdp(rdp);
+ udelay(rcu_unlock_delay);
+ } else {
+ rcu_qs();
+ }
+ }
/*
* Respond to a request by an expedited grace period for a
@@ -769,6 +779,24 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
#else /* #ifdef CONFIG_PREEMPT_RCU */
/*
+ * If strict grace periods are enabled, and if the calling
+ * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
+ * report that quiescent state and, if requested, spin for a bit.
+ */
+void rcu_read_unlock_strict(void)
+{
+ struct rcu_data *rdp;
+
+ if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
+ irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+ return;
+ rdp = this_cpu_ptr(&rcu_data);
+ rcu_report_qs_rdp(rdp);
+ udelay(rcu_unlock_delay);
+}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
+
+/*
* Tell them what RCU they are running.
*/
static void __init rcu_bootup_announce(void)