aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@kernel.org>2022-03-08 15:45:33 -0800
committerPaul E. McKenney <paulmck@kernel.org>2022-05-03 10:20:57 -0700
commit282d8998e9979c2186af7f7d22366f2fc3149838 (patch)
treea89b3241fcd01074c023062015e24de1440f5066
parentsrcu: Add contention check to call_srcu() srcu_data ->lock acquisition (diff)
downloadlinux-dev-282d8998e9979c2186af7f7d22366f2fc3149838.tar.xz
linux-dev-282d8998e9979c2186af7f7d22366f2fc3149838.zip
srcu: Prevent expedited GPs and blocking readers from consuming CPU
If an SRCU reader blocks while a synchronize_srcu_expedited() waits for that same reader, then that grace period will spawn an endless series of workqueue handlers, consuming a full CPU. This quickly gets pointless because consuming more CPU isn't going to make that reader get done faster, especially if it is blocked waiting for an external event. This commit therefore spawns at most one pair of back-to-back workqueue handlers per expedited grace period phase, instead inserting increasing delays as that grace period phase grows older, but capped at 10 jiffies. In any case, if there have been at least 100 back-to-back workqueue handlers within a single jiffy, regardless of grace period or grace-period phase, then a one-jiffy delay is inserted. [ paulmck: Apply feedback from kernel test robot. ] Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com> Reported-by: Song Liu <song@kernel.org> Tested-by: kernel test robot <oliver.sang@intel.com> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
-rw-r--r--include/linux/srcutree.h4
-rw-r--r--kernel/rcu/srcutree.c44
2 files changed, 42 insertions, 6 deletions
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 1b9ff4ed37e4..e3014319d1ad 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -71,9 +71,11 @@ struct srcu_struct {
unsigned long srcu_gp_seq; /* Grace-period seq #. */
unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
+ unsigned long srcu_gp_start; /* Last GP start timestamp (jiffies) */
unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */
unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */
unsigned long srcu_n_lock_retries; /* Contention events in current interval. */
+ unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */
struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
bool sda_is_static; /* May ->sda be passed to free_percpu()? */
unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */
@@ -83,6 +85,8 @@ struct srcu_struct {
atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */
/* callback for the barrier */
/* operation. */
+ unsigned long reschedule_jiffies;
+ unsigned long reschedule_count;
struct delayed_work work;
struct lockdep_map dep_map;
};
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 862008c147b0..6dd44e759f12 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -511,7 +511,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
return sum;
}
-#define SRCU_INTERVAL 1
+#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
+#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances.
/*
* Return grace-period delay, zero if there are expedited grace
@@ -519,9 +522,18 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
*/
static unsigned long srcu_get_delay(struct srcu_struct *ssp)
{
+ unsigned long jbase = SRCU_INTERVAL;
+
if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
- return 0;
- return SRCU_INTERVAL;
+ jbase = 0;
+ if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
+ jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
+ if (!jbase) {
+ WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+ if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
+ jbase = 1;
+ }
+ return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}
/**
@@ -623,6 +635,8 @@ static void srcu_gp_start(struct srcu_struct *ssp)
(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
rcu_seq_snap(&ssp->srcu_gp_seq));
spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
+ WRITE_ONCE(ssp->srcu_gp_start, jiffies);
+ WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
rcu_seq_start(&ssp->srcu_gp_seq);
state = rcu_seq_state(ssp->srcu_gp_seq);
@@ -706,7 +720,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
spin_lock_irq_rcu_node(ssp);
idx = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
- cbdelay = srcu_get_delay(ssp);
+ cbdelay = !!srcu_get_delay(ssp);
WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -893,7 +907,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
// the one boot CPU running with interrupts still disabled.
if (likely(srcu_init_done))
queue_delayed_work(rcu_gp_wq, &ssp->work,
- srcu_get_delay(ssp));
+ !!srcu_get_delay(ssp));
else if (list_empty(&ssp->work.work.entry))
list_add(&ssp->work.work.entry, &srcu_boot_list);
}
@@ -1448,6 +1462,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
srcu_flip(ssp);
spin_lock_irq_rcu_node(ssp);
rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+ ssp->srcu_n_exp_nodelay = 0;
spin_unlock_irq_rcu_node(ssp);
}
@@ -1462,6 +1477,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
mutex_unlock(&ssp->srcu_gp_mutex);
return; /* readers present, retry later. */
}
+ ssp->srcu_n_exp_nodelay = 0;
srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
}
}
@@ -1552,12 +1568,28 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
*/
static void process_srcu(struct work_struct *work)
{
+ unsigned long curdelay;
+ unsigned long j;
struct srcu_struct *ssp;
ssp = container_of(work, struct srcu_struct, work.work);
srcu_advance_state(ssp);
- srcu_reschedule(ssp, srcu_get_delay(ssp));
+ curdelay = srcu_get_delay(ssp);
+ if (curdelay) {
+ WRITE_ONCE(ssp->reschedule_count, 0);
+ } else {
+ j = jiffies;
+ if (READ_ONCE(ssp->reschedule_jiffies) == j) {
+ WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
+ if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+ curdelay = 1;
+ } else {
+ WRITE_ONCE(ssp->reschedule_count, 1);
+ WRITE_ONCE(ssp->reschedule_jiffies, j);
+ }
+ }
+ srcu_reschedule(ssp, curdelay);
}
void srcutorture_get_gp_data(enum rcutorture_type test_type,