aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/time/hrtimer.c
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2019-07-26 20:30:58 +0200
committerThomas Gleixner <tglx@linutronix.de>2019-08-01 20:51:22 +0200
commit1842f5a427f5323f5c19ab99b55d09b3ab5172a5 (patch)
tree273a4d23da40d208ed187f09ae9c9a0007b2b35e /kernel/time/hrtimer.c
parenthrtimer: Move unmarked hrtimers to soft interrupt expiry on RT (diff)
downloadlinux-dev-1842f5a427f5323f5c19ab99b55d09b3ab5172a5.tar.xz
linux-dev-1842f5a427f5323f5c19ab99b55d09b3ab5172a5.zip
hrtimer: Determine hard/soft expiry mode for hrtimer sleepers on RT
On PREEMPT_RT enabled kernels hrtimers which are not explicitely marked for hard interrupt expiry mode are moved into soft interrupt context either for latency reasons or because the hrtimer callback takes regular spinlocks or invokes other functions which are not suitable for hard interrupt context on PREEMPT_RT. The hrtimer_sleeper callback is RT compatible in hard interrupt context, but there is a latency concern: Untrusted userspace can spawn many threads which arm timers for the same expiry time on the same CPU. On expiry that causes a latency spike due to the wakeup of a gazillion threads. OTOH, priviledged real-time user space applications rely on the low latency of hard interrupt wakeups. These syscall related wakeups are all based on hrtimer sleepers. If the current task is in a real-time scheduling class, mark the mode for hard interrupt expiry. [ tglx: Split out of a larger combo patch. Added changelog ] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20190726185753.645792403@linutronix.de
Diffstat (limited to '')
-rw-r--r--kernel/time/hrtimer.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 90dcc4d95e91..c101f88ae8aa 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1676,6 +1676,16 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
enum hrtimer_mode mode)
{
+ /*
+ * Make the enqueue delivery mode check work on RT. If the sleeper
+ * was initialized for hard interrupt delivery, force the mode bit.
+ * This is a special case for hrtimer_sleepers because
+ * hrtimer_init_sleeper() determines the delivery mode on RT so the
+ * fiddling with this decision is avoided at the call sites.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+ mode |= HRTIMER_MODE_HARD;
+
hrtimer_start_expires(&sl->timer, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
@@ -1683,6 +1693,30 @@ EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
clockid_t clock_id, enum hrtimer_mode mode)
{
+ /*
+ * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+ * marked for hard interrupt expiry mode are moved into soft
+ * interrupt context either for latency reasons or because the
+ * hrtimer callback takes regular spinlocks or invokes other
+ * functions which are not suitable for hard interrupt context on
+ * PREEMPT_RT.
+ *
+ * The hrtimer_sleeper callback is RT compatible in hard interrupt
+ * context, but there is a latency concern: Untrusted userspace can
+ * spawn many threads which arm timers for the same expiry time on
+ * the same CPU. That causes a latency spike due to the wakeup of
+ * a gazillion threads.
+ *
+ * OTOH, priviledged real-time user space applications rely on the
+ * low latency of hard interrupt wakeups. If the current task is in
+ * a real-time scheduling class, mark the mode for hard interrupt
+ * expiry.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+ mode |= HRTIMER_MODE_HARD;
+ }
+
__hrtimer_init(&sl->timer, clock_id, mode);
sl->timer.function = hrtimer_wakeup;
sl->task = current;