aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/time/timer.c
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab@s-opensource.com>2017-09-29 05:24:10 -0400
committerMauro Carvalho Chehab <mchehab@s-opensource.com>2017-09-29 05:24:10 -0400
commitcf09e3c904bf424f8b6a8203958e09bf7d9bcbc0 (patch)
tree5e9936b3de36aa222b52a9bca366a43d98730ffd /kernel/time/timer.c
parentmedia: staging: atomisp: use clock framework for camera clocks (diff)
parentLinux 4.14-rc2 (diff)
Merge tag 'v4.14-rc2' into patchwork
Linux 4.14-rc2 * tag 'v4.14-rc2': (12066 commits) Linux 4.14-rc2 tpm: ibmvtpm: simplify crq initialization and document crq format tpm: replace msleep() with usleep_range() in TPM 1.2/2.0 generic drivers Documentation: tpm: add powered-while-suspended binding documentation tpm: tpm_crb: constify acpi_device_id. tpm: vtpm: constify vio_device_id security: fix description of values returned by cap_inode_need_killpriv x86/asm: Fix inline asm call constraints for Clang objtool: Handle another GCC stack pointer adjustment bug inet: fix improper empty comparison net: use inet6_rcv_saddr to compare sockets net: set tb->fast_sk_family net: orphan frags on stand-alone ptype in dev_queue_xmit_nit MAINTAINERS: update git tree locations for ieee802154 subsystem SMB3: Don't ignore O_SYNC/O_DSYNC and O_DIRECT flags SMB3: handle new statx fields arch: remove unused *_segments() macros/functions parisc: Unbreak bootloader due to gcc-7 optimizations parisc: Reintroduce option to gzip-compress the kernel apparmor: fix apparmorfs DAC access permissions ...
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r--kernel/time/timer.c50
1 files changed, 41 insertions, 9 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 8f5d1bf18854..f2674a056c26 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -203,6 +203,7 @@ struct timer_base {
bool migration_enabled;
bool nohz_active;
bool is_idle;
+ bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base)
{
- unsigned long jnow = READ_ONCE(jiffies);
+ unsigned long jnow;
/*
- * We only forward the base when it's idle and we have a delta between
- * base clock and jiffies.
+ * We only forward the base when we are idle or have just come out of
+ * idle (must_forward_clk logic), and have a delta between base clock
+ * and jiffies. In the common case, run_timers will take care of it.
*/
- if (!base->is_idle || (long) (jnow - base->clk) < 2)
+ if (likely(!base->must_forward_clk))
+ return;
+
+ jnow = READ_ONCE(jiffies);
+ base->must_forward_clk = base->is_idle;
+ if ((long)(jnow - base->clk) < 2)
return;
/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* same array bucket then just return:
*/
if (timer_pending(timer)) {
+ /*
+ * The downside of this optimization is that it can result in
+ * larger granularity than you would get from adding a new
+ * timer with this expiry.
+ */
if (timer->expires == expires)
return 1;
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* dequeue/enqueue dance.
*/
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
clk = base->clk;
idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
} else {
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
}
ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
raw_spin_lock(&base->lock);
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
+ forward_timer_base(base);
}
}
- /* Try to forward a stale timer base clock */
- forward_timer_base(base);
-
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+ forward_timer_base(base);
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
if (!is_max_delta)
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
- * If we expect to sleep more than a tick, mark the base idle:
+ * If we expect to sleep more than a tick, mark the base idle.
+ * Also the tick is stopped so any added timer must forward
+ * the base clk itself to keep granularity small. This idle
+ * logic is only maintained for the BASE_STD base, deferrable
+ * timers may still see large granularity skew (by design).
*/
- if ((expires - basem) > TICK_NSEC)
+ if ((expires - basem) > TICK_NSEC) {
+ base->must_forward_clk = true;
base->is_idle = true;
+ }
}
raw_spin_unlock(&base->lock);
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ /*
+ * must_forward_clk must be cleared before running timers so that any
+ * timer functions that call mod_timer will not try to forward the
+ * base. idle trcking / clock forwarding logic is only used with
+ * BASE_STD timers.
+ *
+ * The deferrable base does not do idle tracking at all, so we do
+ * not forward it. This can result in very large variations in
+ * granularity for deferrable timers, but they can be deferred for
+ * long periods due to idle.
+ */
+ base->must_forward_clk = false;
+
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));