aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/alarmtimer.c2
-rw-r--r--kernel/time/clocksource.c53
-rw-r--r--kernel/time/hrtimer.c154
-rw-r--r--kernel/time/itimer.c4
-rw-r--r--kernel/time/posix-cpu-timers.c2
-rw-r--r--kernel/time/posix-timers.c4
-rw-r--r--kernel/time/tick-common.c2
-rw-r--r--kernel/time/tick-internal.h3
-rw-r--r--kernel/time/tick-sched.c9
-rw-r--r--kernel/time/timekeeping.c18
-rw-r--r--kernel/time/timer.c13
11 files changed, 186 insertions, 78 deletions
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index b97401f6bc23..0e96c38204a8 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -838,9 +838,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
if (flags == TIMER_ABSTIME)
return -ERESTARTNOHAND;
- restart->fn = alarm_timer_nsleep_restart;
restart->nanosleep.clockid = type;
restart->nanosleep.expires = exp;
+ set_restart_fn(restart, alarm_timer_nsleep_restart);
return ret;
}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 428beb69426a..6863a054c970 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,13 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+/*
+ * Maximum permissible delay between two readouts of the watchdog
+ * clocksource surrounding a read of the clocksource being validated.
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions.
+ */
+#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+
static void clocksource_watchdog_work(struct work_struct *work)
{
/*
@@ -184,12 +191,45 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}
+static ulong max_cswd_read_retries = 3;
+module_param(max_cswd_read_retries, ulong, 0644);
+
+static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+{
+ unsigned int nretries;
+ u64 wd_end, wd_delta;
+ int64_t wd_delay;
+
+ for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
+ local_irq_disable();
+ *wdnow = watchdog->read(watchdog);
+ *csnow = cs->read(cs);
+ wd_end = watchdog->read(watchdog);
+ local_irq_enable();
+
+ wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
+ wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
+ watchdog->shift);
+ if (wd_delay <= WATCHDOG_MAX_SKEW) {
+ if (nretries > 1 || nretries >= max_cswd_read_retries) {
+ pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
+ smp_processor_id(), watchdog->name, nretries);
+ }
+ return true;
+ }
+ }
+
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
+ smp_processor_id(), watchdog->name, wd_delay, nretries);
+ return false;
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
- struct clocksource *cs;
u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending;
+ int64_t wd_nsec, cs_nsec;
+ struct clocksource *cs;
spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -206,10 +246,11 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}
- local_irq_disable();
- csnow = cs->read(cs);
- wdnow = watchdog->read(watchdog);
- local_irq_enable();
+ if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
+ /* Clock readout unreliable, so give it up. */
+ __clocksource_unstable(cs);
+ continue;
+ }
/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 7f31932216a1..e1e8d5dab0c5 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -547,8 +547,11 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
}
/*
- * Recomputes cpu_base::*next_timer and returns the earliest expires_next but
- * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
+ * Recomputes cpu_base::*next_timer and returns the earliest expires_next
+ * but does not set cpu_base::*expires_next, that is done by
+ * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
+ * cpu_base::*expires_next right away, reprogramming logic would no longer
+ * work.
*
* When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
* those timers will get run whenever the softirq gets handled, at the end of
@@ -589,6 +592,37 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
return expires_next;
}
+static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
+{
+ ktime_t expires_next, soft = KTIME_MAX;
+
+ /*
+ * If the soft interrupt has already been activated, ignore the
+ * soft bases. They will be handled in the already raised soft
+ * interrupt.
+ */
+ if (!cpu_base->softirq_activated) {
+ soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
+ /*
+ * Update the soft expiry time. clock_settime() might have
+ * affected it.
+ */
+ cpu_base->softirq_expires_next = soft;
+ }
+
+ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+ /*
+ * If a softirq timer is expiring first, update cpu_base->next_timer
+ * and program the hardware with the soft expiry time.
+ */
+ if (expires_next > soft) {
+ cpu_base->next_timer = cpu_base->softirq_next_timer;
+ expires_next = soft;
+ }
+
+ return expires_next;
+}
+
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
@@ -629,23 +663,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
ktime_t expires_next;
- /*
- * Find the current next expiration time.
- */
- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
-
- if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
- /*
- * When the softirq is activated, hrtimer has to be
- * programmed with the first hard hrtimer because soft
- * timer interrupt could occur too late.
- */
- if (cpu_base->softirq_activated)
- expires_next = __hrtimer_get_next_event(cpu_base,
- HRTIMER_ACTIVE_HARD);
- else
- cpu_base->softirq_expires_next = expires_next;
- }
+ expires_next = hrtimer_update_next_event(cpu_base);
if (skip_equal && expires_next == cpu_base->expires_next)
return;
@@ -741,22 +759,6 @@ static void hrtimer_switch_to_hres(void)
retrigger_next_event(NULL);
}
-static void clock_was_set_work(struct work_struct *work)
-{
- clock_was_set();
-}
-
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-
-/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
- */
-void clock_was_set_delayed(void)
-{
- schedule_work(&hrtimer_work);
-}
-
#else
static inline int hrtimer_is_hres_enabled(void) { return 0; }
@@ -874,6 +876,22 @@ void clock_was_set(void)
timerfd_clock_was_set();
}
+static void clock_was_set_work(struct work_struct *work)
+{
+ clock_was_set();
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping and resume code to reprogram the hrtimer
+ * interrupt device on all cpus and to notify timerfd.
+ */
+void clock_was_set_delayed(void)
+{
+ schedule_work(&hrtimer_work);
+}
+
/*
* During resume we might have to reprogram the high resolution timer
* interrupt on all online CPUs. However, all other CPUs will be
@@ -1013,12 +1031,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
* remove hrtimer, called with base lock held
*/
static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
+ bool restart, bool keep_local)
{
u8 state = timer->state;
if (state & HRTIMER_STATE_ENQUEUED) {
- int reprogram;
+ bool reprogram;
/*
* Remove the timer and force reprogramming when high
@@ -1031,8 +1050,16 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
debug_deactivate(timer);
reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+ /*
+ * If the timer is not restarted then reprogramming is
+ * required if the timer is local. If it is local and about
+ * to be restarted, avoid programming it twice (on removal
+ * and a moment later when it's requeued).
+ */
if (!restart)
state = HRTIMER_STATE_INACTIVE;
+ else
+ reprogram &= !keep_local;
__remove_hrtimer(timer, base, state, reprogram);
return 1;
@@ -1086,9 +1113,31 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base)
{
struct hrtimer_clock_base *new_base;
+ bool force_local, first;
- /* Remove an active timer from the queue: */
- remove_hrtimer(timer, base, true);
+ /*
+ * If the timer is on the local cpu base and is the first expiring
+ * timer then this might end up reprogramming the hardware twice
+ * (on removal and on enqueue). To avoid that by prevent the
+ * reprogram on removal, keep the timer local to the current CPU
+ * and enforce reprogramming after it is queued no matter whether
+ * it is the new first expiring timer again or not.
+ */
+ force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+ force_local &= base->cpu_base->next_timer == timer;
+
+ /*
+ * Remove an active timer from the queue. In case it is not queued
+ * on the current CPU, make sure that remove_hrtimer() updates the
+ * remote data correctly.
+ *
+ * If it's on the current CPU and the first expiring timer, then
+ * skip reprogramming, keep the timer local and enforce
+ * reprogramming later if it was the first expiring timer. This
+ * avoids programming the underlying clock event twice (once at
+ * removal and once after enqueue).
+ */
+ remove_hrtimer(timer, base, true, force_local);
if (mode & HRTIMER_MODE_REL)
tim = ktime_add_safe(tim, base->get_time());
@@ -1098,9 +1147,24 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
hrtimer_set_expires_range_ns(timer, tim, delta_ns);
/* Switch the timer base, if necessary: */
- new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
+ if (!force_local) {
+ new_base = switch_hrtimer_base(timer, base,
+ mode & HRTIMER_MODE_PINNED);
+ } else {
+ new_base = base;
+ }
- return enqueue_hrtimer(timer, new_base, mode);
+ first = enqueue_hrtimer(timer, new_base, mode);
+ if (!force_local)
+ return first;
+
+ /*
+ * Timer was forced to stay on the current CPU to avoid
+ * reprogramming on removal and enqueue. Force reprogram the
+ * hardware by evaluating the new first expiring timer.
+ */
+ hrtimer_force_reprogram(new_base->cpu_base, 1);
+ return 0;
}
/**
@@ -1166,7 +1230,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
base = lock_hrtimer_base(timer, &flags);
if (!hrtimer_callback_running(timer))
- ret = remove_hrtimer(timer, base, false);
+ ret = remove_hrtimer(timer, base, false, false);
unlock_hrtimer_base(timer, &flags);
@@ -1640,8 +1704,8 @@ retry:
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
- /* Reevaluate the clock bases for the next expiry */
- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
+ /* Reevaluate the clock bases for the [soft] next expiry */
+ expires_next = hrtimer_update_next_event(cpu_base);
/*
* Store the new expiry value so the migration code can verify
* against it.
@@ -1935,9 +1999,9 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
}
restart = &current->restart_block;
- restart->fn = hrtimer_nanosleep_restart;
restart->nanosleep.clockid = t.timer.base->clockid;
restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
+ set_restart_fn(restart, hrtimer_nanosleep_restart);
out:
destroy_hrtimer_on_stack(&t.timer);
return ret;
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 77f1e5635cc1..62dc9757118c 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -147,10 +147,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- /*
- * Use the to_ktime conversion because that clamps the maximum
- * value to KTIME_MAX and avoid multiplication overflows.
- */
nval = ktime_to_ns(timeval_to_ktime(value->it_value));
ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval));
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 42d512fcfda2..eacb0ca30193 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1335,8 +1335,8 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
if (flags & TIMER_ABSTIME)
return -ERESTARTNOHAND;
- restart_block->fn = posix_cpu_nsleep_restart;
restart_block->nanosleep.clockid = which_clock;
+ set_restart_fn(restart_block, posix_cpu_nsleep_restart);
}
return error;
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0ec5b7a1d769..97d4a9dcf339 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1169,8 +1169,8 @@ SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
err = do_clock_adjtime(which_clock, &ktx);
- if (err >= 0)
- err = put_old_timex32(utp, &ktx);
+ if (err >= 0 && put_old_timex32(utp, &ktx))
+ return -EFAULT;
return err;
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 59225b484e4e..7e5d3524e924 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -11,6 +11,7 @@
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
+#include <linux/nmi.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
@@ -558,6 +559,7 @@ void tick_unfreeze(void)
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
} else {
+ touch_softlockup_watchdog();
tick_resume_local();
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 7b2496136729..5294f5b1f955 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -165,3 +165,6 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
void timer_clear_idle(void);
+
+void clock_was_set(void);
+void clock_was_set_delayed(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5c9fcc72460d..5eb04bb59802 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -131,7 +131,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
*/
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
- WARN_ON(tick_nohz_full_running);
+ WARN_ON_ONCE(tick_nohz_full_running);
#endif
tick_do_timer_cpu = cpu;
}
@@ -916,13 +916,6 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
*/
if (tick_do_timer_cpu == cpu)
return false;
- /*
- * Boot safety: make sure the timekeeping duty has been
- * assigned before entering dyntick-idle mode,
- * tick_do_timer_cpu is TICK_DO_TIMER_BOOT
- */
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
- return false;
/* Should not happen for nohz-full */
if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4fc2af4367a7..e23c9e765a5f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -17,6 +17,7 @@
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/time.h>
+#include <linux/timex.h>
#include <linux/tick.h>
#include <linux/stop_machine.h>
#include <linux/pvclock_gtod.h>
@@ -1236,8 +1237,7 @@ int do_settimeofday64(const struct timespec64 *ts)
timekeeping_forward_now(tk);
xt = tk_xtime(tk);
- ts_delta.tv_sec = ts->tv_sec - xt.tv_sec;
- ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec;
+ ts_delta = timespec64_sub(*ts, xt);
if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
ret = -EINVAL;
@@ -2305,6 +2305,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc)
return 0;
}
+/**
+ * random_get_entropy_fallback - Returns the raw clock source value,
+ * used by random.c for platforms with no valid random_get_entropy().
+ */
+unsigned long random_get_entropy_fallback(void)
+{
+ struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
+ struct clocksource *clock = READ_ONCE(tkr->clock);
+
+ if (unlikely(timekeeping_suspended || !clock))
+ return 0;
+ return clock->read(clock);
+}
+EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
/**
* do_adjtimex() - Accessor function to NTP __do_adjtimex function
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a3ae244b1bcd..16a2b62f5f74 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1269,8 +1269,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base)
static void timer_sync_wait_running(struct timer_base *base)
{
if (atomic_read(&base->timer_waiters)) {
+ raw_spin_unlock_irq(&base->lock);
spin_unlock(&base->expiry_lock);
spin_lock(&base->expiry_lock);
+ raw_spin_lock_irq(&base->lock);
}
}
@@ -1454,14 +1456,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
if (timer->flags & TIMER_IRQSAFE) {
raw_spin_unlock(&base->lock);
call_timer_fn(timer, fn, baseclk);
- base->running_timer = NULL;
raw_spin_lock(&base->lock);
+ base->running_timer = NULL;
} else {
raw_spin_unlock_irq(&base->lock);
call_timer_fn(timer, fn, baseclk);
+ raw_spin_lock_irq(&base->lock);
base->running_timer = NULL;
timer_sync_wait_running(base);
- raw_spin_lock_irq(&base->lock);
}
}
}
@@ -1743,13 +1745,6 @@ void update_process_times(int user_tick)
scheduler_tick();
if (IS_ENABLED(CONFIG_POSIX_TIMERS))
run_posix_cpu_timers();
-
- /* The current CPU might make use of net randoms without receiving IRQs
- * to renew them often enough. Let's update the net_rand_state from a
- * non-constant value that's not affine to the number of calls to make
- * sure it's updated when there's some activity (we don't care in idle).
- */
- this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
}
/**