aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--kernel/timer.c290
1 files changed, 186 insertions, 104 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index 8533c3796082..cb1b86a9c52f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -34,6 +34,8 @@
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
+#include <linux/tick.h>
+#include <linux/kallsyms.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
list_add_tail(&timer->entry, vec);
}
+#ifdef CONFIG_TIMER_STATS
+void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
+{
+ if (timer->start_site)
+ return;
+
+ timer->start_site = addr;
+ memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+ timer->start_pid = current->pid;
+}
+#endif
+
/**
* init_timer - initialize a timer.
* @timer: the timer to be initialized
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer)
{
timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases);
+#ifdef CONFIG_TIMER_STATS
+ timer->start_site = NULL;
+ timer->start_pid = -1;
+ memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
}
EXPORT_SYMBOL(init_timer);
static inline void detach_timer(struct timer_list *timer,
- int clear_pending)
+ int clear_pending)
{
struct list_head *entry = &timer->entry;
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
unsigned long flags;
int ret = 0;
+ timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
base = lock_timer_base(timer, &flags);
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
tvec_base_t *base = per_cpu(tvec_bases, cpu);
unsigned long flags;
+ timer_stats_timer_set_start_info(timer);
BUG_ON(timer_pending(timer) || !timer->function);
spin_lock_irqsave(&base->lock, flags);
timer->base = base;
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
{
BUG_ON(!timer->function);
+ timer_stats_timer_set_start_info(timer);
/*
* This is a common optimization triggered by the
* networking code - if the timer is re-modified
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer)
unsigned long flags;
int ret = 0;
+ timer_stats_timer_clear_start_info(timer);
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
if (timer_pending(timer)) {
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base)
fn = timer->function;
data = timer->data;
+ timer_stats_account_timer(timer);
+
set_running_timer(base, timer);
detach_timer(timer, 1);
spin_unlock_irq(&base->lock);
@@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base)
spin_unlock_irq(&base->lock);
}
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
/*
* Find out when the next timer event is due to happen. This
* is used on S/390 to stop all activity when a cpus is idle.
* This functions needs to be called disabled.
*/
-unsigned long next_timer_interrupt(void)
+static unsigned long __next_timer_interrupt(tvec_base_t *base)
{
- tvec_base_t *base;
- struct list_head *list;
+ unsigned long timer_jiffies = base->timer_jiffies;
+ unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+ int index, slot, array, found = 0;
struct timer_list *nte;
- unsigned long expires;
- unsigned long hr_expires = MAX_JIFFY_OFFSET;
- ktime_t hr_delta;
tvec_t *varray[4];
- int i, j;
-
- hr_delta = hrtimer_get_next_event();
- if (hr_delta.tv64 != KTIME_MAX) {
- struct timespec tsdelta;
- tsdelta = ktime_to_timespec(hr_delta);
- hr_expires = timespec_to_jiffies(&tsdelta);
- if (hr_expires < 3)
- return hr_expires + jiffies;
- }
- hr_expires += jiffies;
-
- base = __get_cpu_var(tvec_bases);
- spin_lock(&base->lock);
- expires = base->timer_jiffies + (LONG_MAX >> 1);
- list = NULL;
/* Look for timer events in tv1. */
- j = base->timer_jiffies & TVR_MASK;
+ index = slot = timer_jiffies & TVR_MASK;
do {
- list_for_each_entry(nte, base->tv1.vec + j, entry) {
+ list_for_each_entry(nte, base->tv1.vec + slot, entry) {
+ found = 1;
expires = nte->expires;
- if (j < (base->timer_jiffies & TVR_MASK))
- list = base->tv2.vec + (INDEX(0));
- goto found;
+ /* Look at the cascade bucket(s)? */
+ if (!index || slot < index)
+ goto cascade;
+ return expires;
}
- j = (j + 1) & TVR_MASK;
- } while (j != (base->timer_jiffies & TVR_MASK));
+ slot = (slot + 1) & TVR_MASK;
+ } while (slot != index);
+
+cascade:
+ /* Calculate the next cascade event */
+ if (index)
+ timer_jiffies += TVR_SIZE - index;
+ timer_jiffies >>= TVR_BITS;
/* Check tv2-tv5. */
varray[0] = &base->tv2;
varray[1] = &base->tv3;
varray[2] = &base->tv4;
varray[3] = &base->tv5;
- for (i = 0; i < 4; i++) {
- j = INDEX(i);
+
+ for (array = 0; array < 4; array++) {
+ tvec_t *varp = varray[array];
+
+ index = slot = timer_jiffies & TVN_MASK;
do {
- if (list_empty(varray[i]->vec + j)) {
- j = (j + 1) & TVN_MASK;
- continue;
- }
- list_for_each_entry(nte, varray[i]->vec + j, entry)
+ list_for_each_entry(nte, varp->vec + slot, entry) {
+ found = 1;
if (time_before(nte->expires, expires))
expires = nte->expires;
- if (j < (INDEX(i)) && i < 3)
- list = varray[i + 1]->vec + (INDEX(i + 1));
- goto found;
- } while (j != (INDEX(i)));
- }
-found:
- if (list) {
- /*
- * The search wrapped. We need to look at the next list
- * from next tv element that would cascade into tv element
- * where we found the timer element.
- */
- list_for_each_entry(nte, list, entry) {
- if (time_before(nte->expires, expires))
- expires = nte->expires;
- }
+ }
+ /*
+ * Do we still search for the first timer or are
+ * we looking up the cascade buckets ?
+ */
+ if (found) {
+ /* Look at the cascade bucket(s)? */
+ if (!index || slot < index)
+ break;
+ return expires;
+ }
+ slot = (slot + 1) & TVN_MASK;
+ } while (slot != index);
+
+ if (index)
+ timer_jiffies += TVN_SIZE - index;
+ timer_jiffies >>= TVN_BITS;
}
- spin_unlock(&base->lock);
+ return expires;
+}
- /*
- * It can happen that other CPUs service timer IRQs and increment
- * jiffies, but we have not yet got a local timer tick to process
- * the timer wheels. In that case, the expiry time can be before
- * jiffies, but since the high-resolution timer here is relative to
- * jiffies, the default expression when high-resolution timers are
- * not active,
- *
- * time_before(MAX_JIFFY_OFFSET + jiffies, expires)
- *
- * would falsely evaluate to true. If that is the case, just
- * return jiffies so that we can immediately fire the local timer
- */
- if (time_before(expires, jiffies))
- return jiffies;
+/*
+ * Check, if the next hrtimer event is before the next timer wheel
+ * event:
+ */
+static unsigned long cmp_next_hrtimer_event(unsigned long now,
+ unsigned long expires)
+{
+ ktime_t hr_delta = hrtimer_get_next_event();
+ struct timespec tsdelta;
+
+ if (hr_delta.tv64 == KTIME_MAX)
+ return expires;
- if (time_before(hr_expires, expires))
- return hr_expires;
+ if (hr_delta.tv64 <= TICK_NSEC)
+ return now;
+ tsdelta = ktime_to_timespec(hr_delta);
+ now += timespec_to_jiffies(&tsdelta);
+ if (time_before(now, expires))
+ return now;
return expires;
}
+
+/**
+ * next_timer_interrupt - return the jiffy of the next pending timer
+ */
+unsigned long get_next_timer_interrupt(unsigned long now)
+{
+ tvec_base_t *base = __get_cpu_var(tvec_bases);
+ unsigned long expires;
+
+ spin_lock(&base->lock);
+ expires = __next_timer_interrupt(base);
+ spin_unlock(&base->lock);
+
+ if (time_before_eq(expires, now))
+ return now;
+
+ return cmp_next_hrtimer_event(now, expires);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+unsigned long next_timer_interrupt(void)
+{
+ return get_next_timer_interrupt(jiffies);
+}
+#endif
+
#endif
/******************************************************************/
@@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday);
*
* Accumulates current time interval and initializes new clocksource
*/
-static int change_clocksource(void)
+static void change_clocksource(void)
{
struct clocksource *new;
cycle_t now;
u64 nsec;
+
new = clocksource_get_next();
- if (clock != new) {
- now = clocksource_read(new);
- nsec = __get_nsec_offset();
- timespec_add_ns(&xtime, nsec);
-
- clock = new;
- clock->cycle_last = now;
- printk(KERN_INFO "Time: %s clocksource has been installed.\n",
- clock->name);
- return 1;
- } else if (clock->update_callback) {
- return clock->update_callback();
- }
- return 0;
+
+ if (clock == new)
+ return;
+
+ now = clocksource_read(new);
+ nsec = __get_nsec_offset();
+ timespec_add_ns(&xtime, nsec);
+
+ clock = new;
+ clock->cycle_last = now;
+
+ clock->error = 0;
+ clock->xtime_nsec = 0;
+ clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+
+ tick_clock_notify();
+
+ printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+ clock->name);
}
#else
-static inline int change_clocksource(void)
-{
- return 0;
-}
+static inline void change_clocksource(void) { }
#endif
/**
@@ -871,33 +918,56 @@ int timekeeping_is_continuous(void)
do {
seq = read_seqbegin(&xtime_lock);
- ret = clock->is_continuous;
+ ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
} while (read_seqretry(&xtime_lock, seq));
return ret;
}
+/**
+ * read_persistent_clock - Return time in seconds from the persistent clock.
+ *
+ * Weak dummy function for arches that do not yet support it.
+ * Returns seconds from epoch using the battery backed persistent clock.
+ * Returns zero if unsupported.
+ *
+ * XXX - Do be sure to remove it once all arches implement it.
+ */
+unsigned long __attribute__((weak)) read_persistent_clock(void)
+{
+ return 0;
+}
+
/*
* timekeeping_init - Initializes the clocksource and common timekeeping values
*/
void __init timekeeping_init(void)
{
unsigned long flags;
+ unsigned long sec = read_persistent_clock();
write_seqlock_irqsave(&xtime_lock, flags);
ntp_clear();
clock = clocksource_get_next();
- clocksource_calculate_interval(clock, tick_nsec);
+ clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
clock->cycle_last = clocksource_read(clock);
+ xtime.tv_sec = sec;
+ xtime.tv_nsec = 0;
+ set_normalized_timespec(&wall_to_monotonic,
+ -xtime.tv_sec, -xtime.tv_nsec);
+
write_sequnlock_irqrestore(&xtime_lock, flags);
}
-
+/* flag for if timekeeping is suspended */
static int timekeeping_suspended;
+/* time in seconds when suspend began */
+static unsigned long timekeeping_suspend_time;
+
/**
* timekeeping_resume - Resumes the generic timekeeping subsystem.
* @dev: unused
@@ -909,13 +979,26 @@ static int timekeeping_suspended;
static int timekeeping_resume(struct sys_device *dev)
{
unsigned long flags;
+ unsigned long now = read_persistent_clock();
write_seqlock_irqsave(&xtime_lock, flags);
- /* restart the last cycle value */
+
+ if (now && (now > timekeeping_suspend_time)) {
+ unsigned long sleep_length = now - timekeeping_suspend_time;
+
+ xtime.tv_sec += sleep_length;
+ wall_to_monotonic.tv_sec -= sleep_length;
+ }
+ /* re-base the last cycle value */
clock->cycle_last = clocksource_read(clock);
clock->error = 0;
timekeeping_suspended = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ touch_softlockup_watchdog();
+ /* Resume hrtimers */
+ clock_was_set();
+
return 0;
}
@@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
write_seqlock_irqsave(&xtime_lock, flags);
timekeeping_suspended = 1;
+ timekeeping_suspend_time = read_persistent_clock();
write_sequnlock_irqrestore(&xtime_lock, flags);
return 0;
}
@@ -1089,11 +1173,8 @@ static void update_wall_time(void)
clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
/* check to see if there is a new clocksource to use */
- if (change_clocksource()) {
- clock->error = 0;
- clock->xtime_nsec = 0;
- clocksource_calculate_interval(clock, tick_nsec);
- }
+ change_clocksource();
+ update_vsyscall(&xtime, clock);
}
/*
@@ -1162,11 +1243,9 @@ static inline void calc_load(unsigned long ticks)
* This read-write spinlock protects us from races in SMP while
* playing with xtime and avenrun.
*/
-#ifndef ARCH_HAVE_XTIME_LOCK
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
+__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
EXPORT_SYMBOL(xtime_lock);
-#endif
/*
* This function runs timers and the timer-tq in bottom half context.
@@ -1175,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
tvec_base_t *base = __get_cpu_var(tvec_bases);
- hrtimer_run_queues();
+ hrtimer_run_queues();
+
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
}
@@ -1621,6 +1701,8 @@ void __init init_timers(void)
int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
+ init_timer_stats();
+
BUG_ON(err == NOTIFY_BAD);
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);