aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/hrtimer.c15
-rw-r--r--kernel/sched/core.c5
-rw-r--r--kernel/time/Kconfig2
-rw-r--r--kernel/time/Makefile5
-rw-r--r--kernel/time/clockevents.c40
-rw-r--r--kernel/time/ntp.c5
-rw-r--r--kernel/time/tick-broadcast-hrtimer.c106
-rw-r--r--kernel/time/tick-broadcast.c85
-rw-r--r--kernel/time/tick-common.c16
-rw-r--r--kernel/time/tick-internal.h11
-rw-r--r--kernel/time/timekeeping_debug.c2
-rw-r--r--kernel/timer.c57
-rw-r--r--kernel/workqueue.c7
13 files changed, 290 insertions, 66 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 09094361dce5..d55092ceee29 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -168,19 +168,6 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
}
}
-
-/*
- * Get the preferred target CPU for NOHZ
- */
-static int hrtimer_get_target(int this_cpu, int pinned)
-{
-#ifdef CONFIG_NO_HZ_COMMON
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
- return get_nohz_timer_target();
-#endif
- return this_cpu;
-}
-
/*
* With HIGHRES=y we do not migrate the timer when it is expiring
* before the next event on the target cpu because we cannot reprogram
@@ -214,7 +201,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
int this_cpu = smp_processor_id();
- int cpu = hrtimer_get_target(this_cpu, pinned);
+ int cpu = get_nohz_timer_target(pinned);
int basenum = base->index;
again:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d11a1768357d..3c4d096544ce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -555,12 +555,15 @@ void resched_cpu(int cpu)
* selecting an idle cpu will add more delays to the timers than intended
* (as that cpu's timer base may not be uptodate wrt jiffies etc).
*/
-int get_nohz_timer_target(void)
+int get_nohz_timer_target(int pinned)
{
int cpu = smp_processor_id();
int i;
struct sched_domain *sd;
+ if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
+ return cpu;
+
rcu_read_lock();
for_each_domain(cpu, sd) {
for_each_cpu(i, sched_domain_span(sd)) {
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 3ce6e8c5f3fc..f448513a45ed 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -124,7 +124,7 @@ config NO_HZ_FULL
endchoice
config NO_HZ_FULL_ALL
- bool "Full dynticks system on all CPUs by default"
+ bool "Full dynticks system on all CPUs by default (except CPU 0)"
depends on NO_HZ_FULL
help
If the user doesn't pass the nohz_full boot option to
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 9250130646f5..57a413fd0ebf 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -3,7 +3,10 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
+ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
+ obj-y += tick-broadcast.o
+ obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o
+endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 086ad6043bcb..ad362c260ef4 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -439,6 +439,19 @@ void clockevents_config_and_register(struct clock_event_device *dev,
}
EXPORT_SYMBOL_GPL(clockevents_config_and_register);
+int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
+{
+ clockevents_config(dev, freq);
+
+ if (dev->mode == CLOCK_EVT_MODE_ONESHOT)
+ return clockevents_program_event(dev, dev->next_event, false);
+
+ if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
+ dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
+
+ return 0;
+}
+
/**
* clockevents_update_freq - Update frequency and reprogram a clock event device.
* @dev: device to modify
@@ -446,17 +459,22 @@ EXPORT_SYMBOL_GPL(clockevents_config_and_register);
*
* Reconfigure and reprogram a clock event device in oneshot
* mode. Must be called on the cpu for which the device delivers per
- * cpu timer events with interrupts disabled! Returns 0 on success,
- * -ETIME when the event is in the past.
+ * cpu timer events. If called for the broadcast device the core takes
+ * care of serialization.
+ *
+ * Returns 0 on success, -ETIME when the event is in the past.
*/
int clockevents_update_freq(struct clock_event_device *dev, u32 freq)
{
- clockevents_config(dev, freq);
-
- if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
- return 0;
+ unsigned long flags;
+ int ret;
- return clockevents_program_event(dev, dev->next_event, false);
+ local_irq_save(flags);
+ ret = tick_broadcast_update_freq(dev, freq);
+ if (ret == -ENODEV)
+ ret = __clockevents_update_freq(dev, freq);
+ local_irq_restore(flags);
+ return ret;
}
/*
@@ -524,12 +542,13 @@ void clockevents_resume(void)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
/**
* clockevents_notify - notification about relevant events
+ * Returns 0 on success, any other value on error
*/
-void clockevents_notify(unsigned long reason, void *arg)
+int clockevents_notify(unsigned long reason, void *arg)
{
struct clock_event_device *dev, *tmp;
unsigned long flags;
- int cpu;
+ int cpu, ret = 0;
raw_spin_lock_irqsave(&clockevents_lock, flags);
@@ -542,7 +561,7 @@ void clockevents_notify(unsigned long reason, void *arg)
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
- tick_broadcast_oneshot_control(reason);
+ ret = tick_broadcast_oneshot_control(reason);
break;
case CLOCK_EVT_NOTIFY_CPU_DYING:
@@ -585,6 +604,7 @@ void clockevents_notify(unsigned long reason, void *arg)
break;
}
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
+ return ret;
}
EXPORT_SYMBOL_GPL(clockevents_notify);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index af8d1d4f3d55..419a52cecd20 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -514,12 +514,13 @@ static void sync_cmos_clock(struct work_struct *work)
next.tv_sec++;
next.tv_nsec -= NSEC_PER_SEC;
}
- schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
+ queue_delayed_work(system_power_efficient_wq,
+ &sync_cmos_work, timespec_to_jiffies(&next));
}
void ntp_notify_cmos_timer(void)
{
- schedule_delayed_work(&sync_cmos_work, 0);
+ queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
}
#else
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
new file mode 100644
index 000000000000..eb682d5c697c
--- /dev/null
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -0,0 +1,106 @@
+/*
+ * linux/kernel/time/tick-broadcast-hrtimer.c
+ * This file emulates a local clock event device
+ * via a pseudo clock device.
+ */
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/profile.h>
+#include <linux/clockchips.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#include "tick-internal.h"
+
+static struct hrtimer bctimer;
+
+static void bc_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *bc)
+{
+ switch (mode) {
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ /*
+ * Note, we cannot cancel the timer here as we might
+ * run into the following live lock scenario:
+ *
+ * cpu 0 cpu1
+ * lock(broadcast_lock);
+ * hrtimer_interrupt()
+ * bc_handler()
+ * tick_handle_oneshot_broadcast();
+ * lock(broadcast_lock);
+ * hrtimer_cancel()
+ * wait_for_callback()
+ */
+ hrtimer_try_to_cancel(&bctimer);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * This is called from the guts of the broadcast code when the cpu
+ * which is about to enter idle has the earliest broadcast timer event.
+ */
+static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
+{
+ /*
+ * We try to cancel the timer first. If the callback is on
+ * flight on some other cpu then we let it handle it. If we
+ * were able to cancel the timer nothing can rearm it as we
+ * own broadcast_lock.
+ *
+ * However we can also be called from the event handler of
+ * ce_broadcast_hrtimer itself when it expires. We cannot
+ * restart the timer because we are in the callback, but we
+ * can set the expiry time and let the callback return
+ * HRTIMER_RESTART.
+ */
+ if (hrtimer_try_to_cancel(&bctimer) >= 0) {
+ hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
+ /* Bind the "device" to the cpu */
+ bc->bound_on = smp_processor_id();
+ } else if (bc->bound_on == smp_processor_id()) {
+ hrtimer_set_expires(&bctimer, expires);
+ }
+ return 0;
+}
+
+static struct clock_event_device ce_broadcast_hrtimer = {
+ .set_mode = bc_set_mode,
+ .set_next_ktime = bc_set_next,
+ .features = CLOCK_EVT_FEAT_ONESHOT |
+ CLOCK_EVT_FEAT_KTIME |
+ CLOCK_EVT_FEAT_HRTIMER,
+ .rating = 0,
+ .bound_on = -1,
+ .min_delta_ns = 1,
+ .max_delta_ns = KTIME_MAX,
+ .min_delta_ticks = 1,
+ .max_delta_ticks = ULONG_MAX,
+ .mult = 1,
+ .shift = 0,
+ .cpumask = cpu_all_mask,
+};
+
+static enum hrtimer_restart bc_handler(struct hrtimer *t)
+{
+ ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
+
+ if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX)
+ return HRTIMER_NORESTART;
+
+ return HRTIMER_RESTART;
+}
+
+void tick_setup_hrtimer_broadcast(void)
+{
+ hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ bctimer.function = bc_handler;
+ clockevents_register_device(&ce_broadcast_hrtimer);
+}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 98977a57ac72..64c5990fd500 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -120,6 +120,19 @@ int tick_is_broadcast_device(struct clock_event_device *dev)
return (dev && tick_broadcast_device.evtdev == dev);
}
+int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
+{
+ int ret = -ENODEV;
+
+ if (tick_is_broadcast_device(dev)) {
+ raw_spin_lock(&tick_broadcast_lock);
+ ret = __clockevents_update_freq(dev, freq);
+ raw_spin_unlock(&tick_broadcast_lock);
+ }
+ return ret;
+}
+
+
static void err_broadcast(const struct cpumask *mask)
{
pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
@@ -272,12 +285,8 @@ static void tick_do_broadcast(struct cpumask *mask)
*/
static void tick_do_periodic_broadcast(void)
{
- raw_spin_lock(&tick_broadcast_lock);
-
cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
tick_do_broadcast(tmpmask);
-
- raw_spin_unlock(&tick_broadcast_lock);
}
/*
@@ -287,13 +296,15 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
ktime_t next;
+ raw_spin_lock(&tick_broadcast_lock);
+
tick_do_periodic_broadcast();
/*
* The device is in periodic mode. No reprogramming necessary:
*/
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
- return;
+ goto unlock;
/*
* Setup the next period for devices, which do not have
@@ -306,9 +317,11 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
next = ktime_add(next, tick_period);
if (!clockevents_program_event(dev, next, false))
- return;
+ goto unlock;
tick_do_periodic_broadcast();
}
+unlock:
+ raw_spin_unlock(&tick_broadcast_lock);
}
/*
@@ -630,24 +643,61 @@ again:
raw_spin_unlock(&tick_broadcast_lock);
}
+static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
+{
+ if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
+ return 0;
+ if (bc->next_event.tv64 == KTIME_MAX)
+ return 0;
+ return bc->bound_on == cpu ? -EBUSY : 0;
+}
+
+static void broadcast_shutdown_local(struct clock_event_device *bc,
+ struct clock_event_device *dev)
+{
+ /*
+ * For hrtimer based broadcasting we cannot shutdown the cpu
+ * local device if our own event is the first one to expire or
+ * if we own the broadcast timer.
+ */
+ if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
+ if (broadcast_needs_cpu(bc, smp_processor_id()))
+ return;
+ if (dev->next_event.tv64 < bc->next_event.tv64)
+ return;
+ }
+ clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+}
+
+static void broadcast_move_bc(int deadcpu)
+{
+ struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
+ if (!bc || !broadcast_needs_cpu(bc, deadcpu))
+ return;
+ /* This moves the broadcast assignment to this cpu */
+ clockevents_program_event(bc, bc->next_event, 1);
+}
+
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
+ * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
*/
-void tick_broadcast_oneshot_control(unsigned long reason)
+int tick_broadcast_oneshot_control(unsigned long reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
ktime_t now;
- int cpu;
+ int cpu, ret = 0;
/*
* Periodic mode does not care about the enter/exit of power
* states
*/
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
- return;
+ return 0;
/*
* We are called with preemtion disabled from the depth of the
@@ -658,7 +708,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
dev = td->evtdev;
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
- return;
+ return 0;
bc = tick_broadcast_device.evtdev;
@@ -666,7 +716,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
- clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+ broadcast_shutdown_local(bc, dev);
/*
* We only reprogram the broadcast timer if we
* did not mark ourself in the force mask and
@@ -679,6 +729,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)
dev->next_event.tv64 < bc->next_event.tv64)
tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
}
+ /*
+ * If the current CPU owns the hrtimer broadcast
+ * mechanism, it cannot go deep idle and we remove the
+ * CPU from the broadcast mask. We don't have to go
+ * through the EXIT path as the local timer is not
+ * shutdown.
+ */
+ ret = broadcast_needs_cpu(bc, cpu);
+ if (ret)
+ cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
} else {
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
@@ -746,6 +806,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
}
out:
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ return ret;
}
/*
@@ -852,6 +913,8 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
+ broadcast_move_bc(cpu);
+
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 20b2fe37d105..015661279b68 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -98,18 +98,19 @@ static void tick_periodic(int cpu)
void tick_handle_periodic(struct clock_event_device *dev)
{
int cpu = smp_processor_id();
- ktime_t next;
+ ktime_t next = dev->next_event;
tick_periodic(cpu);
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return;
- /*
- * Setup the next period for devices, which do not have
- * periodic mode:
- */
- next = ktime_add(dev->next_event, tick_period);
for (;;) {
+ /*
+ * Setup the next period for devices, which do not have
+ * periodic mode:
+ */
+ next = ktime_add(next, tick_period);
+
if (!clockevents_program_event(dev, next, false))
return;
/*
@@ -118,12 +119,11 @@ void tick_handle_periodic(struct clock_event_device *dev)
* to be sure we're using a real hardware clocksource.
* Otherwise we could get trapped in an infinite
* loop, as the tick_periodic() increments jiffies,
- * when then will increment time, posibly causing
+ * which then will increment time, possibly causing
* the loop to trigger again and again.
*/
if (timekeeping_valid_for_hres())
tick_periodic(cpu);
- next = ktime_add(next, tick_period);
}
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 8329669b51ec..7ab92b19965a 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -46,7 +46,7 @@ extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
extern void tick_resume_oneshot(void);
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
-extern void tick_broadcast_oneshot_control(unsigned long reason);
+extern int tick_broadcast_oneshot_control(unsigned long reason);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
@@ -58,7 +58,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
-static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
+static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
@@ -87,7 +87,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
-static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
+static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
@@ -111,6 +111,7 @@ extern int tick_resume_broadcast(void);
extern void tick_broadcast_init(void);
extern void
tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
+int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
#else /* !BROADCAST */
@@ -133,6 +134,8 @@ static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
static inline void tick_suspend_broadcast(void) { }
static inline int tick_resume_broadcast(void) { return 0; }
static inline void tick_broadcast_init(void) { }
+static inline int tick_broadcast_update_freq(struct clock_event_device *dev,
+ u32 freq) { return -ENODEV; }
/*
* Set the periodic handler in non broadcast mode
@@ -152,6 +155,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
+int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
+
#endif
extern void do_timer(unsigned long ticks);
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 802433a4f5eb..4d54f97558df 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -21,6 +21,8 @@
#include <linux/seq_file.h>
#include <linux/time.h>
+#include "timekeeping_internal.h"
+
static unsigned int sleep_time_bin[32] = {0};
static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
diff --git a/kernel/timer.c b/kernel/timer.c
index d78de047599b..87bd529879c2 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -81,6 +81,7 @@ struct tvec_base {
unsigned long timer_jiffies;
unsigned long next_timer;
unsigned long active_timers;
+ unsigned long all_timers;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
@@ -337,6 +338,20 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
}
EXPORT_SYMBOL_GPL(set_timer_slack);
+/*
+ * If the list is empty, catch up ->timer_jiffies to the current time.
+ * The caller must hold the tvec_base lock. Returns true if the list
+ * was empty and therefore ->timer_jiffies was updated.
+ */
+static bool catchup_timer_jiffies(struct tvec_base *base)
+{
+ if (!base->all_timers) {
+ base->timer_jiffies = jiffies;
+ return true;
+ }
+ return false;
+}
+
static void
__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
@@ -383,15 +398,17 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
+ (void)catchup_timer_jiffies(base);
__internal_add_timer(base, timer);
/*
* Update base->active_timers and base->next_timer
*/
if (!tbase_get_deferrable(timer->base)) {
- if (time_before(timer->expires, base->next_timer))
+ if (!base->active_timers++ ||
+ time_before(timer->expires, base->next_timer))
base->next_timer = timer->expires;
- base->active_timers++;
}
+ base->all_timers++;
}
#ifdef CONFIG_TIMER_STATS
@@ -671,6 +688,8 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
detach_timer(timer, true);
if (!tbase_get_deferrable(timer->base))
base->active_timers--;
+ base->all_timers--;
+ (void)catchup_timer_jiffies(base);
}
static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@@ -685,6 +704,8 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
if (timer->expires == base->next_timer)
base->next_timer = base->timer_jiffies;
}
+ base->all_timers--;
+ (void)catchup_timer_jiffies(base);
return 1;
}
@@ -739,12 +760,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- cpu = smp_processor_id();
-
-#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
- cpu = get_nohz_timer_target();
-#endif
+ cpu = get_nohz_timer_target(pinned);
new_base = per_cpu(tvec_bases, cpu);
if (base != new_base) {
@@ -939,8 +955,15 @@ void add_timer_on(struct timer_list *timer, int cpu)
* with the timer by holding the timer base lock. This also
* makes sure that a CPU on the way to stop its tick can not
* evaluate the timer wheel.
+ *
+ * Spare the IPI for deferrable timers on idle targets though.
+ * The next busy ticks will take care of it. Except full dynticks
+ * require special care against races with idle_cpu(), lets deal
+ * with that later.
*/
- wake_up_nohz_cpu(cpu);
+ if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu))
+ wake_up_nohz_cpu(cpu);
+
spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);
@@ -1146,6 +1169,10 @@ static inline void __run_timers(struct tvec_base *base)
struct timer_list *timer;
spin_lock_irq(&base->lock);
+ if (catchup_timer_jiffies(base)) {
+ spin_unlock_irq(&base->lock);
+ return;
+ }
while (time_after_eq(jiffies, base->timer_jiffies)) {
struct list_head work_list;
struct list_head *head = &work_list;
@@ -1160,7 +1187,7 @@ static inline void __run_timers(struct tvec_base *base)
!cascade(base, &base->tv4, INDEX(2)))
cascade(base, &base->tv5, INDEX(3));
++base->timer_jiffies;
- list_replace_init(base->tv1.vec + index, &work_list);
+ list_replace_init(base->tv1.vec + index, head);
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
@@ -1523,9 +1550,8 @@ static int init_timers_cpu(int cpu)
if (!base)
return -ENOMEM;
- /* Make sure that tvec_base is 2 byte aligned */
- if (tbase_get_deferrable(base)) {
- WARN_ON(1);
+ /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
+ if (WARN_ON(base != tbase_get_base(base))) {
kfree(base);
return -ENOMEM;
}
@@ -1559,6 +1585,7 @@ static int init_timers_cpu(int cpu)
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
base->active_timers = 0;
+ base->all_timers = 0;
return 0;
}
@@ -1648,9 +1675,9 @@ void __init init_timers(void)
err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
- init_timer_stats();
-
BUG_ON(err != NOTIFY_OK);
+
+ init_timer_stats();
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3fa5b8f3aae3..0ee63af30bd1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -516,6 +516,13 @@ void destroy_work_on_stack(struct work_struct *work)
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);
+void destroy_delayed_work_on_stack(struct delayed_work *work)
+{
+ destroy_timer_on_stack(&work->timer);
+ debug_object_free(&work->work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
+
#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }