aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-10-06 08:13:18 +0200
committerIngo Molnar <mingo@elte.hu>2008-10-06 08:13:18 +0200
commit2c10c22af088ab5d94fae93ce3fe6436b2a208b4 (patch)
treedb27721001f194327ddbdcd6c983c4ec68b77c00 /kernel
parentsched_rt.c: resch needed in rt_rq_enqueue() for the root rt_rq (diff)
parentMerge branch 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip (diff)
downloadlinux-dev-2c10c22af088ab5d94fae93ce3fe6436b2a208b4.tar.xz
linux-dev-2c10c22af088ab5d94fae93ce3fe6436b2a208b4.zip
Merge branch 'linus' into sched/devel
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c5
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/hrtimer.c95
-rw-r--r--kernel/kexec.c8
-rw-r--r--kernel/kgdb.c10
-rw-r--r--kernel/posix-timers.c2
-rw-r--r--kernel/sched.c4
-rw-r--r--kernel/time/tick-broadcast.c14
-rw-r--r--kernel/time/tick-common.c10
-rw-r--r--kernel/time/tick-internal.h7
-rw-r--r--kernel/time/tick-sched.c13
-rw-r--r--kernel/trace/trace_sysprof.c2
12 files changed, 147 insertions, 35 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 13932abde159..a0123d75ec9a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2738,14 +2738,15 @@ void cgroup_fork_callbacks(struct task_struct *child)
*/
void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
{
- struct cgroup *oldcgrp, *newcgrp;
+ struct cgroup *oldcgrp, *newcgrp = NULL;
if (need_mm_owner_callback) {
int i;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
oldcgrp = task_cgroup(old, ss->subsys_id);
- newcgrp = task_cgroup(new, ss->subsys_id);
+ if (new)
+ newcgrp = task_cgroup(new, ss->subsys_id);
if (oldcgrp == newcgrp)
continue;
if (ss->mm_owner_changed)
diff --git a/kernel/exit.c b/kernel/exit.c
index 16395644a98f..85a83c831856 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -583,8 +583,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
* If there are other users of the mm and the owner (us) is exiting
* we need to find a new owner to take on the responsibility.
*/
- if (!mm)
- return 0;
if (atomic_read(&mm->mm_users) <= 1)
return 0;
if (mm->owner != p)
@@ -627,6 +625,16 @@ retry:
} while_each_thread(g, c);
read_unlock(&tasklist_lock);
+ /*
+ * We found no owner yet mm_users > 1: this implies that we are
+ * most likely racing with swapoff (try_to_unuse()) or /proc or
+ * ptrace or page migration (get_task_mm()). Mark owner as NULL,
+ * so that subsystems can understand the callback and take action.
+ */
+ down_write(&mm->mmap_sem);
+ cgroup_mm_owner_callbacks(mm->owner, NULL);
+ mm->owner = NULL;
+ up_write(&mm->mmap_sem);
return;
assign_new_owner:
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b8e4dce80a74..cdec83e722fa 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
*/
BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
return 1;
- case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
+ case HRTIMER_CB_IRQSAFE_PERCPU:
+ case HRTIMER_CB_IRQSAFE_UNLOCKED:
/*
* This is solely for the sched tick emulation with
* dynamic tick support to ensure that we do not
* restart the tick right on the edge and end up with
* the tick timer in the softirq ! The calling site
- * takes care of this.
+ * takes care of this. Also used for hrtimer sleeper !
*/
debug_hrtimer_deactivate(timer);
return 1;
@@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer)
timer_stats_account_hrtimer(timer);
fn = timer->function;
- if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+ timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
/*
* Used for scheduler timers, avoid lock inversion with
* rq->lock and tasklist_lock.
@@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
sl->timer.function = hrtimer_wakeup;
sl->task = task;
#ifdef CONFIG_HIGH_RES_TIMERS
- sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
#endif
}
@@ -1591,29 +1593,95 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
- struct hrtimer_clock_base *new_base)
+static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+ struct hrtimer_clock_base *new_base, int dcpu)
{
struct hrtimer *timer;
struct rb_node *node;
+ int raise = 0;
while ((node = rb_first(&old_base->active))) {
timer = rb_entry(node, struct hrtimer, node);
BUG_ON(hrtimer_callback_running(timer));
debug_hrtimer_deactivate(timer);
- __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
+
+ /*
+ * Should not happen. Per CPU timers should be
+ * canceled _before_ the migration code is called
+ */
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
+ __remove_hrtimer(timer, old_base,
+ HRTIMER_STATE_INACTIVE, 0);
+ WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
+ timer, timer->function, dcpu);
+ continue;
+ }
+
+ /*
+ * Mark it as STATE_MIGRATE not INACTIVE otherwise the
+ * timer could be seen as !active and just vanish away
+ * under us on another CPU
+ */
+ __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
timer->base = new_base;
/*
* Enqueue the timer. Allow reprogramming of the event device
*/
enqueue_hrtimer(timer, new_base, 1);
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+ /*
+ * Happens with high res enabled when the timer was
+ * already expired and the callback mode is
+ * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
+ * enqueue code does not move them to the soft irq
+ * pending list for performance/latency reasons, but
+ * in the migration state, we need to do that
+ * otherwise we end up with a stale timer.
+ */
+ if (timer->state == HRTIMER_STATE_MIGRATE) {
+ timer->state = HRTIMER_STATE_PENDING;
+ list_add_tail(&timer->cb_entry,
+ &new_base->cpu_base->cb_pending);
+ raise = 1;
+ }
+#endif
+ /* Clear the migration state bit */
+ timer->state &= ~HRTIMER_STATE_MIGRATE;
+ }
+ return raise;
+}
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
+ struct hrtimer_cpu_base *new_base)
+{
+ struct hrtimer *timer;
+ int raise = 0;
+
+ while (!list_empty(&old_base->cb_pending)) {
+ timer = list_entry(old_base->cb_pending.next,
+ struct hrtimer, cb_entry);
+
+ __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
+ timer->base = &new_base->clock_base[timer->base->index];
+ list_add_tail(&timer->cb_entry, &new_base->cb_pending);
+ raise = 1;
}
+ return raise;
+}
+#else
+static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
+ struct hrtimer_cpu_base *new_base)
+{
+ return 0;
}
+#endif
static void migrate_hrtimers(int cpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
- int i;
+ int i, raise = 0;
BUG_ON(cpu_online(cpu));
old_base = &per_cpu(hrtimer_bases, cpu);
@@ -1626,14 +1694,21 @@ static void migrate_hrtimers(int cpu)
spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- migrate_hrtimer_list(&old_base->clock_base[i],
- &new_base->clock_base[i]);
+ if (migrate_hrtimer_list(&old_base->clock_base[i],
+ &new_base->clock_base[i], cpu))
+ raise = 1;
}
+ if (migrate_hrtimer_pending(old_base, new_base))
+ raise = 1;
+
spin_unlock(&old_base->lock);
spin_unlock(&new_base->lock);
local_irq_enable();
put_cpu_var(hrtimer_bases);
+
+ if (raise)
+ hrtimer_raise_softirq();
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 59f3f0df35d4..aef265325cd3 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -753,8 +753,14 @@ static struct page *kimage_alloc_page(struct kimage *image,
*old = addr | (*old & ~PAGE_MASK);
/* The old page I have found cannot be a
- * destination page, so return it.
+ * destination page, so return it if it's
+ * gfp_flags honor the ones passed in.
*/
+ if (!(gfp_mask & __GFP_HIGHMEM) &&
+ PageHighMem(old_page)) {
+ kimage_free_pages(old_page);
+ continue;
+ }
addr = old_addr;
page = old_page;
break;
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index eaa21fc9ad1d..25d955dbb989 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -488,7 +488,7 @@ static int write_mem_msg(int binary)
if (err)
return err;
if (CACHE_FLUSH_IS_SAFE)
- flush_icache_range(addr, addr + length + 1);
+ flush_icache_range(addr, addr + length);
return 0;
}
@@ -1462,7 +1462,7 @@ acquirelock:
* Get the passive CPU lock which will hold all the non-primary
* CPU in a spin state while the debugger is active
*/
- if (!kgdb_single_step || !kgdb_contthread) {
+ if (!kgdb_single_step) {
for (i = 0; i < NR_CPUS; i++)
atomic_set(&passive_cpu_wait[i], 1);
}
@@ -1475,7 +1475,7 @@ acquirelock:
#ifdef CONFIG_SMP
/* Signal the other CPUs to enter kgdb_wait() */
- if ((!kgdb_single_step || !kgdb_contthread) && kgdb_do_roundup)
+ if ((!kgdb_single_step) && kgdb_do_roundup)
kgdb_roundup_cpus(flags);
#endif
@@ -1494,7 +1494,7 @@ acquirelock:
kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code);
kgdb_deactivate_sw_breakpoints();
kgdb_single_step = 0;
- kgdb_contthread = NULL;
+ kgdb_contthread = current;
exception_level = 0;
/* Talk to debugger with gdbserial protocol */
@@ -1508,7 +1508,7 @@ acquirelock:
kgdb_info[ks->cpu].task = NULL;
atomic_set(&cpu_in_kgdb[ks->cpu], 0);
- if (!kgdb_single_step || !kgdb_contthread) {
+ if (!kgdb_single_step) {
for (i = NR_CPUS-1; i >= 0; i--)
atomic_set(&passive_cpu_wait[i], 0);
/*
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e36d5798cbff..5131e5471169 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -441,7 +441,7 @@ static struct k_itimer * alloc_posix_timer(void)
return tmr;
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr);
- tmr = NULL;
+ return NULL;
}
memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
return tmr;
diff --git a/kernel/sched.c b/kernel/sched.c
index e1299de1765e..2caedc47e764 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rt_b->rt_period_timer.function = sched_rt_period_timer;
- rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
}
static inline int rt_bandwidth_enabled(void)
@@ -1124,7 +1124,7 @@ static void init_rq_hrtick(struct rq *rq)
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
- rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
}
#else /* CONFIG_SCHED_HRTICK */
static inline void hrtick_clear(struct rq *rq)
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f1f3eee28113..cb01cd8f919b 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -235,7 +235,8 @@ static void tick_do_broadcast_on_off(void *why)
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
if (!cpu_isset(cpu, tick_broadcast_mask)) {
cpu_set(cpu, tick_broadcast_mask);
- if (td->mode == TICKDEV_MODE_PERIODIC)
+ if (tick_broadcast_device.mode ==
+ TICKDEV_MODE_PERIODIC)
clockevents_shutdown(dev);
}
if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
@@ -245,7 +246,8 @@ static void tick_do_broadcast_on_off(void *why)
if (!tick_broadcast_force &&
cpu_isset(cpu, tick_broadcast_mask)) {
cpu_clear(cpu, tick_broadcast_mask);
- if (td->mode == TICKDEV_MODE_PERIODIC)
+ if (tick_broadcast_device.mode ==
+ TICKDEV_MODE_PERIODIC)
tick_setup_periodic(dev, 0);
}
break;
@@ -575,4 +577,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
+/*
+ * Check, whether the broadcast device is in one shot mode
+ */
+int tick_broadcast_oneshot_active(void)
+{
+ return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
+}
+
#endif
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 019315ebf9de..df12434b43ca 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
*/
ktime_t tick_next_period;
ktime_t tick_period;
-int tick_do_timer_cpu __read_mostly = -1;
+int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
DEFINE_SPINLOCK(tick_device_lock);
/*
@@ -109,7 +109,8 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
if (!tick_device_is_functional(dev))
return;
- if (dev->features & CLOCK_EVT_FEAT_PERIODIC) {
+ if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
+ !tick_broadcast_oneshot_active()) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
} else {
unsigned long seq;
@@ -148,7 +149,7 @@ static void tick_setup_device(struct tick_device *td,
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == -1) {
+ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
tick_do_timer_cpu = cpu;
tick_next_period = ktime_get();
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
@@ -300,7 +301,8 @@ static void tick_shutdown(unsigned int *cpup)
if (*cpup == tick_do_timer_cpu) {
int cpu = first_cpu(cpu_online_map);
- tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu : -1;
+ tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
+ TICK_DO_TIMER_NONE;
}
spin_unlock_irqrestore(&tick_device_lock, flags);
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 6e9db9734aa6..469248782c23 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,6 +1,10 @@
/*
* tick internal variable and functions used by low/high res code
*/
+
+#define TICK_DO_TIMER_NONE -1
+#define TICK_DO_TIMER_BOOT -2
+
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
extern spinlock_t tick_device_lock;
extern ktime_t tick_next_period;
@@ -31,6 +35,7 @@ extern void tick_broadcast_oneshot_control(unsigned long reason);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
+extern int tick_broadcast_oneshot_active(void);
# else /* BROADCAST */
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
@@ -39,6 +44,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+static inline int tick_broadcast_oneshot_active(void) { return 0; }
# endif /* !BROADCAST */
#else /* !ONESHOT */
@@ -68,6 +74,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
return 0;
}
+static inline int tick_broadcast_oneshot_active(void) { return 0; }
#endif /* !TICK_ONESHOT */
/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a87b0468568b..cb02324bdb88 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -75,6 +75,9 @@ static void tick_do_update_jiffies64(ktime_t now)
incr * ticks);
}
do_timer(++ticks);
+
+ /* Keep the tick_next_period variable up to date */
+ tick_next_period = ktime_add(last_jiffies_update, tick_period);
}
write_sequnlock(&xtime_lock);
}
@@ -221,7 +224,7 @@ void tick_nohz_stop_sched_tick(int inidle)
*/
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
- tick_do_timer_cpu = -1;
+ tick_do_timer_cpu = TICK_DO_TIMER_NONE;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
@@ -303,7 +306,7 @@ void tick_nohz_stop_sched_tick(int inidle)
* invoked.
*/
if (cpu == tick_do_timer_cpu)
- tick_do_timer_cpu = -1;
+ tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->idle_sleeps++;
@@ -468,7 +471,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
* this duty, then the jiffies update is still serialized by
* xtime_lock.
*/
- if (unlikely(tick_do_timer_cpu == -1))
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
tick_do_timer_cpu = cpu;
/* Check, if the jiffies need an update */
@@ -570,7 +573,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
* this duty, then the jiffies update is still serialized by
* xtime_lock.
*/
- if (unlikely(tick_do_timer_cpu == -1))
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
tick_do_timer_cpu = cpu;
#endif
@@ -622,7 +625,7 @@ void tick_setup_sched_timer(void)
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
ts->sched_timer.function = tick_sched_timer;
- ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
/* Get the next period (per cpu) */
ts->sched_timer.expires = tick_init_jiffy_update();
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index bb948e52ce20..db58fb66a135 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
- hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
}