aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorCliff Wickman <cpw@sgi.com>2007-10-18 23:40:46 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-19 11:53:44 -0700
commit470fd646444c65a5d062a371f5ec8dcedee61239 (patch)
tree59b923486d4a95efa07c4b2ad7cb0b1fcc3f3c88 /kernel
parentControl groups: Replace "cont" with "cgrp" and other misc renaming (diff)
downloadlinux-dev-470fd646444c65a5d062a371f5ec8dcedee61239.tar.xz
linux-dev-470fd646444c65a5d062a371f5ec8dcedee61239.zip
hotplug cpu: migrate a task within its cpuset
When a cpu is disabled, move_task_off_dead_cpu() is called for tasks that have been running on that cpu. Currently, such a task is migrated: 1) to any cpu on the same node as the disabled cpu, which is both online and among that task's cpus_allowed 2) to any cpu which is both online and among that task's cpus_allowed It is typical of a multithreaded application running on a large NUMA system to have its tasks confined to a cpuset so as to cluster them near the memory that they share. Furthermore, it is typical to explicitly place such a task on a specific cpu in that cpuset. And in that case the task's cpus_allowed includes only a single cpu. This patch would insert a preference to migrate such a task to some cpu within its cpuset (and set its cpus_allowed to its entire cpuset). With this patch, migrate the task to: 1) to any cpu on the same node as the disabled cpu, which is both online and among that task's cpus_allowed 2) to any online cpu within the task's cpuset 3) to any cpu which is both online and among that task's cpus_allowed In order to do this, move_task_off_dead_cpu() must make a call to cpuset_cpus_allowed_locked(), a new subset of cpuset_cpus_allowed(), that will not block. (name change - per Oleg's suggestion) Calls are made to cpuset_lock() and cpuset_unlock() in migration_call() to set the cpuset mutex during the whole migrate_live_tasks() and migrate_dead_tasks() procedure. [akpm@linux-foundation.org: build fix] [pj@sgi.com: Fix indentation and spacing] Signed-off-by: Cliff Wickman <cpw@sgi.com> Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Christoph Lameter <clameter@sgi.com> Cc: Paul Jackson <pj@sgi.com> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpuset.c15
-rw-r--r--kernel/sched.c12
2 files changed, 25 insertions, 2 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fa31cb9f9898..50f5dc463688 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1818,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
cpumask_t mask;
mutex_lock(&callback_mutex);
+ mask = cpuset_cpus_allowed_locked(tsk);
+ mutex_unlock(&callback_mutex);
+
+ return mask;
+}
+
+/**
+ * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
+ * Must be called with callback_mutex held.
+ **/
+cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
+{
+ cpumask_t mask;
+
task_lock(tsk);
guarantee_online_cpus(task_cs(tsk), &mask);
task_unlock(tsk);
- mutex_unlock(&callback_mutex);
return mask;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index a7e30462600f..4071306e1088 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5160,8 +5160,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
/* No more Mr. Nice Guy. */
if (dest_cpu == NR_CPUS) {
+ cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
+ /*
+ * Try to stay on the same cpuset, where the
+ * current cpuset may be a subset of all cpus.
+ * The cpuset_cpus_allowed_locked() variant of
+ * cpuset_cpus_allowed() will not block. It must be
+ * called within calls to cpuset_lock/cpuset_unlock.
+ */
rq = task_rq_lock(p, &flags);
- cpus_setall(p->cpus_allowed);
+ p->cpus_allowed = cpus_allowed;
dest_cpu = any_online_cpu(p->cpus_allowed);
task_rq_unlock(rq, &flags);
@@ -5527,6 +5535,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD:
case CPU_DEAD_FROZEN:
+ cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
migrate_live_tasks(cpu);
rq = cpu_rq(cpu);
kthread_stop(rq->migration_thread);
@@ -5540,6 +5549,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
rq->idle->sched_class = &idle_sched_class;
migrate_dead_tasks(cpu);
spin_unlock_irq(&rq->lock);
+ cpuset_unlock();
migrate_nr_uninterruptible(rq);
BUG_ON(rq->nr_running != 0);