From a9d9baa1e819b2f92f9cfa5240f766c535e636a6 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Mon, 28 Nov 2005 13:43:46 -0800
Subject: [PATCH] clean up lock_cpu_hotplug() in cpufreq

There are some callers in cpufreq hotplug notify path that the lowest
function calls lock_cpu_hotplug().  The lock is already held during
cpu_up() and cpu_down() calls when the notify calls are broadcast to
registered clients.

Ideally if possible, we could disable_preempt() at the highest caller and
make sure we dont sleep in the path down in cpufreq->driver_target() calls
but the calls are so intertwined and cumbersome to cleanup.

Hence we consistently use lock_cpu_hotplug() and unlock_cpu_hotplug() in
all places.

 - Removed export of cpucontrol semaphore and made it static.
 - removed explicit uses of up/down with lock_cpu_hotplug()
   so we can keep track of the the callers in same thread context and
   just keep refcounts without calling a down() that causes a deadlock.
 - Removed current_in_hotplug() uses
 - Removed PF_HOTPLUG_CPU in sched.h introduced for the current_in_hotplug()
   temporary workaround.

Tested with insmod of cpufreq_stat.ko, and logical online/offline
to make sure we dont have any hang situations.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Cc: Zwane Mwaikambo <zwane@linuxpower.ca>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpu.c | 83 +++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 49 insertions(+), 34 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index d61ba88f34e5..e882c6babf41 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -16,47 +16,76 @@
 #include <asm/semaphore.h>
 
 /* This protects CPUs going up and down... */
-DECLARE_MUTEX(cpucontrol);
-EXPORT_SYMBOL_GPL(cpucontrol);
+static DECLARE_MUTEX(cpucontrol);
 
 static struct notifier_block *cpu_chain;
 
-/*
- * Used to check by callers if they need to acquire the cpucontrol
- * or not to protect a cpu from being removed. Its sometimes required to
- * call these functions both for normal operations, and in response to
- * a cpu being added/removed. If the context of the call is in the same
- * thread context as a CPU hotplug thread, we dont need to take the lock
- * since its already protected
- * check drivers/cpufreq/cpufreq.c for its usage - Ashok Raj
- */
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct *lock_cpu_hotplug_owner;
+static int lock_cpu_hotplug_depth;
 
-int current_in_cpu_hotplug(void)
+static int __lock_cpu_hotplug(int interruptible)
 {
-	return (current->flags & PF_HOTPLUG_CPU);
+	int ret = 0;
+
+	if (lock_cpu_hotplug_owner != current) {
+		if (interruptible)
+			ret = down_interruptible(&cpucontrol);
+		else
+			down(&cpucontrol);
+	}
+
+	/*
+	 * Set only if we succeed in locking
+	 */
+	if (!ret) {
+		lock_cpu_hotplug_depth++;
+		lock_cpu_hotplug_owner = current;
+	}
+
+	return ret;
 }
 
-EXPORT_SYMBOL_GPL(current_in_cpu_hotplug);
+void lock_cpu_hotplug(void)
+{
+	__lock_cpu_hotplug(0);
+}
+EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
 
+void unlock_cpu_hotplug(void)
+{
+	if (--lock_cpu_hotplug_depth == 0) {
+		lock_cpu_hotplug_owner = NULL;
+		up(&cpucontrol);
+	}
+}
+EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
+
+int lock_cpu_hotplug_interruptible(void)
+{
+	return __lock_cpu_hotplug(1);
+}
+EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible);
+#endif	/* CONFIG_HOTPLUG_CPU */
 
 /* Need to know about CPUs going up/down? */
 int register_cpu_notifier(struct notifier_block *nb)
 {
 	int ret;
 
-	if ((ret = down_interruptible(&cpucontrol)) != 0)
+	if ((ret = lock_cpu_hotplug_interruptible()) != 0)
 		return ret;
 	ret = notifier_chain_register(&cpu_chain, nb);
-	up(&cpucontrol);
+	unlock_cpu_hotplug();
 	return ret;
 }
 EXPORT_SYMBOL(register_cpu_notifier);
 
 void unregister_cpu_notifier(struct notifier_block *nb)
 {
-	down(&cpucontrol);
+	lock_cpu_hotplug();
 	notifier_chain_unregister(&cpu_chain, nb);
-	up(&cpucontrol);
+	unlock_cpu_hotplug();
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
@@ -112,13 +141,6 @@ int cpu_down(unsigned int cpu)
 		goto out;
 	}
 
-	/*
-	 * Leave a trace in current->flags indicating we are already in
-	 * process of performing CPU hotplug. Callers can check if cpucontrol
-	 * is already acquired by current thread, and if so not cause
-	 * a dead lock by not acquiring the lock
-	 */
-	current->flags |= PF_HOTPLUG_CPU;
 	err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
@@ -171,7 +193,6 @@ out_thread:
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
 out:
-	current->flags &= ~PF_HOTPLUG_CPU;
 	unlock_cpu_hotplug();
 	return err;
 }
@@ -182,7 +203,7 @@ int __devinit cpu_up(unsigned int cpu)
 	int ret;
 	void *hcpu = (void *)(long)cpu;
 
-	if ((ret = down_interruptible(&cpucontrol)) != 0)
+	if ((ret = lock_cpu_hotplug_interruptible()) != 0)
 		return ret;
 
 	if (cpu_online(cpu) || !cpu_present(cpu)) {
@@ -190,11 +211,6 @@ int __devinit cpu_up(unsigned int cpu)
 		goto out;
 	}
 
-	/*
-	 * Leave a trace in current->flags indicating we are already in
-	 * process of performing CPU hotplug.
-	 */
-	current->flags |= PF_HOTPLUG_CPU;
 	ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
 		printk("%s: attempt to bring up CPU %u failed\n",
@@ -217,7 +233,6 @@ out_notify:
 	if (ret != 0)
 		notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
 out:
-	current->flags &= ~PF_HOTPLUG_CPU;
-	up(&cpucontrol);
+	unlock_cpu_hotplug();
 	return ret;
 }
-- 
cgit v1.3-14-g43fede


From c13cf856cbe16aec3007604dc013cbf3a16c6686 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 28 Nov 2005 13:43:48 -0800
Subject: [PATCH] fork.c: proc_fork_connector() called under write_lock()

Don't do that - it does GFP_KERNEL allocations, for a start.

(Reported by Guillaume Thouvenin <guillaume.thouvenin@bull.net>)

Acked-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 1c1cf8dc396b..d0d49879ab7c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1135,13 +1135,13 @@ static task_t *copy_process(unsigned long clone_flags,
 			__get_cpu_var(process_counts)++;
 	}
 
-	proc_fork_connector(p);
 	if (!current->signal->tty && p->signal->tty)
 		p->signal->tty = NULL;
 
 	nr_threads++;
 	total_forks++;
 	write_unlock_irq(&tasklist_lock);
+	proc_fork_connector(p);
 	retval = 0;
 
 fork_out:
-- 
cgit v1.3-14-g43fede


From ee500f274914653a7d3dfca7d0140a3d21658e32 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 28 Nov 2005 13:43:55 -0800
Subject: [PATCH] fix 32bit overflow in timespec_to_sample()

fix 32bit overflow in timespec_to_sample()

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 84af54c39e1b..cae4f5728997 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -36,7 +36,7 @@ timespec_to_sample(clockid_t which_clock, const struct timespec *tp)
 	union cpu_time_count ret;
 	ret.sched = 0;		/* high half always zero when .cpu used */
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-		ret.sched = tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
+		ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
 	} else {
 		ret.cpu = timespec_to_cputime(tp);
 	}
-- 
cgit v1.3-14-g43fede


From bce61dd49d6ba7799be2de17c772e4c701558f14 Mon Sep 17 00:00:00 2001
From: Ben Collins <bcollins@debian.org>
Date: Mon, 28 Nov 2005 13:43:56 -0800
Subject: [PATCH] Fix hardcoded cpu=0 in workqueue for per_cpu_ptr() calls

Tracked this down on an Ultra Enterprise 3000.  It's a 6-way machine.  Odd
thing about this machine (and it's good for finding bugs like this) is that
the CPU id's are not 0 based.  For instance, on my machine the CPU's are
6/7/10/11/14/15.

This caused some NULL pointer dereference in kernel/workqueue.c because for
single_threaded workqueue's, it hardcoded the cpu to 0.

I changed the 0's to any_online_cpu(cpu_online_mask), which cpumask.h
claims is "First cpu in mask".  So this fits the same usage.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/workqueue.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 42df83d7fad2..2bd5aee1c736 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -102,7 +102,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 
 	if (!test_and_set_bit(0, &work->pending)) {
 		if (unlikely(is_single_threaded(wq)))
-			cpu = 0;
+			cpu = any_online_cpu(cpu_online_map);
 		BUG_ON(!list_empty(&work->entry));
 		__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 		ret = 1;
@@ -118,7 +118,7 @@ static void delayed_work_timer_fn(unsigned long __data)
 	int cpu = smp_processor_id();
 
 	if (unlikely(is_single_threaded(wq)))
-		cpu = 0;
+		cpu = any_online_cpu(cpu_online_map);
 
 	__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 }
@@ -266,8 +266,8 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
 	might_sleep();
 
 	if (is_single_threaded(wq)) {
-		/* Always use cpu 0's area. */
-		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, 0));
+		/* Always use first cpu's area. */
+		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, any_online_cpu(cpu_online_map)));
 	} else {
 		int cpu;
 
@@ -320,7 +320,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
 	lock_cpu_hotplug();
 	if (singlethread) {
 		INIT_LIST_HEAD(&wq->list);
-		p = create_workqueue_thread(wq, 0);
+		p = create_workqueue_thread(wq, any_online_cpu(cpu_online_map));
 		if (!p)
 			destroy = 1;
 		else
@@ -374,7 +374,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	/* We don't need the distraction of CPUs appearing and vanishing. */
 	lock_cpu_hotplug();
 	if (is_single_threaded(wq))
-		cleanup_workqueue_thread(wq, 0);
+		cleanup_workqueue_thread(wq, any_online_cpu(cpu_online_map));
 	else {
 		for_each_online_cpu(cpu)
 			cleanup_workqueue_thread(wq, cpu);
-- 
cgit v1.3-14-g43fede


From 8c4b8add83c93306b07d78469fd351dc462e4b66 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Mon, 28 Nov 2005 13:44:05 -0800
Subject: [PATCH] cpuset fork locking fix

Move the cpuset_fork() call below the write_unlock_irq call in
kernel/fork.c copy_process().

Since the cpuset-dual-semaphore-locking-overhaul.patch, the cpuset_fork()
routine acquires task_lock(), so cannot be called while holding the
tasklist_lock for write.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index d0d49879ab7c..fb8572a42297 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1124,8 +1124,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
-	cpuset_fork(p);
-
 	attach_pid(p, PIDTYPE_PID, p->pid);
 	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
@@ -1142,6 +1140,7 @@ static task_t *copy_process(unsigned long clone_flags,
 	total_forks++;
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
+	cpuset_fork(p);
 	retval = 0;
 
 fork_out:
-- 
cgit v1.3-14-g43fede


From 5bd0190bf3d7e53043a048e809ffa29d41b9d6ac Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 29 Nov 2005 19:34:32 -0800
Subject: [PATCH] Fix crash when ptrace poking hugepage areas

set_page_dirty() will not cope with being handed a page * which is part of
a compound page, but not the master page in that compound page.  This case
can occur via access_process_vm() if you attemp to write to another
process's hugepage memory area using ptrace() (causing an oops or hang).

This patch fixes the bug by only calling set_page_dirty() from
access_process_vm() if the page is not a compound page.  We already use a
similar fix in bio_set_pages_dirty() for the case of direct io to
hugepages.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: William Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/ptrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 17ee7e5a3451..656476eedb1b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -241,7 +241,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 		if (write) {
 			copy_to_user_page(vma, page, addr,
 					  maddr + offset, buf, bytes);
-			set_page_dirty_lock(page);
+			if (!PageCompound(page))
+				set_page_dirty_lock(page);
 		} else {
 			copy_from_user_page(vma, page, addr,
 					    buf, maddr + offset, bytes);
-- 
cgit v1.3-14-g43fede


From 123d3c13e2853a11b4d599d754b356acb12886e2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Tue, 29 Nov 2005 19:34:37 -0800
Subject: [PATCH] fix swsusp on machines not supporting S4

Fix swsusp on machines not supporting S4.  With recent changes, it is not
possible to trigger it using /sys filesystem.  Swsusp does not really need
any support from low-level code, it is possible to reboot or halt at the
end of suspend.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/main.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6ee2cad530e8..d253f3ae2fa5 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -24,7 +24,7 @@
 
 DECLARE_MUTEX(pm_sem);
 
-struct pm_ops * pm_ops = NULL;
+struct pm_ops *pm_ops;
 suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
 
 /**
@@ -151,6 +151,18 @@ static char *pm_states[PM_SUSPEND_MAX] = {
 #endif
 };
 
+static inline int valid_state(suspend_state_t state)
+{
+	/* Suspend-to-disk does not really need low-level support.
+	 * It can work with reboot if needed. */
+	if (state == PM_SUSPEND_DISK)
+		return 1;
+
+	if (pm_ops && pm_ops->valid && !pm_ops->valid(state))
+		return 0;
+	return 1;
+}
+
 
 /**
  *	enter_state - Do common work of entering low-power state.
@@ -167,7 +179,7 @@ static int enter_state(suspend_state_t state)
 {
 	int error;
 
-	if (pm_ops && pm_ops->valid && !pm_ops->valid(state))
+	if (!valid_state(state))
 		return -ENODEV;
 	if (down_trylock(&pm_sem))
 		return -EBUSY;
@@ -238,9 +250,8 @@ static ssize_t state_show(struct subsystem * subsys, char * buf)
 	char * s = buf;
 
 	for (i = 0; i < PM_SUSPEND_MAX; i++) {
-		if (pm_states[i] && pm_ops && (!pm_ops->valid
-			||(pm_ops->valid && pm_ops->valid(i))))
-			s += sprintf(s,"%s ",pm_states[i]);
+		if (pm_states[i] && valid_state(i))
+			s += sprintf(s,"%s ", pm_states[i]);
 	}
 	s += sprintf(s,"\n");
 	return (s - buf);
-- 
cgit v1.3-14-g43fede