From 4f167fb491725ca0be9df0d76b4b2dd862cdfe0b Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin.zhang@intel.com>
Date: Mon, 16 May 2005 21:53:43 -0700
Subject: [PATCH] spurious interrupt fix

On my IA64 machine, after kernel 2.6.12-rc3 boots, an edge-triggered
interrupt (IRQ 46) keeps triggered over and over again.  There is no IRQ 46
interrupt action handler.  It has lots of impact on performance.

Kernel 2.6.10 and its prior versions have no the problem.  Basically,
kernel 2.6.10 will mask the spurious edge interrupt if the interrupt is
triggered for the second time and its status includes
IRQ_DISABLE|IRQ_PENDING.

Originally, IA64 kernel has its own specific _irq_desc definitions in file
arch/ia64/kernel/irq.c.  The definition initiates _irq_desc[irq].status to
IRQ_DISABLE.  Since kernel 2.6.11, it was moved to architecture independent
codes, i.e.  kernel/irq/handle.c, but kernel/irq/handle.c initiates
_irq_desc[irq].status to 0 instead of IRQ_DISABLE.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/irq/handle.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 2fb0e46e11f3..06b5a6323998 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -30,6 +30,7 @@
  */
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
 	[0 ... NR_IRQS-1] = {
+		.status = IRQ_DISABLED,
 		.handler = &no_irq_type,
 		.lock = SPIN_LOCK_UNLOCKED
 	}
-- 
cgit v1.3-14-g43fede


From 3c0547ba8b3bbd8b26ae35e33ac17ff51f67f78c Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Mon, 16 May 2005 21:53:47 -0700
Subject: [PATCH] add_preferred_console() build fix

Move add_preferred_console out of CONFIG_PRINTK so serial console does the
right thing.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/printk.c | 72 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 290a07ce2c8a..01b58d7d17ff 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -160,42 +160,6 @@ static int __init console_setup(char *str)
 
 __setup("console=", console_setup);
 
-/**
- * add_preferred_console - add a device to the list of preferred consoles.
- *
- * The last preferred console added will be used for kernel messages
- * and stdin/out/err for init.  Normally this is used by console_setup
- * above to handle user-supplied console arguments; however it can also
- * be used by arch-specific code either to override the user or more
- * commonly to provide a default console (ie from PROM variables) when
- * the user has not supplied one.
- */
-int __init add_preferred_console(char *name, int idx, char *options)
-{
-	struct console_cmdline *c;
-	int i;
-
-	/*
-	 *	See if this tty is not yet registered, and
-	 *	if we have a slot free.
-	 */
-	for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
-		if (strcmp(console_cmdline[i].name, name) == 0 &&
-			  console_cmdline[i].index == idx) {
-				selected_console = i;
-				return 0;
-		}
-	if (i == MAX_CMDLINECONSOLES)
-		return -E2BIG;
-	selected_console = i;
-	c = &console_cmdline[i];
-	memcpy(c->name, name, sizeof(c->name));
-	c->name[sizeof(c->name) - 1] = 0;
-	c->options = options;
-	c->index = idx;
-	return 0;
-}
-
 static int __init log_buf_len_setup(char *str)
 {
 	unsigned long size = memparse(str, &str);
@@ -670,6 +634,42 @@ static void call_console_drivers(unsigned long start, unsigned long end) {}
 
 #endif
 
+/**
+ * add_preferred_console - add a device to the list of preferred consoles.
+ *
+ * The last preferred console added will be used for kernel messages
+ * and stdin/out/err for init.  Normally this is used by console_setup
+ * above to handle user-supplied console arguments; however it can also
+ * be used by arch-specific code either to override the user or more
+ * commonly to provide a default console (ie from PROM variables) when
+ * the user has not supplied one.
+ */
+int __init add_preferred_console(char *name, int idx, char *options)
+{
+	struct console_cmdline *c;
+	int i;
+
+	/*
+	 *	See if this tty is not yet registered, and
+	 *	if we have a slot free.
+	 */
+	for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
+		if (strcmp(console_cmdline[i].name, name) == 0 &&
+			  console_cmdline[i].index == idx) {
+				selected_console = i;
+				return 0;
+		}
+	if (i == MAX_CMDLINECONSOLES)
+		return -E2BIG;
+	selected_console = i;
+	c = &console_cmdline[i];
+	memcpy(c->name, name, sizeof(c->name));
+	c->name[sizeof(c->name) - 1] = 0;
+	c->options = options;
+	c->index = idx;
+	return 0;
+}
+
 /**
  * acquire_console_sem - lock the console system for exclusive use.
  *
-- 
cgit v1.3-14-g43fede


From dfaa9c94b13071c9b5f8578d0ae99acc76c60139 Mon Sep 17 00:00:00 2001
From: William Lee Irwin III <wli@holomorphy.com>
Date: Mon, 16 May 2005 21:53:58 -0700
Subject: [PATCH] profile.c: `schedule' parsing fix

profile=schedule parsing is not quite what it should be.  First, str[7] is
'e', not ',', but then even if it did fall through, prof_on =
SCHED_PROFILING would be clobbered inside if (get_option(...)) So a small
amount of rearrangement is done in this patch to correct it.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/profile.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/profile.c b/kernel/profile.c
index 0221a50ca867..ad8cbb75ffa2 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -49,15 +49,19 @@ static DECLARE_MUTEX(profile_flip_mutex);
 
 static int __init profile_setup(char * str)
 {
+	static char __initdata schedstr[] = "schedule";
 	int par;
 
-	if (!strncmp(str, "schedule", 8)) {
+	if (!strncmp(str, schedstr, strlen(schedstr))) {
 		prof_on = SCHED_PROFILING;
-		printk(KERN_INFO "kernel schedule profiling enabled\n");
-		if (str[7] == ',')
-			str += 8;
-	}
-	if (get_option(&str,&par)) {
+		if (str[strlen(schedstr)] == ',')
+			str += strlen(schedstr) + 1;
+		if (get_option(&str, &par))
+			prof_shift = par;
+		printk(KERN_INFO
+			"kernel schedule profiling enabled (shift: %ld)\n",
+			prof_shift);
+	} else if (get_option(&str, &par)) {
 		prof_shift = par;
 		prof_on = CPU_PROFILING;
 		printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
-- 
cgit v1.3-14-g43fede


From 82428b62aa6294ea640c7e920a9224ecaf46db65 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Mon, 9 May 2005 08:07:00 -0700
Subject: [PATCH] Driver Core: pm diagnostics update, check for errors

This patch includes various tweaks in the messaging that appears during
system pm state transitions:

  * Warn about certain illegal calls in the device tree, like resuming
    child before parent or suspending parent before child.  This could
    happen easily enough through sysfs, or in some cases when drivers
    use device_pm_set_parent().

  * Be more consistent about dev_dbg() tracing ... do it for resume() and
    shutdown() too, and never if the driver doesn't have that method.

  * Say which type of system sleep state is being entered.

Except for the warnings, these only affect debug messaging.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/power/resume.c   | 11 ++++++++++-
 drivers/base/power/shutdown.c | 13 +++++++------
 drivers/base/power/suspend.c  | 17 +++++++++++++++--
 kernel/power/main.c           |  6 +++---
 4 files changed, 35 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/drivers/base/power/resume.c b/drivers/base/power/resume.c
index f8f5055754d6..26468971ef5a 100644
--- a/drivers/base/power/resume.c
+++ b/drivers/base/power/resume.c
@@ -22,8 +22,17 @@ extern int sysdev_resume(void);
 
 int resume_device(struct device * dev)
 {
-	if (dev->bus && dev->bus->resume)
+	if (dev->power.pm_parent
+			&& dev->power.pm_parent->power.power_state) {
+		dev_err(dev, "PM: resume from %d, parent %s still %d\n",
+			dev->power.power_state,
+			dev->power.pm_parent->bus_id,
+			dev->power.pm_parent->power.power_state);
+	}
+	if (dev->bus && dev->bus->resume) {
+		dev_dbg(dev,"resuming\n");
 		return dev->bus->resume(dev);
+	}
 	return 0;
 }
 
diff --git a/drivers/base/power/shutdown.c b/drivers/base/power/shutdown.c
index d1e023fbe169..97979901c149 100644
--- a/drivers/base/power/shutdown.c
+++ b/drivers/base/power/shutdown.c
@@ -25,8 +25,10 @@ int device_detach_shutdown(struct device * dev)
 		return 0;
 
 	if (dev->detach_state == DEVICE_PM_OFF) {
-		if (dev->driver && dev->driver->shutdown)
+		if (dev->driver && dev->driver->shutdown) {
+			dev_dbg(dev, "shutdown\n");
 			dev->driver->shutdown(dev);
+		}
 		return 0;
 	}
 	return dpm_runtime_suspend(dev, dev->detach_state);
@@ -52,13 +54,12 @@ void device_shutdown(void)
 	struct device * dev;
 
 	down_write(&devices_subsys.rwsem);
-	list_for_each_entry_reverse(dev, &devices_subsys.kset.list, kobj.entry) {
-		pr_debug("shutting down %s: ", dev->bus_id);
+	list_for_each_entry_reverse(dev, &devices_subsys.kset.list,
+				kobj.entry) {
 		if (dev->driver && dev->driver->shutdown) {
-			pr_debug("Ok\n");
+			dev_dbg(dev, "shutdown\n");
 			dev->driver->shutdown(dev);
-		} else
-			pr_debug("Ignored.\n");
+		}
 	}
 	up_write(&devices_subsys.rwsem);
 
diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c
index a0b5cf689e63..0ec44ef840be 100644
--- a/drivers/base/power/suspend.c
+++ b/drivers/base/power/suspend.c
@@ -39,12 +39,25 @@ int suspend_device(struct device * dev, pm_message_t state)
 {
 	int error = 0;
 
-	dev_dbg(dev, "suspending\n");
+	if (dev->power.power_state) {
+		dev_dbg(dev, "PM: suspend %d-->%d\n",
+			dev->power.power_state, state);
+	}
+	if (dev->power.pm_parent
+			&& dev->power.pm_parent->power.power_state) {
+		dev_err(dev,
+			"PM: suspend %d->%d, parent %s already %d\n",
+			dev->power.power_state, state,
+			dev->power.pm_parent->bus_id,
+			dev->power.pm_parent->power.power_state);
+	}
 
 	dev->power.prev_state = dev->power.power_state;
 
-	if (dev->bus && dev->bus->suspend && !dev->power.power_state)
+	if (dev->bus && dev->bus->suspend && !dev->power.power_state) {
+		dev_dbg(dev, "suspending\n");
 		error = dev->bus->suspend(dev, state);
+	}
 
 	return error;
 }
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7960ddf04a57..4cdebc972ff2 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -156,14 +156,14 @@ static int enter_state(suspend_state_t state)
 		goto Unlock;
 	}
 
-	pr_debug("PM: Preparing system for suspend\n");
+	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
 	if ((error = suspend_prepare(state)))
 		goto Unlock;
 
-	pr_debug("PM: Entering state.\n");
+	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
 	error = suspend_enter(state);
 
-	pr_debug("PM: Finishing up.\n");
+	pr_debug("PM: Finishing wakeup.\n");
 	suspend_finish(state);
  Unlock:
 	up(&pm_sem);
-- 
cgit v1.3-14-g43fede


From b39c4fab259b216148e705344a892c96efe1946d Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Fri, 20 May 2005 13:59:15 -0700
Subject: [PATCH] cpusets+hotplug+preepmt broken

This patch removes the entwining of cpusets and hotplug code in the "No
more Mr.  Nice Guy" case of sched.c move_task_off_dead_cpu().

Since the hotplug code is holding a spinlock at this point, we cannot take
the cpuset semaphore, cpuset_sem, as would seem to be required either to
update the tasks cpuset, or to scan up the nested cpuset chain, looking for
the nearest cpuset ancestor that still has some CPUs that are online.  So
we just punt and blast the tasks cpus_allowed with all bits allowed.

This reverts these lines of code to what they were before the cpuset patch.
 And it updates the cpuset Doc file, to match.

The one known alternative to this that seems to work came from Dinakar
Guniguntala, and required the hotplug code to take the cpuset_sem semaphore
much earlier in its processing.  So far as we know, the increased locking
entanglement between cpusets and hot plug of this alternative approach is
not worth doing in this case.

Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Nathan Lynch <ntl@pobox.com>
Acked-by: Dinakar Guniguntala <dino@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/cpusets.txt | 3 +--
 kernel/sched.c            | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 1ad26d2c20ae..2f8f24eaefd9 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -252,8 +252,7 @@ in a tasks processor placement.
 There is an exception to the above.  If hotplug funtionality is used
 to remove all the CPUs that are currently assigned to a cpuset,
 then the kernel will automatically update the cpus_allowed of all
-tasks attached to CPUs in that cpuset with the online CPUs of the
-nearest parent cpuset that still has some CPUs online.  When memory
+tasks attached to CPUs in that cpuset to allow all CPUs.  When memory
 hotplug functionality for removing Memory Nodes is available, a
 similar exception is expected to apply there as well.  In general,
 the kernel prefers to violate cpuset placement, over starving a task
diff --git a/kernel/sched.c b/kernel/sched.c
index 0dc3158667a2..66b2ed784822 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4243,7 +4243,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
 
 	/* No more Mr. Nice Guy. */
 	if (dest_cpu == NR_CPUS) {
-		tsk->cpus_allowed = cpuset_cpus_allowed(tsk);
+		cpus_setall(tsk->cpus_allowed);
 		dest_cpu = any_online_cpu(tsk->cpus_allowed);
 
 		/*
-- 
cgit v1.3-14-g43fede


From 10f02d1c59e55f529140dda3a92f0099d748451c Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@labri.fr>
Date: Sat, 21 May 2005 17:50:15 +0200
Subject: [PATCH] spin_unlock_bh() and preempt_check_resched()

In _spin_unlock_bh(lock):
	do { \
		_raw_spin_unlock(lock); \
		preempt_enable(); \
		local_bh_enable(); \
		__release(lock); \
	} while (0)

there is no reason for using preempt_enable() instead of a simple
preempt_enable_no_resched()

Since we know bottom halves are disabled, preempt_schedule() will always
return at once (preempt_count!=0), and hence preempt_check_resched() is
useless here...

This fixes it by using "preempt_enable_no_resched()" instead of the
"preempt_enable()", and thus avoids the useless preempt_check_resched()
just before re-enabling bottom halves.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/spinlock.h | 8 ++++----
 kernel/spinlock.c        | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index e895f3eaf53a..d6ba068719b6 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -248,7 +248,7 @@ typedef struct {
 
 #define _spin_trylock_bh(lock)	({preempt_disable(); local_bh_disable(); \
 				_raw_spin_trylock(lock) ? \
-				1 : ({preempt_enable(); local_bh_enable(); 0;});})
+				1 : ({preempt_enable_no_resched(); local_bh_enable(); 0;});})
 
 #define _spin_lock(lock)	\
 do { \
@@ -383,7 +383,7 @@ do { \
 #define _spin_unlock_bh(lock) \
 do { \
 	_raw_spin_unlock(lock); \
-	preempt_enable(); \
+	preempt_enable_no_resched(); \
 	local_bh_enable(); \
 	__release(lock); \
 } while (0)
@@ -391,7 +391,7 @@ do { \
 #define _write_unlock_bh(lock) \
 do { \
 	_raw_write_unlock(lock); \
-	preempt_enable(); \
+	preempt_enable_no_resched(); \
 	local_bh_enable(); \
 	__release(lock); \
 } while (0)
@@ -423,8 +423,8 @@ do { \
 #define _read_unlock_bh(lock)	\
 do { \
 	_raw_read_unlock(lock);	\
+	preempt_enable_no_resched();	\
 	local_bh_enable();	\
-	preempt_enable();	\
 	__release(lock); \
 } while (0)
 
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index e15ed17863f1..0c3f9d8bbe17 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -294,7 +294,7 @@ EXPORT_SYMBOL(_spin_unlock_irq);
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
 	_raw_spin_unlock(lock);
-	preempt_enable();
+	preempt_enable_no_resched();
 	local_bh_enable();
 }
 EXPORT_SYMBOL(_spin_unlock_bh);
@@ -318,7 +318,7 @@ EXPORT_SYMBOL(_read_unlock_irq);
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
 	_raw_read_unlock(lock);
-	preempt_enable();
+	preempt_enable_no_resched();
 	local_bh_enable();
 }
 EXPORT_SYMBOL(_read_unlock_bh);
@@ -342,7 +342,7 @@ EXPORT_SYMBOL(_write_unlock_irq);
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
 	_raw_write_unlock(lock);
-	preempt_enable();
+	preempt_enable_no_resched();
 	local_bh_enable();
 }
 EXPORT_SYMBOL(_write_unlock_bh);
@@ -354,7 +354,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 	if (_raw_spin_trylock(lock))
 		return 1;
 
-	preempt_enable();
+	preempt_enable_no_resched();
 	local_bh_enable();
 	return 0;
 }
-- 
cgit v1.3-14-g43fede


From c33880aaddbbab1ccf36f4457ed1090621f2e39a Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Tue, 24 May 2005 19:29:47 -0700
Subject: [PATCH] sigkill priority fix

If SIGKILL does not have priority, we cannot instantly kill task before it
makes some unexpected job.  It can be critical, but we were unable to
reproduce this easily until Heiko Carstens <Heiko.Carstens@de.ibm.com>
reported this problem on LKML.

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-Off-By: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/signal.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 8f3debc77c5b..b3c24c732c5a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -522,7 +522,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 {
 	int sig = 0;
 
-	sig = next_signal(pending, mask);
+	/* SIGKILL must have priority, otherwise it is quite easy
+	 * to create an unkillable process, sending sig < SIGKILL
+	 * to self */
+	if (unlikely(sigismember(&pending->signal, SIGKILL))) {
+		if (!sigismember(mask, SIGKILL))
+			sig = SIGKILL;
+	}
+
+	if (likely(!sig))
+		sig = next_signal(pending, mask);
 	if (sig) {
 		if (current->notifier) {
 			if (sigismember(current->notifier_mask, sig)) {
-- 
cgit v1.3-14-g43fede