From 9cedc194a7735e5d74ad26d3825247dc65a4d98e Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@openvz.org>
Date: Wed, 14 Jun 2006 17:59:35 +0400
Subject: [PATCH] Return error in case flock_lock_file failure

If flock_lock_file() failed to allocate flock with locks_alloc_lock()
then "error = 0" is returned. Need to return some non-zero.

Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 6f99c0a6f836..ab61a8b54829 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -755,6 +755,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	if (request->fl_type == F_UNLCK)
 		goto out;
 
+	error = -ENOMEM;
 	new_fl = locks_alloc_lock();
 	if (new_fl == NULL)
 		goto out;
@@ -781,6 +782,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	locks_copy_lock(new_fl, request);
 	locks_insert_lock(&inode->i_flock, new_fl);
 	new_fl = NULL;
+	error = 0;
 
 out:
 	unlock_kernel();
-- 
cgit v1.2.3-59-g8ed1b


From 553698f944ed715dfe023b4cef07601f0ce735f0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 14 Jun 2006 19:11:57 +0200
Subject: [PATCH] cfq-iosched: fix crash in do_div()

We don't clear the seek stat values in cfq_alloc_io_context(), and if
->seek_mean is unlucky enough to be set to -36 by chance, the first
invocation of cfq_update_io_seektime() will oops with a divide by zero
in do_div().

Just memset the entire cic instead of filling invididual values
independently.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 block/cfq-iosched.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a46d030e092a..052b17487625 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1323,17 +1323,12 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
 	if (cic) {
-		RB_CLEAR(&cic->rb_node);
-		cic->key = NULL;
-		cic->cfqq[ASYNC] = NULL;
-		cic->cfqq[SYNC] = NULL;
+		memset(cic, 0, sizeof(*cic));
+		RB_CLEAR_COLOR(&cic->rb_node);
 		cic->last_end_request = jiffies;
-		cic->ttime_total = 0;
-		cic->ttime_samples = 0;
-		cic->ttime_mean = 0;
+		INIT_LIST_HEAD(&cic->queue_list);
 		cic->dtor = cfq_free_io_context;
 		cic->exit = cfq_exit_io_context;
-		INIT_LIST_HEAD(&cic->queue_list);
 		atomic_inc(&ioc_count);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 16070428d389ff47aa3476b0911179ad90c640a2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 16 Jun 2006 07:46:37 +0200
Subject: [PATCH] fix cdrom open

Some time ago the cdrom open routine was changed so that we call the
driver's open routine before checking to see if it is read only.  However,
if we discovered that a read write open was not possible and the open
flags required a writable open, we just returned -EROFS without calling
the driver's release routine.   This seems to work for most cdrom drivers,
but breaks the Powerpc iSeries virtual cdrom rather badly.

This just inserts the release call in the error path to balance the call
to "->open()" done by "open_for_data()".

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/cdrom/cdrom.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index a59876a0bfa1..3170eaa25087 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1009,9 +1009,9 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp)
 		if (fp->f_mode & FMODE_WRITE) {
 			ret = -EROFS;
 			if (cdrom_open_write(cdi))
-				goto err;
+				goto err_release;
 			if (!CDROM_CAN(CDC_RAM))
-				goto err;
+				goto err_release;
 			ret = 0;
 			cdi->media_written = 0;
 		}
@@ -1026,6 +1026,8 @@ int cdrom_open(struct cdrom_device_info *cdi, struct inode *ip, struct file *fp)
 	    not be mounting, but opening with O_NONBLOCK */
 	check_disk_change(ip->i_bdev);
 	return 0;
+err_release:
+	cdi->ops->release(cdi);
 err:
 	cdi->use_count--;
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 991721572ef2140c6411894aebefd3377e71a9e7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 16 Jun 2006 13:02:29 +0200
Subject: [PATCH] Fix missing ret assignment in __bio_map_user() error path

If get_user_pages() returns less pages than what we asked for, we jump
to out_unmap which will return ERR_PTR(ret).  But ret can contain a
positive number just smaller than local_nr_pages, so be sure to set it
to -EFAULT always.

Problem found and diagnosed by Damien Le Moal <damien@sdl.hitachi.co.jp>

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/bio.c b/fs/bio.c
index 098c12b2d60a..6a0b9ad8f8c9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -654,9 +654,10 @@ static struct bio *__bio_map_user_iov(request_queue_t *q,
 				     write_to_vm, 0, &pages[cur_page], NULL);
 		up_read(&current->mm->mmap_sem);
 
-		if (ret < local_nr_pages)
+		if (ret < local_nr_pages) {
+			ret = -EFAULT;
 			goto out_unmap;
-
+		}
 
 		offset = uaddr & ~PAGE_MASK;
 		for (j = cur_page; j < page_limit; j++) {
-- 
cgit v1.2.3-59-g8ed1b


From 88d113601ca19c82feb038438c8c5db502d146f9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 16 Jun 2006 12:10:46 -0700
Subject: [PATCH] sky2: netconsole suspend/resume interaction

A couple of fixes that should prevent crashes when using netconsole and
suspend/resume. First, netconsole poll routine shouldn't run unless the
device is up; second, the NAPI poll should be disabled during suspend.

This is only an issue on sky2, because it has to have one NAPI poll
routine for both ports on dual port boards. Normal drivers use
netif_rx_schedule_prep and that checks for netif_running.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/net/sky2.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 97fe95666f3b..fba1e4d4d83d 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -2255,8 +2255,10 @@ static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs)
 static void sky2_netpoll(struct net_device *dev)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
+	struct net_device *dev0 = sky2->hw->dev[0];
 
-	sky2_intr(sky2->hw->pdev->irq, sky2->hw, NULL);
+	if (netif_running(dev) && __netif_rx_schedule_prep(dev0))
+		__netif_rx_schedule(dev0);
 }
 #endif
 
@@ -3446,6 +3448,7 @@ static int sky2_suspend(struct pci_dev *pdev, pm_message_t state)
 
 			sky2_down(dev);
 			netif_device_detach(dev);
+			netif_poll_disable(dev);
 		}
 	}
 
@@ -3474,6 +3477,8 @@ static int sky2_resume(struct pci_dev *pdev)
 		struct net_device *dev = hw->dev[i];
 		if (dev && netif_running(dev)) {
 			netif_device_attach(dev);
+			netif_poll_enable(dev);
+
 			err = sky2_up(dev);
 			if (err) {
 				printk(KERN_ERR PFX "%s: could not up: %d\n",
-- 
cgit v1.2.3-59-g8ed1b


From 8f17fc20bfb75bcec4cfeda789738979c8338fdc Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 15 Jun 2006 20:11:15 +0400
Subject: [PATCH] check_process_timers: fix possible lockup

If the local timer interrupt happens just after do_exit() sets PF_EXITING
(and before it clears ->it_xxx_expires) run_posix_cpu_timers() will call
check_process_timers() with tasklist_lock + ->siglock held and

	check_process_timers:

		t = tsk;
		do {
			....

			do {
				t = next_thread(t);
			} while (unlikely(t->flags & PF_EXITING));
		} while (t != tsk);

the outer loop will never stop.

Actually, the window is bigger.  Another process can attach the timer
after ->it_xxx_expires was cleared (see the next commit) and the 'if
(PF_EXITING)' check in arm_timer() is racy (see the one after that).

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 520f6c59948d..9d9169aa2e24 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1173,6 +1173,9 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 		t = tsk;
 		do {
+			if (unlikely(t->flags & PF_EXITING))
+				continue;
+
 			ticks = cputime_add(cputime_add(t->utime, t->stime),
 					    prof_left);
 			if (!cputime_eq(prof_expires, cputime_zero) &&
@@ -1193,11 +1196,7 @@ static void check_process_timers(struct task_struct *tsk,
 					      t->it_sched_expires > sched)) {
 				t->it_sched_expires = sched;
 			}
-
-			do {
-				t = next_thread(t);
-			} while (unlikely(t->flags & PF_EXITING));
-		} while (t != tsk);
+		} while ((t = next_thread(t)) != tsk);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 30f1e3dd8c72abda343bcf415f7d8894a02b4290 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 15 Jun 2006 20:11:43 +0400
Subject: [PATCH] run_posix_cpu_timers: remove a bogus BUG_ON()

do_exit() clears ->it_##clock##_expires, but nothing prevents
another cpu to attach the timer to exiting process after that.
arm_timer() tries to protect against this race, but the check
is racy.

After exit_notify() does 'write_unlock_irq(&tasklist_lock)' and
before do_exit() calls 'schedule() local timer interrupt can find
tsk->exit_state != 0. If that state was EXIT_DEAD (or another cpu
does sys_wait4) interrupted task has ->signal == NULL.

At this moment exiting task has no pending cpu timers, they were
cleanuped in __exit_signal()->posix_cpu_timers_exit{,_group}(),
so we can just return from irq.

John Stultz recently confirmed this bug, see

	http://marc.theaimsgroup.com/?l=linux-kernel&m=115015841413687

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c             |  8 --------
 kernel/posix-cpu-timers.c | 36 ++++++++++++++++++------------------
 2 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index e95b93282210..e06d0c10a24e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -881,14 +881,6 @@ fastcall NORET_TYPE void do_exit(long code)
 
 	tsk->flags |= PF_EXITING;
 
-	/*
-	 * Make sure we don't try to process any timer firings
-	 * while we are already exiting.
-	 */
- 	tsk->it_virt_expires = cputime_zero;
- 	tsk->it_prof_expires = cputime_zero;
-	tsk->it_sched_expires = 0;
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, current->pid,
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9d9169aa2e24..4882bf1e094a 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1288,30 +1288,30 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 #undef	UNEXPIRED
 
-	BUG_ON(tsk->exit_state);
-
 	/*
 	 * Double-check with locks held.
 	 */
 	read_lock(&tasklist_lock);
-	spin_lock(&tsk->sighand->siglock);
+	if (likely(tsk->signal != NULL)) {
+		spin_lock(&tsk->sighand->siglock);
 
-	/*
-	 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-	 * all the timers that are firing, and put them on the firing list.
-	 */
-	check_thread_timers(tsk, &firing);
-	check_process_timers(tsk, &firing);
+		/*
+		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
+		 * all the timers that are firing, and put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+		check_process_timers(tsk, &firing);
 
-	/*
-	 * We must release these locks before taking any timer's lock.
-	 * There is a potential race with timer deletion here, as the
-	 * siglock now protects our private firing list.  We have set
-	 * the firing flag in each timer, so that a deletion attempt
-	 * that gets the timer lock before we do will give it up and
-	 * spin until we've taken care of that timer below.
-	 */
-	spin_unlock(&tsk->sighand->siglock);
+		/*
+		 * We must release these locks before taking any timer's lock.
+		 * There is a potential race with timer deletion here, as the
+		 * siglock now protects our private firing list.  We have set
+		 * the firing flag in each timer, so that a deletion attempt
+		 * that gets the timer lock before we do will give it up and
+		 * spin until we've taken care of that timer below.
+		 */
+		spin_unlock(&tsk->sighand->siglock);
+	}
 	read_unlock(&tasklist_lock);
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From f53ae1dc3429529a58aa538e0a860d713c7079c3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 15 Jun 2006 20:12:02 +0400
Subject: [PATCH] arm_timer: remove a racy and obsolete PF_EXITING check

arm_timer() checks PF_EXITING to prevent BUG_ON(->exit_state)
in run_posix_cpu_timers().

However, for some reason it does so only for CPUCLOCK_PERTHREAD
case (which is imho wrong).

Also, this check is not reliable, PF_EXITING could be set on
another cpu without any locks/barriers just after the check,
so it can't prevent from attaching the timer to the exiting
task.

The previous patch makes this check unneeded.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 4882bf1e094a..d38d9ec3276c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 	struct cpu_timer_list *next;
 	unsigned long i;
 
-	if (CPUCLOCK_PERTHREAD(timer->it_clock)	&& (p->flags & PF_EXITING))
-		return;
-
 	head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
 		p->cpu_timers : p->signal->cpu_timers);
 	head += CPUCLOCK_WHICH(timer->it_clock);
-- 
cgit v1.2.3-59-g8ed1b


From 19242b240793ac769f5b91b68a5e43dd39f0c530 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Date: Thu, 15 Jun 2006 21:15:44 +1000
Subject: [PATCH] powerpc: Fix 64k pages on non-partitioned machines

The page size encoding passed to tlbie is incorrect for new-style
large pages.  This fixes it.  This doesn't affect anything on older
machines because mmu_psize_defs[psize].penc (the page size encoding)
is 0 for 4k and 16M pages (the two are distinguished by a separate "is
a large page" bit).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/powerpc/mm/hash_native_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 33654d1b1b43..994856e55b7c 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -52,7 +52,7 @@ static inline void __tlbie(unsigned long va, unsigned int psize)
 	default:
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
-		va |= (0x7f >> (8 - penc)) << 12;
+		va |= penc << 12;
 		asm volatile("tlbie %0,1" : : "r" (va) : "memory");
 		break;
 	}
@@ -74,7 +74,7 @@ static inline void __tlbiel(unsigned long va, unsigned int psize)
 	default:
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
-		va |= (0x7f >> (8 - penc)) << 12;
+		va |= penc << 12;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
 			     : : "r"(va) : "memory");
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From ce221982e0bef039d7047b0f667bb414efece5af Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Date: Thu, 15 Jun 2006 15:09:16 +0200
Subject: [PATCH] powerpc: enable CPU_FTR_CI_LARGE_PAGE for cell

Reflect the fact that the Cell Broadband Engine supports 64k
pages by adding the bit to the CPU features.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-powerpc/cputable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 9fcf0162d859..f6265c2a0dd2 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -329,7 +329,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
 #define CPU_FTRS_CELL	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_CTRL | CPU_FTR_PAUSE_ZERO)
+	    CPU_FTR_CTRL | CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2)
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 427abfa28afedffadfca9dd8b067eb6d36bac53f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Sat, 17 Jun 2006 18:49:35 -0700
Subject: Linux v2.6.17

Being named "Crazed Snow-Weasel" instills a lot of confidence in this
release, so I'm sure this will be one of the better ones.
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a3a7baad8555..1700d3f6ea22 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 17
-EXTRAVERSION =-rc6
+EXTRAVERSION =
 NAME=Crazed Snow-Weasel
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3-59-g8ed1b