From 18cb2aef91b37dbce2bec2f39bb1dddd0e9dd838 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Sat, 7 Aug 2010 03:26:23 +0900
Subject: percpu: handle __percpu notations in UP accessors

UP accessors didn't take care of __percpu notations leading to a lot
of spurious sparse warnings on UP configurations.  Fix it.

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/percpu.h | 15 ++++++++++-----
 include/linux/percpu.h       |  2 +-
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b5043a9890d8..08923b684768 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -70,11 +70,16 @@ extern void setup_per_cpu_areas(void);
 
 #else /* ! SMP */
 
-#define per_cpu(var, cpu)			(*((void)(cpu), &(var)))
-#define __get_cpu_var(var)			(var)
-#define __raw_get_cpu_var(var)			(var)
-#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
-#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr)
+#define VERIFY_PERCPU_PTR(__p) ({			\
+	__verify_pcpu_ptr((__p));			\
+	(typeof(*(__p)) __kernel __force *)(__p);	\
+})
+
+#define per_cpu(var, cpu)	(*((void)(cpu), VERIFY_PERCPU_PTR(&(var))))
+#define __get_cpu_var(var)	(*VERIFY_PERCPU_PTR(&(var)))
+#define __raw_get_cpu_var(var)	(*VERIFY_PERCPU_PTR(&(var)))
+#define this_cpu_ptr(ptr)	per_cpu_ptr(ptr, 0)
+#define __this_cpu_ptr(ptr)	this_cpu_ptr(ptr)
 
 #endif	/* SMP */
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index b8b9084527b1..49466b13c5c6 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -149,7 +149,7 @@ extern void __init percpu_init_late(void);
 
 #else /* CONFIG_SMP */
 
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
 
 /* can't distinguish from other static vars, always false */
 static inline bool is_kernel_percpu_address(unsigned long addr)
-- 
cgit v1.2.3-59-g8ed1b


From 1b5ad24878b7e5a543b98c5d2f8c0d8c0dd3088f Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Sat, 7 Aug 2010 14:29:22 +0200
Subject: slub: add missing __percpu markup in mm/slub_def.h

kmem_cache->cpu_slab is a percpu pointer but was missing __percpu
markup.  Add it.

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slub_def.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 6447a723ecb1..5ec4bc0e45aa 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -68,7 +68,7 @@ struct kmem_cache_order_objects {
  * Slab cache management.
  */
 struct kmem_cache {
-	struct kmem_cache_cpu *cpu_slab;
+	struct kmem_cache_cpu __percpu *cpu_slab;
 	/* Used for retriving partial slabs etc */
 	unsigned long flags;
 	int size;		/* The size of an object including meta data */
-- 
cgit v1.2.3-59-g8ed1b


From 92298e668372f2f6c8a79fb272f13d65161a4876 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Fri, 13 Aug 2010 10:22:17 +1000
Subject: PCI: provide stub pci_domain_nr function for !CONFIG_PCI configs

Allows the new PCI domain aware DRM code to compile on m68k.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index b1d17956a153..c8d95e369ff4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1214,6 +1214,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
 						unsigned int devfn)
 { return NULL; }
 
+static inline int pci_domain_nr(struct pci_bus *bus)
+{ return 0; }
+
 #define dev_is_pci(d) (false)
 #define dev_is_pf(d) (false)
 #define dev_num_vf(d) (0)
-- 
cgit v1.2.3-59-g8ed1b


From 1b2f1489633888d4a06028315dc19d65768a1c05 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sat, 14 Aug 2010 20:20:34 +1000
Subject: drm: block userspace under allocating buffer and having drivers
 overwrite it (v2)

With the current screwed but its ABI, ioctls for the drm, Linus pointed out that we could allow userspace to specify the allocation size, but we pass it to the driver which then uses it blindly to store a struct. Now if userspace specifies the allocation size as smaller than the driver needs, the driver can possibly overwrite memory.

This patch restructures the driver ioctls so we store the structure size we are expecting, and make sure we allocate at least that size. The copy from/to userspace are still restricted to the size the user specifies, this allows ioctl structs to grow on both sides of the equation.

Up until now we didn't really use the DRM_IOCTL defines in the kernel, so this cleans them up and adds them for nouveau.

v2:
fix nouveau pushbuf arg (thanks to Ben for pointing it out)

Reported-by: Linus Torvalds <torvalds@linuxfoundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_drv.c                 | 23 ++++++---
 drivers/gpu/drm/i810/i810_dma.c           | 30 ++++++------
 drivers/gpu/drm/i830/i830_dma.c           | 28 +++++------
 drivers/gpu/drm/i915/i915_dma.c           | 80 +++++++++++++++----------------
 drivers/gpu/drm/mga/mga_state.c           | 26 +++++-----
 drivers/gpu/drm/nouveau/nouveau_channel.c | 24 +++++-----
 drivers/gpu/drm/r128/r128_state.c         | 35 +++++++-------
 drivers/gpu/drm/radeon/radeon_kms.c       | 78 +++++++++++++++---------------
 drivers/gpu/drm/radeon/radeon_state.c     | 56 +++++++++++-----------
 drivers/gpu/drm/savage/savage_bci.c       |  8 ++--
 drivers/gpu/drm/sis/sis_mm.c              | 12 ++---
 drivers/gpu/drm/via/via_dma.c             | 28 +++++------
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c       | 34 ++++++-------
 include/drm/drmP.h                        |  6 ++-
 include/drm/i830_drm.h                    | 28 +++++------
 include/drm/i915_drm.h                    |  1 +
 include/drm/mga_drm.h                     |  2 +-
 include/drm/nouveau_drm.h                 | 13 +++++
 include/drm/radeon_drm.h                  |  4 +-
 include/drm/savage_drm.h                  |  8 ++--
 20 files changed, 275 insertions(+), 249 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3644c94c0a17..84da748555bc 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -55,6 +55,9 @@
 static int drm_version(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
 
+#define DRM_IOCTL_DEF(ioctl, _func, _flags) \
+	[DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0}
+
 /** Ioctl table */
 static struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, 0),
@@ -421,6 +424,7 @@ long drm_ioctl(struct file *filp,
 	int retcode = -EINVAL;
 	char stack_kdata[128];
 	char *kdata = NULL;
+	unsigned int usize, asize;
 
 	dev = file_priv->minor->dev;
 	atomic_inc(&dev->ioctl_count);
@@ -436,11 +440,18 @@ long drm_ioctl(struct file *filp,
 	    ((nr < DRM_COMMAND_BASE) || (nr >= DRM_COMMAND_END)))
 		goto err_i1;
 	if ((nr >= DRM_COMMAND_BASE) && (nr < DRM_COMMAND_END) &&
-	    (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls))
+	    (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) {
+		u32 drv_size;
 		ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE];
+		drv_size = _IOC_SIZE(ioctl->cmd_drv);
+		usize = asize = _IOC_SIZE(cmd);
+		if (drv_size > asize)
+			asize = drv_size;
+	}
 	else if ((nr >= DRM_COMMAND_END) || (nr < DRM_COMMAND_BASE)) {
 		ioctl = &drm_ioctls[nr];
 		cmd = ioctl->cmd;
+		usize = asize = _IOC_SIZE(cmd);
 	} else
 		goto err_i1;
 
@@ -460,10 +471,10 @@ long drm_ioctl(struct file *filp,
 		retcode = -EACCES;
 	} else {
 		if (cmd & (IOC_IN | IOC_OUT)) {
-			if (_IOC_SIZE(cmd) <= sizeof(stack_kdata)) {
+			if (asize <= sizeof(stack_kdata)) {
 				kdata = stack_kdata;
 			} else {
-				kdata = kmalloc(_IOC_SIZE(cmd), GFP_KERNEL);
+				kdata = kmalloc(asize, GFP_KERNEL);
 				if (!kdata) {
 					retcode = -ENOMEM;
 					goto err_i1;
@@ -473,12 +484,12 @@ long drm_ioctl(struct file *filp,
 
 		if (cmd & IOC_IN) {
 			if (copy_from_user(kdata, (void __user *)arg,
-					   _IOC_SIZE(cmd)) != 0) {
+					   usize) != 0) {
 				retcode = -EFAULT;
 				goto err_i1;
 			}
 		} else
-			memset(kdata, 0, _IOC_SIZE(cmd));
+			memset(kdata, 0, usize);
 
 		if (ioctl->flags & DRM_UNLOCKED)
 			retcode = func(dev, kdata, file_priv);
@@ -490,7 +501,7 @@ long drm_ioctl(struct file *filp,
 
 		if (cmd & IOC_OUT) {
 			if (copy_to_user((void __user *)arg, kdata,
-					 _IOC_SIZE(cmd)) != 0)
+					 usize) != 0)
 				retcode = -EFAULT;
 		}
 	}
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index 0e6c131313d9..61b4caf220fa 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -1255,21 +1255,21 @@ long i810_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 }
 
 struct drm_ioctl_desc i810_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
 };
 
 int i810_max_ioctl = DRM_ARRAY_SIZE(i810_ioctls);
diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c
index 5168862c9227..671aa18415ac 100644
--- a/drivers/gpu/drm/i830/i830_dma.c
+++ b/drivers/gpu/drm/i830/i830_dma.c
@@ -1524,20 +1524,20 @@ long i830_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 }
 
 struct drm_ioctl_desc i830_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_GETAGE, i830_getage, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_GETAGE, i830_getage, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED),
 };
 
 int i830_max_ioctl = DRM_ARRAY_SIZE(i830_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f19ffe87af3c..a2d3509c393b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2360,46 +2360,46 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
 }
 
 struct drm_ioctl_desc i915_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_I915_FLUSH, i915_flush_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_FLIP, i915_flip_bufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_GETPARAM, i915_getparam, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_I915_ALLOC, i915_mem_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_FREE, i915_mem_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_DESTROY_HEAP,  i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ),
-	DRM_IOCTL_DEF(DRM_I915_SET_VBLANK_PIPE,  i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ),
-	DRM_IOCTL_DEF(DRM_I915_GET_VBLANK_PIPE,  i915_vblank_pipe_get, DRM_AUTH ),
-	DRM_IOCTL_DEF(DRM_I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(I915_FLUSH, i915_flush_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_FLIP, i915_flip_bufs, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(I915_ALLOC, i915_mem_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_FREE, i915_mem_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP,  i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE,  i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE,  i915_vblank_pipe_get, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c
index fff82045c427..9ce2827f8c00 100644
--- a/drivers/gpu/drm/mga/mga_state.c
+++ b/drivers/gpu/drm/mga/mga_state.c
@@ -1085,19 +1085,19 @@ file_priv)
 }
 
 struct drm_ioctl_desc mga_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_MGA_FLUSH, mga_dma_flush, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_RESET, mga_dma_reset, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_SWAP, mga_dma_swap, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_CLEAR, mga_dma_clear, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_VERTEX, mga_dma_vertex, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_INDICES, mga_dma_indices, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_ILOAD, mga_dma_iload, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_BLIT, mga_dma_blit, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_GETPARAM, mga_getparam, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_SET_FENCE, mga_set_fence, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(MGA_FLUSH, mga_dma_flush, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_RESET, mga_dma_reset, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_SWAP, mga_dma_swap, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_CLEAR, mga_dma_clear, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_VERTEX, mga_dma_vertex, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_INDICES, mga_dma_indices, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_ILOAD, mga_dma_iload, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_BLIT, mga_dma_blit, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_GETPARAM, mga_getparam, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_SET_FENCE, mga_set_fence, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 };
 
 int mga_max_ioctl = DRM_ARRAY_SIZE(mga_ioctls);
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 90fdcda332be..0480f064f2c1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -426,18 +426,18 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data,
  ***********************************/
 
 struct drm_ioctl_desc nouveau_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH),
 };
 
 int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls);
diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c
index 077af1f2f9b4..a9e33ce65918 100644
--- a/drivers/gpu/drm/r128/r128_state.c
+++ b/drivers/gpu/drm/r128/r128_state.c
@@ -1639,30 +1639,29 @@ void r128_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
 			r128_do_cleanup_pageflip(dev);
 	}
 }
-
 void r128_driver_lastclose(struct drm_device *dev)
 {
 	r128_do_cleanup_cce(dev);
 }
 
 struct drm_ioctl_desc r128_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_R128_CCE_IDLE, r128_cce_idle, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_RESET, r128_engine_reset, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_FULLSCREEN, r128_fullscreen, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_SWAP, r128_cce_swap, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_FLIP, r128_cce_flip, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_CLEAR, r128_cce_clear, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_VERTEX, r128_cce_vertex, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_INDICES, r128_cce_indices, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_BLIT, r128_cce_blit, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_DEPTH, r128_cce_depth, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_STIPPLE, r128_cce_stipple, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_R128_GETPARAM, r128_getparam, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(R128_CCE_IDLE, r128_cce_idle, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_RESET, r128_engine_reset, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_FULLSCREEN, r128_fullscreen, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_SWAP, r128_cce_swap, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_FLIP, r128_cce_flip, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_CLEAR, r128_cce_clear, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_VERTEX, r128_cce_vertex, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_INDICES, r128_cce_indices, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_BLIT, r128_cce_blit, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_DEPTH, r128_cce_depth, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_STIPPLE, r128_cce_stipple, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(R128_GETPARAM, r128_getparam, DRM_AUTH),
 };
 
 int r128_max_ioctl = DRM_ARRAY_SIZE(r128_ioctls);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index b1c8ace5f080..27435db0aa48 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -323,45 +323,45 @@ KMS_INVALID_IOCTL(radeon_surface_free_kms)
 
 
 struct drm_ioctl_desc radeon_ioctls_kms[] = {
-	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH),
 	/* KMS */
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index b3ba44c0a818..4ae5a3d1074e 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -3228,34 +3228,34 @@ void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
 }
 
 struct drm_ioctl_desc radeon_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
+	DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
 };
 
 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index f576232846c3..6756c97899f1 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -1082,10 +1082,10 @@ void savage_reclaim_buffers(struct drm_device *dev, struct drm_file *file_priv)
 }
 
 struct drm_ioctl_desc savage_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH),
 };
 
 int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls);
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 07d0f2979cac..7fe2b63412ce 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -320,12 +320,12 @@ void sis_reclaim_buffers_locked(struct drm_device *dev,
 }
 
 struct drm_ioctl_desc sis_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_SIS_FB_FREE, sis_drm_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_SIS_AGP_FREE, sis_drm_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SIS_FB_FREE, sis_drm_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SIS_AGP_FREE, sis_drm_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
 };
 
 int sis_max_ioctl = DRM_ARRAY_SIZE(sis_ioctls);
diff --git a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c
index 68dda74a50ae..cc0ffa9abd00 100644
--- a/drivers/gpu/drm/via/via_dma.c
+++ b/drivers/gpu/drm/via/via_dma.c
@@ -722,20 +722,20 @@ static int via_cmdbuf_size(struct drm_device *dev, void *data, struct drm_file *
 }
 
 struct drm_ioctl_desc via_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_FREEMEM, via_mem_free, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER),
-	DRM_IOCTL_DEF(DRM_VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER),
-	DRM_IOCTL_DEF(DRM_VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER),
-	DRM_IOCTL_DEF(DRM_VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_DMA_INIT, via_dma_init, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_FLUSH, via_flush_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_DMA_BLIT, via_dma_blit, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH)
+	DRM_IOCTL_DEF_DRV(VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_FREEMEM, via_mem_free, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER),
+	DRM_IOCTL_DEF_DRV(VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER),
+	DRM_IOCTL_DEF_DRV(VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER),
+	DRM_IOCTL_DEF_DRV(VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_DMA_INIT, via_dma_init, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_FLUSH, via_flush_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_DMA_BLIT, via_dma_blit, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH)
 };
 
 int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 9dd395b90216..72ec2e2b6e97 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -99,47 +99,47 @@
  */
 
 #define VMW_IOCTL_DEF(ioctl, func, flags) \
-	[DRM_IOCTL_NR(ioctl) - DRM_COMMAND_BASE] = {ioctl, flags, func}
+  [DRM_IOCTL_NR(DRM_IOCTL_##ioctl) - DRM_COMMAND_BASE] = {DRM_##ioctl, flags, func, DRM_IOCTL_##ioctl}
 
 /**
  * Ioctl definitions.
  */
 
 static struct drm_ioctl_desc vmw_ioctls[] = {
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_GET_PARAM, vmw_getparam_ioctl,
+	VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl,
+	VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl,
+	VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_CURSOR_BYPASS,
+	VMW_IOCTL_DEF(VMW_CURSOR_BYPASS,
 		      vmw_kms_cursor_bypass_ioctl,
 		      DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED),
 
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_CONTROL_STREAM, vmw_overlay_ioctl,
+	VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl,
 		      DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_CLAIM_STREAM, vmw_stream_claim_ioctl,
+	VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl,
 		      DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_STREAM, vmw_stream_unref_ioctl,
+	VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl,
 		      DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED),
 
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_CONTEXT, vmw_context_define_ioctl,
+	VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl,
+	VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_SURFACE, vmw_surface_define_ioctl,
+	VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl,
+	VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_REF_SURFACE, vmw_surface_reference_ioctl,
+	VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_EXECBUF, vmw_execbuf_ioctl,
+	VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl,
+	VMW_IOCTL_DEF(VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl,
 		      DRM_AUTH | DRM_ROOT_ONLY | DRM_MASTER | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_FENCE_WAIT, vmw_fence_wait_ioctl,
+	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_wait_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(DRM_IOCTL_VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl,
+	VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl,
 		      DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED)
 };
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 2a512bc0d4ab..7809d230adee 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -305,14 +305,16 @@ struct drm_ioctl_desc {
 	unsigned int cmd;
 	int flags;
 	drm_ioctl_t *func;
+	unsigned int cmd_drv;
 };
 
 /**
  * Creates a driver or general drm_ioctl_desc array entry for the given
  * ioctl, for use by drm_ioctl().
  */
-#define DRM_IOCTL_DEF(ioctl, _func, _flags) \
-	[DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags}
+
+#define DRM_IOCTL_DEF_DRV(ioctl, _func, _flags)			\
+	[DRM_IOCTL_NR(DRM_##ioctl)] = {.cmd = DRM_##ioctl, .func = _func, .flags = _flags, .cmd_drv = DRM_IOCTL_##ioctl}
 
 struct drm_magic_entry {
 	struct list_head head;
diff --git a/include/drm/i830_drm.h b/include/drm/i830_drm.h
index 4b00d2dd4f68..61315c29b8f3 100644
--- a/include/drm/i830_drm.h
+++ b/include/drm/i830_drm.h
@@ -264,20 +264,20 @@ typedef struct _drm_i830_sarea {
 #define DRM_I830_GETPARAM	0x0c
 #define DRM_I830_SETPARAM	0x0d
 
-#define DRM_IOCTL_I830_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_INIT, drm_i830_init_t)
-#define DRM_IOCTL_I830_VERTEX		DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_VERTEX, drm_i830_vertex_t)
-#define DRM_IOCTL_I830_CLEAR		DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_CLEAR, drm_i830_clear_t)
-#define DRM_IOCTL_I830_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLUSH)
-#define DRM_IOCTL_I830_GETAGE		DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_GETAGE)
-#define DRM_IOCTL_I830_GETBUF		DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETBUF, drm_i830_dma_t)
-#define DRM_IOCTL_I830_SWAP		DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_SWAP)
-#define DRM_IOCTL_I830_COPY		DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_COPY, drm_i830_copy_t)
-#define DRM_IOCTL_I830_DOCOPY		DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_DOCOPY)
-#define DRM_IOCTL_I830_FLIP		DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLIP)
-#define DRM_IOCTL_I830_IRQ_EMIT         DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_EMIT, drm_i830_irq_emit_t)
-#define DRM_IOCTL_I830_IRQ_WAIT         DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_WAIT, drm_i830_irq_wait_t)
-#define DRM_IOCTL_I830_GETPARAM         DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETPARAM, drm_i830_getparam_t)
-#define DRM_IOCTL_I830_SETPARAM         DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_SETPARAM, drm_i830_setparam_t)
+#define DRM_IOCTL_I830_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I830_INIT, drm_i830_init_t)
+#define DRM_IOCTL_I830_VERTEX		DRM_IOW( DRM_COMMAND_BASE + DRM_I830_VERTEX, drm_i830_vertex_t)
+#define DRM_IOCTL_I830_CLEAR		DRM_IOW( DRM_COMMAND_BASE + DRM_I830_CLEAR, drm_i830_clear_t)
+#define DRM_IOCTL_I830_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLUSH)
+#define DRM_IOCTL_I830_GETAGE		DRM_IO ( DRM_COMMAND_BASE + DRM_I830_GETAGE)
+#define DRM_IOCTL_I830_GETBUF		DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETBUF, drm_i830_dma_t)
+#define DRM_IOCTL_I830_SWAP		DRM_IO ( DRM_COMMAND_BASE + DRM_I830_SWAP)
+#define DRM_IOCTL_I830_COPY		DRM_IOW( DRM_COMMAND_BASE + DRM_I830_COPY, drm_i830_copy_t)
+#define DRM_IOCTL_I830_DOCOPY		DRM_IO ( DRM_COMMAND_BASE + DRM_I830_DOCOPY)
+#define DRM_IOCTL_I830_FLIP		DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLIP)
+#define DRM_IOCTL_I830_IRQ_EMIT         DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_IRQ_EMIT, drm_i830_irq_emit_t)
+#define DRM_IOCTL_I830_IRQ_WAIT         DRM_IOW( DRM_COMMAND_BASE + DRM_I830_IRQ_WAIT, drm_i830_irq_wait_t)
+#define DRM_IOCTL_I830_GETPARAM         DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETPARAM, drm_i830_getparam_t)
+#define DRM_IOCTL_I830_SETPARAM         DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_SETPARAM, drm_i830_setparam_t)
 
 typedef struct _drm_i830_clear {
 	int clear_color;
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 7f0028e1010b..000357bf3ac8 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_SET_VBLANK_PIPE	DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
 #define DRM_IOCTL_I915_GET_VBLANK_PIPE	DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
 #define DRM_IOCTL_I915_VBLANK_SWAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t)
+#define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
diff --git a/include/drm/mga_drm.h b/include/drm/mga_drm.h
index 3ffbc4798afa..c16097f99be0 100644
--- a/include/drm/mga_drm.h
+++ b/include/drm/mga_drm.h
@@ -248,7 +248,7 @@ typedef struct _drm_mga_sarea {
 #define DRM_MGA_DMA_BOOTSTRAP  0x0c
 
 #define DRM_IOCTL_MGA_INIT     DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t)
-#define DRM_IOCTL_MGA_FLUSH    DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, drm_lock_t)
+#define DRM_IOCTL_MGA_FLUSH    DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, struct drm_lock)
 #define DRM_IOCTL_MGA_RESET    DRM_IO(  DRM_COMMAND_BASE + DRM_MGA_RESET)
 #define DRM_IOCTL_MGA_SWAP     DRM_IO(  DRM_COMMAND_BASE + DRM_MGA_SWAP)
 #define DRM_IOCTL_MGA_CLEAR    DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_CLEAR, drm_mga_clear_t)
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index fe917dee723a..01a714119506 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -197,4 +197,17 @@ struct drm_nouveau_sarea {
 #define DRM_NOUVEAU_GEM_CPU_FINI       0x43
 #define DRM_NOUVEAU_GEM_INFO           0x44
 
+#define DRM_IOCTL_NOUVEAU_GETPARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam)
+#define DRM_IOCTL_NOUVEAU_SETPARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SETPARAM, struct drm_nouveau_setparam)
+#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC      DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc)
+#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free)
+#define DRM_IOCTL_NOUVEAU_GROBJ_ALLOC        DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GROBJ_ALLOC, struct drm_nouveau_grobj_alloc)
+#define DRM_IOCTL_NOUVEAU_NOTIFIEROBJ_ALLOC  DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, struct drm_nouveau_notifierobj_alloc)
+#define DRM_IOCTL_NOUVEAU_GPUOBJ_FREE        DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GPUOBJ_FREE, struct drm_nouveau_gpuobj_free)
+#define DRM_IOCTL_NOUVEAU_GEM_NEW            DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new)
+#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF        DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf)
+#define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep)
+#define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini)
+#define DRM_IOCTL_NOUVEAU_GEM_INFO           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info)
+
 #endif /* __NOUVEAU_DRM_H__ */
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index 0acaf8f91437..10f8b53bdd40 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -547,8 +547,8 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle)
 #define DRM_IOCTL_RADEON_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
 #define DRM_IOCTL_RADEON_INFO		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
-#define DRM_IOCTL_RADEON_SET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
-#define DRM_IOCTL_RADEON_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
+#define DRM_IOCTL_RADEON_GEM_SET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
+#define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 
 typedef struct drm_radeon_init {
diff --git a/include/drm/savage_drm.h b/include/drm/savage_drm.h
index 8a576ef01821..4863cf6bf96f 100644
--- a/include/drm/savage_drm.h
+++ b/include/drm/savage_drm.h
@@ -63,10 +63,10 @@ typedef struct _drm_savage_sarea {
 #define DRM_SAVAGE_BCI_EVENT_EMIT	0x02
 #define DRM_SAVAGE_BCI_EVENT_WAIT	0x03
 
-#define DRM_IOCTL_SAVAGE_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
-#define DRM_IOCTL_SAVAGE_CMDBUF		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
-#define DRM_IOCTL_SAVAGE_EVENT_EMIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
-#define DRM_IOCTL_SAVAGE_EVENT_WAIT	DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
+#define DRM_IOCTL_SAVAGE_BCI_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
+#define DRM_IOCTL_SAVAGE_BCI_CMDBUF		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_EMIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_WAIT	DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
 
 #define SAVAGE_DMA_PCI	1
 #define SAVAGE_DMA_AGP	3
-- 
cgit v1.2.3-59-g8ed1b


From 99c796df94afca5256860dd4760017f1dbb3480c Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 17 Aug 2010 22:13:22 +0100
Subject: VIDEO: amba clcd: don't disable an already disabled clock

Fix the clock enable/disable tracking in the AMBA CLCD driver so
that the driver doesn't try to disable an already disabled clock,
thereby causing the clock (if shared) to become unbalanced.

This resolves a problem with CLCD on LPC32xx ARM platforms.

Reported-by: Kevin Wells <wellsk40@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/video/amba-clcd.c | 10 ++++++++--
 include/linux/amba/clcd.h |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index afe21e6eb544..1c2c68356ea7 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -80,7 +80,10 @@ static void clcdfb_disable(struct clcd_fb *fb)
 	/*
 	 * Disable CLCD clock source.
 	 */
-	clk_disable(fb->clk);
+	if (fb->clk_enabled) {
+		fb->clk_enabled = false;
+		clk_disable(fb->clk);
+	}
 }
 
 static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
@@ -88,7 +91,10 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
 	/*
 	 * Enable the CLCD clock source.
 	 */
-	clk_enable(fb->clk);
+	if (!fb->clk_enabled) {
+		fb->clk_enabled = true;
+		clk_enable(fb->clk);
+	}
 
 	/*
 	 * Bring up by first enabling..
diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h
index ca16c3801a1e..be33b3affc8a 100644
--- a/include/linux/amba/clcd.h
+++ b/include/linux/amba/clcd.h
@@ -150,6 +150,7 @@ struct clcd_fb {
 	u16			off_cntl;
 	u32			clcd_cntl;
 	u32			cmap[16];
+	bool			clk_enabled;
 };
 
 static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs)
-- 
cgit v1.2.3-59-g8ed1b


From d7627467b7a8dd6944885290a03a07ceb28c10eb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 17 Aug 2010 23:52:56 +0100
Subject: Make do_execve() take a const filename pointer

Make do_execve() take a const filename pointer so that kernel_execve() compiles
correctly on ARM:

arch/arm/kernel/sys_arm.c:88: warning: passing argument 1 of 'do_execve' discards qualifiers from pointer target type

This also requires the argv and envp arguments to be consted twice, once for
the pointer array and once for the strings the array points to.  This is
because do_execve() passes a pointer to the filename (now const) to
copy_strings_kernel().  A simpler alternative would be to cast the filename
pointer in do_execve() when it's passed to copy_strings_kernel().

do_execve() may not change any of the strings it is passed as part of the argv
or envp lists as they are some of them in .rodata, so marking these strings as
const should be fine.

Further kernel_execve() and sys_execve() need to be changed to match.

This has been test built on x86_64, frv, arm and mips.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/process.c             |  5 +++--
 arch/arm/kernel/sys_arm.c               | 14 +++++++++-----
 arch/avr32/kernel/process.c             |  5 +++--
 arch/avr32/kernel/sys_avr32.c           |  4 +++-
 arch/blackfin/kernel/process.c          |  4 +++-
 arch/cris/arch-v10/kernel/process.c     |  4 +++-
 arch/cris/arch-v32/kernel/process.c     |  6 ++++--
 arch/frv/kernel/process.c               |  5 +++--
 arch/h8300/kernel/process.c             |  5 ++++-
 arch/h8300/kernel/sys_h8300.c           |  4 +++-
 arch/ia64/kernel/process.c              |  4 +++-
 arch/m32r/kernel/process.c              |  4 ++--
 arch/m32r/kernel/sys_m32r.c             |  4 +++-
 arch/m68k/kernel/process.c              |  4 +++-
 arch/m68k/kernel/sys_m68k.c             |  4 +++-
 arch/m68knommu/kernel/process.c         |  4 +++-
 arch/m68knommu/kernel/sys_m68k.c        |  4 +++-
 arch/microblaze/kernel/sys_microblaze.c | 10 +++++++---
 arch/mips/kernel/syscall.c              | 10 +++++++---
 arch/mn10300/kernel/process.c           |  4 ++--
 arch/parisc/hpux/fs.c                   |  6 ++++--
 arch/parisc/kernel/process.c            | 15 ++++++++++-----
 arch/powerpc/kernel/process.c           |  5 +++--
 arch/s390/kernel/process.c              |  5 +++--
 arch/score/kernel/sys_score.c           | 10 +++++++---
 arch/sh/kernel/process_32.c             |  7 ++++---
 arch/sh/kernel/process_64.c             |  4 ++--
 arch/sh/kernel/sys_sh32.c               |  4 +++-
 arch/sh/kernel/sys_sh64.c               |  4 +++-
 arch/sparc/kernel/process_32.c          |  6 ++++--
 arch/sparc/kernel/process_64.c          |  4 ++--
 arch/sparc/kernel/sys_sparc_32.c        |  4 +++-
 arch/sparc/kernel/sys_sparc_64.c        |  4 +++-
 arch/tile/kernel/process.c              |  5 +++--
 arch/um/kernel/exec.c                   |  5 +++--
 arch/um/kernel/syscall.c                |  4 +++-
 arch/x86/include/asm/syscalls.h         |  5 +++--
 arch/x86/kernel/process.c               |  5 +++--
 arch/x86/kernel/sys_i386_32.c           |  4 +++-
 arch/xtensa/kernel/process.c            |  5 +++--
 fs/binfmt_misc.c                        |  2 +-
 fs/binfmt_script.c                      |  3 ++-
 fs/exec.c                               | 21 +++++++++++----------
 include/linux/binfmts.h                 |  7 ++++---
 include/linux/sched.h                   |  4 +++-
 include/linux/syscalls.h                |  2 +-
 init/do_mounts_initrd.c                 |  7 ++++---
 init/main.c                             |  6 +++---
 kernel/kmod.c                           |  4 +++-
 security/commoncap.c                    |  2 +-
 50 files changed, 179 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 88e608aebc8c..842dba308eab 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -387,8 +387,9 @@ EXPORT_SYMBOL(dump_elf_task_fp);
  * sys_execve() executes a new program.
  */
 asmlinkage int
-do_sys_execve(const char __user *ufilename, char __user * __user *argv,
-	      char __user * __user *envp, struct pt_regs *regs)
+do_sys_execve(const char __user *ufilename,
+	      const char __user *const __user *argv,
+	      const char __user *const __user *envp, struct pt_regs *regs)
 {
 	int error;
 	char *filename;
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index 5b7c541a4c63..62e7c61d0342 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -62,8 +62,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs)
 /* sys_execve() executes a new program.
  * This is called indirectly via a small wrapper
  */
-asmlinkage int sys_execve(const char __user *filenamei, char __user * __user *argv,
-			  char __user * __user *envp, struct pt_regs *regs)
+asmlinkage int sys_execve(const char __user *filenamei,
+			  const char __user *const __user *argv,
+			  const char __user *const __user *envp, struct pt_regs *regs)
 {
 	int error;
 	char * filename;
@@ -78,14 +79,17 @@ out:
 	return error;
 }
 
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	struct pt_regs regs;
 	int ret;
 
 	memset(&regs, 0, sizeof(struct pt_regs));
-	ret = do_execve(filename, (char __user * __user *)argv,
-			(char __user * __user *)envp, &regs);
+	ret = do_execve(filename,
+			(const char __user *const __user *)argv,
+			(const char __user *const __user *)envp, &regs);
 	if (ret < 0)
 		goto out;
 
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index e5daddff397d..9c46aaad11ce 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -384,8 +384,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs)
 }
 
 asmlinkage int sys_execve(const char __user *ufilename,
-			  char __user *__user *uargv,
-			  char __user *__user *uenvp, struct pt_regs *regs)
+			  const char __user *const __user *uargv,
+			  const char __user *const __user *uenvp,
+			  struct pt_regs *regs)
 {
 	int error;
 	char *filename;
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
index 459349b5ed5a..62635a09ae3e 100644
--- a/arch/avr32/kernel/sys_avr32.c
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -7,7 +7,9 @@
  */
 #include <linux/unistd.h>
 
-int kernel_execve(const char *file, char **argv, char **envp)
+int kernel_execve(const char *file,
+		  const char *const *argv,
+		  const char *const *envp)
 {
 	register long scno asm("r8") = __NR_execve;
 	register long sc1 asm("r12") = (long)file;
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index a566f61c002a..01f98cb964d2 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -209,7 +209,9 @@ copy_thread(unsigned long clone_flags,
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp)
+asmlinkage int sys_execve(const char __user *name,
+			  const char __user *const __user *argv,
+			  const char __user *const __user *envp)
 {
 	int error;
 	char *filename;
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index 93f0f64b1326..9a57db6907f5 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -204,7 +204,9 @@ asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(const char *fname, char **argv, char **envp,
+asmlinkage int sys_execve(const char *fname,
+			  const char *const *argv,
+			  const char *const *envp,
 			  long r13, long mof, long srp, 
 			  struct pt_regs *regs)
 {
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index 2661a9529d70..562f84718906 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -218,8 +218,10 @@ sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp,
 
 /* sys_execve() executes a new program. */
 asmlinkage int
-sys_execve(const char *fname, char **argv, char **envp, long r13, long mof, long srp,
-	struct pt_regs *regs)
+sys_execve(const char *fname,
+	   const char *const *argv,
+	   const char *const *envp, long r13, long mof, long srp,
+	   struct pt_regs *regs)
 {
 	int error;
 	char *filename;
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 428931cf2f0c..2b63b0191f52 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -250,8 +250,9 @@ int copy_thread(unsigned long clone_flags,
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(const char __user *name, char __user * __user *argv,
-			  char __user * __user *envp)
+asmlinkage int sys_execve(const char __user *name,
+			  const char __user *const __user *argv,
+			  const char __user *const __user *envp)
 {
 	int error;
 	char * filename;
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 8b7b78d77d5c..97478138e361 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -212,7 +212,10 @@ int copy_thread(unsigned long clone_flags,
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(const char *name, char **argv, char **envp,int dummy,...)
+asmlinkage int sys_execve(const char *name,
+			  const char *const *argv,
+			  const char *const *envp,
+			  int dummy, ...)
 {
 	int error;
 	char * filename;
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index f9b3f44da69f..dc1ac0243b78 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -51,7 +51,9 @@ asmlinkage void syscall_print(void *dummy,...)
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register long res __asm__("er0");
 	register char *const *_c __asm__("er3") = envp;
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index a879c03b7f1c..16f1c7b04c69 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -633,7 +633,9 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
 }
 
 long
-sys_execve (const char __user *filename, char __user * __user *argv, char __user * __user *envp,
+sys_execve (const char __user *filename,
+	    const char __user *const __user *argv,
+	    const char __user *const __user *envp,
 	    struct pt_regs *regs)
 {
 	char *fname;
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index 8665a4d868ec..422bea9f1dbc 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -289,8 +289,8 @@ asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2,
  * sys_execve() executes a new program.
  */
 asmlinkage int sys_execve(const char __user *ufilename,
-			  char __user * __user *uargv,
-			  char __user * __user *uenvp,
+			  const char __user *const __user *uargv,
+			  const char __user *const __user *uenvp,
 			  unsigned long r3, unsigned long r4, unsigned long r5,
 			  unsigned long r6, struct pt_regs regs)
 {
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index 0a00f467edfa..d841fb6cc703 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -93,7 +93,9 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op)
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register long __scno __asm__ ("r7") = __NR_execve;
 	register long __arg3 __asm__ ("r2") = (long)(envp);
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 221d0b71ce39..18732ab23292 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -315,7 +315,9 @@ EXPORT_SYMBOL(dump_fpu);
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp)
+asmlinkage int sys_execve(const char __user *name,
+			  const char __user *const __user *argv,
+			  const char __user *const __user *envp)
 {
 	int error;
 	char * filename;
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 77896692eb0a..2f431ece7b5f 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -459,7 +459,9 @@ asmlinkage int sys_getpagesize(void)
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register long __res asm ("%d0") = __NR_execve;
 	register long __a asm ("%d1") = (long)(filename);
diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c
index 6350f68cd026..4d090d3c0897 100644
--- a/arch/m68knommu/kernel/process.c
+++ b/arch/m68knommu/kernel/process.c
@@ -350,7 +350,9 @@ void dump(struct pt_regs *fp)
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(const char *name, char **argv, char **envp)
+asmlinkage int sys_execve(const char *name,
+			  const char *const *argv,
+			  const char *const *envp)
 {
 	int error;
 	char * filename;
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index d65e9c4c930c..68488ae47f0a 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -44,7 +44,9 @@ asmlinkage int sys_getpagesize(void)
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register long __res asm ("%d0") = __NR_execve;
 	register long __a asm ("%d1") = (long)(filename);
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index 6abab6ebedbe..2250fe9d269b 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -47,8 +47,10 @@ asmlinkage long microblaze_clone(int flags, unsigned long stack, struct pt_regs
 	return do_fork(flags, stack, regs, 0, NULL, NULL);
 }
 
-asmlinkage long microblaze_execve(const char __user *filenamei, char __user *__user *argv,
-			char __user *__user *envp, struct pt_regs *regs)
+asmlinkage long microblaze_execve(const char __user *filenamei,
+				  const char __user *const __user *argv,
+				  const char __user *const __user *envp,
+				  struct pt_regs *regs)
 {
 	int error;
 	char *filename;
@@ -77,7 +79,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register const char *__a __asm__("r5") = filename;
 	register const void *__b __asm__("r6") = argv;
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index bddce0bca195..1dc6edff45e0 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -258,8 +258,10 @@ asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs)
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
-	error = do_execve(filename, (char __user *__user *) (long)regs.regs[5],
-	                  (char __user *__user *) (long)regs.regs[6], &regs);
+	error = do_execve(filename,
+			  (const char __user *const __user *) (long)regs.regs[5],
+	                  (const char __user *const __user *) (long)regs.regs[6],
+			  &regs);
 	putname(filename);
 
 out:
@@ -436,7 +438,9 @@ asmlinkage void bad_stack(void)
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register unsigned long __a0 asm("$4") = (unsigned long) filename;
 	register unsigned long __a1 asm("$5") = (unsigned long) argv;
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 762eb325b949..f48373e2bc1c 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -269,8 +269,8 @@ asmlinkage long sys_vfork(void)
 }
 
 asmlinkage long sys_execve(const char __user *name,
-			   char __user * __user *argv,
-			   char __user * __user *envp)
+			   const char __user *const __user *argv,
+			   const char __user *const __user *envp)
 {
 	char *filename;
 	int error;
diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c
index 1444875a7611..0dc8543acb4f 100644
--- a/arch/parisc/hpux/fs.c
+++ b/arch/parisc/hpux/fs.c
@@ -41,8 +41,10 @@ int hpux_execve(struct pt_regs *regs)
 	if (IS_ERR(filename))
 		goto out;
 
-	error = do_execve(filename, (char __user * __user *) regs->gr[25],
-		(char __user * __user *) regs->gr[24], regs);
+	error = do_execve(filename,
+			  (const char __user *const __user *) regs->gr[25],
+			  (const char __user *const __user *) regs->gr[24],
+			  regs);
 
 	putname(filename);
 
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 76332dadc6e9..4b4b9181a1a0 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -348,17 +348,22 @@ asmlinkage int sys_execve(struct pt_regs *regs)
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
-	error = do_execve(filename, (char __user * __user *) regs->gr[25],
-		(char __user * __user *) regs->gr[24], regs);
+	error = do_execve(filename,
+			  (const char __user *const __user *) regs->gr[25],
+			  (const char __user *const __user *) regs->gr[24],
+			  regs);
 	putname(filename);
 out:
 
 	return error;
 }
 
-extern int __execve(const char *filename, char *const argv[],
-		char *const envp[], struct task_struct *task);
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+extern int __execve(const char *filename,
+		    const char *const argv[],
+		    const char *const envp[], struct task_struct *task);
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	return __execve(filename, argv, envp, current);
 }
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index feacfb789686..91356ffda2ca 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1034,8 +1034,9 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
 	flush_fp_to_thread(current);
 	flush_altivec_to_thread(current);
 	flush_spe_to_thread(current);
-	error = do_execve(filename, (char __user * __user *) a1,
-			  (char __user * __user *) a2, regs);
+	error = do_execve(filename,
+			  (const char __user *const __user *) a1,
+			  (const char __user *const __user *) a2, regs);
 	putname(filename);
 out:
 	return error;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7eafaf2662b9..d3a2d1c6438e 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -267,8 +267,9 @@ asmlinkage void execve_tail(void)
 /*
  * sys_execve() executes a new program.
  */
-SYSCALL_DEFINE3(execve, const char __user *, name, char __user * __user *, argv,
-		char __user * __user *, envp)
+SYSCALL_DEFINE3(execve, const char __user *, name,
+		const char __user *const __user *, argv,
+		const char __user *const __user *, envp)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	char *filename;
diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c
index 651096ff8db4..e478bf9a7e91 100644
--- a/arch/score/kernel/sys_score.c
+++ b/arch/score/kernel/sys_score.c
@@ -99,8 +99,10 @@ score_execve(struct pt_regs *regs)
 	if (IS_ERR(filename))
 		return error;
 
-	error = do_execve(filename, (char __user *__user*)regs->regs[5],
-			  (char __user *__user *) regs->regs[6], regs);
+	error = do_execve(filename,
+			  (const char __user *const __user *)regs->regs[5],
+			  (const char __user *const __user *)regs->regs[6],
+			  regs);
 
 	putname(filename);
 	return error;
@@ -110,7 +112,9 @@ score_execve(struct pt_regs *regs)
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register unsigned long __r4 asm("r4") = (unsigned long) filename;
 	register unsigned long __r5 asm("r5") = (unsigned long) argv;
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 052981972ae6..762a13984bbd 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -296,9 +296,10 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv,
-			  char __user * __user *uenvp, unsigned long r7,
-			  struct pt_regs __regs)
+asmlinkage int sys_execve(const char __user *ufilename,
+			  const char __user *const __user *uargv,
+			  const char __user *const __user *uenvp,
+			  unsigned long r7, struct pt_regs __regs)
 {
 	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
 	int error;
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 68d128d651b3..210c1cabcb7f 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -497,8 +497,8 @@ asmlinkage int sys_execve(const char *ufilename, char **uargv,
 		goto out;
 
 	error = do_execve(filename,
-			  (char __user * __user *)uargv,
-			  (char __user * __user *)uenvp,
+			  (const char __user *const __user *)uargv,
+			  (const char __user *const __user *)uenvp,
 			  pregs);
 	putname(filename);
 out:
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index eb68bfdd86e6..f56b6fe5c5d0 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -71,7 +71,9 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register long __sc0 __asm__ ("r3") = __NR_execve;
 	register long __sc4 __asm__ ("r4") = (long) filename;
diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c
index 287235768bc5..c5a38c4bf410 100644
--- a/arch/sh/kernel/sys_sh64.c
+++ b/arch/sh/kernel/sys_sh64.c
@@ -33,7 +33,9 @@
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve);
 	register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename;
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 40e29fc8a4d6..17529298c50a 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -633,8 +633,10 @@ asmlinkage int sparc_execve(struct pt_regs *regs)
 	if(IS_ERR(filename))
 		goto out;
 	error = do_execve(filename,
-			  (char __user * __user *)regs->u_regs[base + UREG_I1],
-			  (char __user * __user *)regs->u_regs[base + UREG_I2],
+			  (const char __user *const  __user *)
+			  regs->u_regs[base + UREG_I1],
+			  (const char __user *const  __user *)
+			  regs->u_regs[base + UREG_I2],
 			  regs);
 	putname(filename);
 out:
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index dbe81a368b45..485f54748384 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -739,9 +739,9 @@ asmlinkage int sparc_execve(struct pt_regs *regs)
 	if (IS_ERR(filename))
 		goto out;
 	error = do_execve(filename,
-			  (char __user * __user *)
+			  (const char __user *const __user *)
 			  regs->u_regs[base + UREG_I1],
-			  (char __user * __user *)
+			  (const char __user *const __user *)
 			  regs->u_regs[base + UREG_I2], regs);
 	putname(filename);
 	if (!error) {
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index ee995b7dae7e..50794137d710 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -282,7 +282,9 @@ out:
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	long __res;
 	register long __g1 __asm__ ("g1") = __NR_execve;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 3d435c42e6db..f836f4e93afe 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -758,7 +758,9 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	long __res;
 	register long __g1 __asm__ ("g1") = __NR_execve;
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index ed590ad0acdc..985cc28c74c5 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs)
 /*
  * sys_execve() executes a new program.
  */
-long _sys_execve(char __user *path, char __user *__user *argv,
-		 char __user *__user *envp, struct pt_regs *regs)
+long _sys_execve(const char __user *path,
+		 const char __user *const __user *argv,
+		 const char __user *const __user *envp, struct pt_regs *regs)
 {
 	long error;
 	char *filename;
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 59b20d93b6d4..cd145eda3579 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -44,8 +44,9 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 	PT_REGS_SP(regs) = esp;
 }
 
-static long execve1(const char *file, char __user * __user *argv,
-		    char __user *__user *env)
+static long execve1(const char *file,
+		    const char __user *const __user *argv,
+		    const char __user *const __user *env)
 {
 	long error;
 
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index 7427c0b1930c..5ddb246626db 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -51,7 +51,9 @@ long old_mmap(unsigned long addr, unsigned long len,
 	return err;
 }
 
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	mm_segment_t fs;
 	int ret;
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index feb2ff9bfc2d..f1d8b441fc77 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *);
 /* kernel/process.c */
 int sys_fork(struct pt_regs *);
 int sys_vfork(struct pt_regs *);
-long sys_execve(const char __user *, char __user * __user *,
-		char __user * __user *, struct pt_regs *);
+long sys_execve(const char __user *,
+		const char __user *const __user *,
+		const char __user *const __user *, struct pt_regs *);
 long sys_clone(unsigned long, unsigned long, void __user *,
 	       void __user *, struct pt_regs *);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 64ecaf0af9af..57d1868a86aa 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread);
 /*
  * sys_execve() executes a new program.
  */
-long sys_execve(const char __user *name, char __user * __user *argv,
-		char __user * __user *envp, struct pt_regs *regs)
+long sys_execve(const char __user *name,
+		const char __user *const __user *argv,
+		const char __user *const __user *envp, struct pt_regs *regs)
 {
 	long error;
 	char *filename;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 196552bb412c..d5e06624e34a 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -28,7 +28,9 @@
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
  */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
 {
 	long __res;
 	asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx"
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 7c2f38f68ebb..e3558b9a58ba 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -318,8 +318,9 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp,
  */
 
 asmlinkage
-long xtensa_execve(const char __user *name, char __user * __user *argv,
-                   char __user * __user *envp,
+long xtensa_execve(const char __user *name,
+		   const char __user *const __user *argv,
+                   const char __user *const __user *envp,
                    long a3, long a4, long a5,
                    struct pt_regs *regs)
 {
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9e60fd201716..a7528b913936 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	Node *fmt;
 	struct file * interp_file = NULL;
 	char iname[BINPRM_BUF_SIZE];
-	char *iname_addr = iname;
+	const char *iname_addr = iname;
 	int retval;
 	int fd_binary = -1;
 
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index aca9d55afb22..396a9884591f 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -16,7 +16,8 @@
 
 static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
 {
-	char *cp, *i_name, *i_arg;
+	const char *i_arg, *i_name;
+	char *cp;
 	struct file *file;
 	char interp[BINPRM_BUF_SIZE];
 	int retval;
diff --git a/fs/exec.c b/fs/exec.c
index 7761837e4500..05c7d6b84df7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -361,13 +361,13 @@ err:
 /*
  * count() counts the number of strings in array ARGV.
  */
-static int count(char __user * __user * argv, int max)
+static int count(const char __user * const __user * argv, int max)
 {
 	int i = 0;
 
 	if (argv != NULL) {
 		for (;;) {
-			char __user * p;
+			const char __user * p;
 
 			if (get_user(p, argv))
 				return -EFAULT;
@@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max)
  * processes's memory to the new process's stack.  The call to get_user_pages()
  * ensures the destination page is created and not swapped out.
  */
-static int copy_strings(int argc, char __user * __user * argv,
+static int copy_strings(int argc, const char __user *const __user *argv,
 			struct linux_binprm *bprm)
 {
 	struct page *kmapped_page = NULL;
@@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv,
 	int ret;
 
 	while (argc-- > 0) {
-		char __user *str;
+		const char __user *str;
 		int len;
 		unsigned long pos;
 
@@ -470,12 +470,13 @@ out:
 /*
  * Like copy_strings, but get argv and its values from kernel memory.
  */
-int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
+int copy_strings_kernel(int argc, const char *const *argv,
+			struct linux_binprm *bprm)
 {
 	int r;
 	mm_segment_t oldfs = get_fs();
 	set_fs(KERNEL_DS);
-	r = copy_strings(argc, (char __user * __user *)argv, bprm);
+	r = copy_strings(argc, (const char __user *const  __user *)argv, bprm);
 	set_fs(oldfs);
 	return r;
 }
@@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec);
 void setup_new_exec(struct linux_binprm * bprm)
 {
 	int i, ch;
-	char * name;
+	const char *name;
 	char tcomm[sizeof(current->comm)];
 
 	arch_pick_mmap_layout(current->mm);
@@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler);
 /*
  * sys_execve() executes a new program.
  */
-int do_execve(char * filename,
-	char __user *__user *argv,
-	char __user *__user *envp,
+int do_execve(const char * filename,
+	const char __user *const __user *argv,
+	const char __user *const __user *envp,
 	struct pt_regs * regs)
 {
 	struct linux_binprm *bprm;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index c809e286d213..a065612fc928 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -50,8 +50,8 @@ struct linux_binprm{
 	int unsafe;		/* how unsafe this exec is (mask of LSM_UNSAFE_*) */
 	unsigned int per_clear;	/* bits to clear in current->personality */
 	int argc, envc;
-	char * filename;	/* Name of binary as seen by procps */
-	char * interp;		/* Name of the binary really executed. Most
+	const char * filename;	/* Name of binary as seen by procps */
+	const char * interp;	/* Name of the binary really executed. Most
 				   of the time same as filename, but could be
 				   different for binfmt_{misc,script} */
 	unsigned interp_flags;
@@ -126,7 +126,8 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
-extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
+extern int copy_strings_kernel(int argc, const char *const *argv,
+			       struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
 extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ce160d68f5e7..1e2a6db2d7dd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2109,7 +2109,9 @@ extern void daemonize(const char *, ...);
 extern int allow_signal(int);
 extern int disallow_signal(int);
 
-extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
+extern int do_execve(const char *,
+		     const char __user * const __user *,
+		     const char __user * const __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 6e5d19788634..e6319d18a55d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -820,7 +820,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
 				  u64 mask, int fd,
 				  const char  __user *pathname);
 
-int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]);
 
 
 asmlinkage long sys_perf_event_open(
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 2b108538d0d9..3098a38f3ae1 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -24,10 +24,11 @@ static int __init no_initrd(char *str)
 
 __setup("noinitrd", no_initrd);
 
-static int __init do_linuxrc(void * shell)
+static int __init do_linuxrc(void *_shell)
 {
-	static char *argv[] = { "linuxrc", NULL, };
-	extern char * envp_init[];
+	static const char *argv[] = { "linuxrc", NULL, };
+	extern const char *envp_init[];
+	const char *shell = _shell;
 
 	sys_close(old_fd);sys_close(root_fd);
 	sys_setsid();
diff --git a/init/main.c b/init/main.c
index 22d61cb06f98..94ab488039aa 100644
--- a/init/main.c
+++ b/init/main.c
@@ -197,8 +197,8 @@ static int __init set_reset_devices(char *str)
 
 __setup("reset_devices", set_reset_devices);
 
-static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
 static const char *panic_later, *panic_param;
 
 extern const struct obs_kernel_param __setup_start[], __setup_end[];
@@ -809,7 +809,7 @@ static void __init do_pre_smp_initcalls(void)
 		do_one_initcall(*fn);
 }
 
-static void run_init_process(char *init_filename)
+static void run_init_process(const char *init_filename)
 {
 	argv_init[0] = init_filename;
 	kernel_execve(init_filename, argv_init, envp_init);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6e9b19667a8d..9cd0591c96a2 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data)
 			goto fail;
 	}
 
-	retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
+	retval = kernel_execve(sub_info->path,
+			       (const char *const *)sub_info->argv,
+			       (const char *const *)sub_info->envp);
 
 	/* Exec failed? */
 fail:
diff --git a/security/commoncap.c b/security/commoncap.c
index 4e015996dd4d..9d172e6e330c 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -40,7 +40,7 @@
  *
  * Warn if that happens, once per boot.
  */
-static void warn_setuid_and_fcaps_mixed(char *fname)
+static void warn_setuid_and_fcaps_mixed(const char *fname)
 {
 	static int warned;
 	if (!warned) {
-- 
cgit v1.2.3-59-g8ed1b


From 5c79a5ae23e72fa12f1c7c528f62bf3ea35da0dc Mon Sep 17 00:00:00 2001
From: Ernst Schwab <eschwab@online.de>
Date: Mon, 16 Aug 2010 15:10:11 +0200
Subject: spi.h: missing kernel-doc notation, please fix

Added comments in kernel-doc notation for previously added struct fields.

Signed-off-by: Ernst Schwab <eschwab@online.de>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/spi/spi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index ae0a5286f558..92e52a1e6af3 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -213,6 +213,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @dma_alignment: SPI controller constraint on DMA buffers alignment.
  * @mode_bits: flags understood by this controller driver
  * @flags: other constraints relevant to this driver
+ * @bus_lock_spinlock: spinlock for SPI bus locking
+ * @bus_lock_mutex: mutex for SPI bus locking
+ * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use
  * @setup: updates the device mode and clocking records used by a
  *	device's SPI controller; protocol code may call this.  This
  *	must fail if an unrecognized or unsupported mode is requested.
-- 
cgit v1.2.3-59-g8ed1b


From 87e99511ea54510ffb60b98001d108794d5037f8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Aug 2010 17:05:45 +0200
Subject: kill BH_Ordered flag

Instead of abusing a buffer_head flag just add a variant of
sync_dirty_buffer which allows passing the exact type of write
flag required.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/buffer.c                 | 17 ++++++++--------
 fs/jbd/commit.c             | 49 +++++++++++++++++++++++----------------------
 fs/jbd2/commit.c            | 39 ++++++++++++++----------------------
 fs/nilfs2/super.c           | 28 +++++++++++++-------------
 include/linux/buffer_head.h |  3 +--
 5 files changed, 63 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/fs/buffer.c b/fs/buffer.c
index 50efa339e051..6c8ad977f3d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2911,13 +2911,6 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(buffer_delay(bh));
 	BUG_ON(buffer_unwritten(bh));
 
-	/*
-	 * Mask in barrier bit for a write (could be either a WRITE or a
-	 * WRITE_SYNC
-	 */
-	if (buffer_ordered(bh) && (rw & WRITE))
-		rw |= WRITE_BARRIER;
-
 	/*
 	 * Only clear out a write error when rewriting
 	 */
@@ -3021,7 +3014,7 @@ EXPORT_SYMBOL(ll_rw_block);
  * and then start new I/O and then wait upon it.  The caller must have a ref on
  * the buffer_head.
  */
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
 {
 	int ret = 0;
 
@@ -3030,7 +3023,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	if (test_clear_buffer_dirty(bh)) {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_write_sync;
-		ret = submit_bh(WRITE_SYNC, bh);
+		ret = submit_bh(rw, bh);
 		wait_on_buffer(bh);
 		if (buffer_eopnotsupp(bh)) {
 			clear_buffer_eopnotsupp(bh);
@@ -3043,6 +3036,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+	return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
 EXPORT_SYMBOL(sync_dirty_buffer);
 
 /*
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 28a9ddaa0c49..95d8c11c929e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal,
 	struct buffer_head *bh;
 	journal_header_t *header;
 	int ret;
-	int barrier_done = 0;
 
 	if (is_journal_aborted(journal))
 		return 0;
@@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal,
 
 	JBUFFER_TRACE(descriptor, "write commit block");
 	set_buffer_dirty(bh);
+
 	if (journal->j_flags & JFS_BARRIER) {
-		set_buffer_ordered(bh);
-		barrier_done = 1;
-	}
-	ret = sync_dirty_buffer(bh);
-	if (barrier_done)
-		clear_buffer_ordered(bh);
-	/* is it possible for another commit to fail at roughly
-	 * the same time as this one?  If so, we don't want to
-	 * trust the barrier flag in the super, but instead want
-	 * to remember if we sent a barrier request
-	 */
-	if (ret == -EOPNOTSUPP && barrier_done) {
-		char b[BDEVNAME_SIZE];
+		ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
 
-		printk(KERN_WARNING
-			"JBD: barrier-based sync failed on %s - "
-			"disabling barriers\n",
-			bdevname(journal->j_dev, b));
-		spin_lock(&journal->j_state_lock);
-		journal->j_flags &= ~JFS_BARRIER;
-		spin_unlock(&journal->j_state_lock);
+		/*
+		 * Is it possible for another commit to fail at roughly
+		 * the same time as this one?  If so, we don't want to
+		 * trust the barrier flag in the super, but instead want
+		 * to remember if we sent a barrier request
+		 */
+		if (ret == -EOPNOTSUPP) {
+			char b[BDEVNAME_SIZE];
 
-		/* And try again, without the barrier */
-		set_buffer_uptodate(bh);
-		set_buffer_dirty(bh);
+			printk(KERN_WARNING
+				"JBD: barrier-based sync failed on %s - "
+				"disabling barriers\n",
+				bdevname(journal->j_dev, b));
+			spin_lock(&journal->j_state_lock);
+			journal->j_flags &= ~JFS_BARRIER;
+			spin_unlock(&journal->j_state_lock);
+
+			/* And try again, without the barrier */
+			set_buffer_uptodate(bh);
+			set_buffer_dirty(bh);
+			ret = sync_dirty_buffer(bh);
+		}
+	} else {
 		ret = sync_dirty_buffer(bh);
 	}
+
 	put_bh(bh);		/* One for getblk() */
 	journal_put_journal_head(descriptor);
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f52e5e8049f1..7c068c189d80 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal,
 	struct commit_header *tmp;
 	struct buffer_head *bh;
 	int ret;
-	int barrier_done = 0;
 	struct timespec now = current_kernel_time();
 
 	if (is_journal_aborted(journal))
@@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal,
 	if (journal->j_flags & JBD2_BARRIER &&
 	    !JBD2_HAS_INCOMPAT_FEATURE(journal,
 				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-		set_buffer_ordered(bh);
-		barrier_done = 1;
-	}
-	ret = submit_bh(WRITE_SYNC_PLUG, bh);
-	if (barrier_done)
-		clear_buffer_ordered(bh);
-
-	/* is it possible for another commit to fail at roughly
-	 * the same time as this one?  If so, we don't want to
-	 * trust the barrier flag in the super, but instead want
-	 * to remember if we sent a barrier request
-	 */
-	if (ret == -EOPNOTSUPP && barrier_done) {
-		printk(KERN_WARNING
-		       "JBD2: Disabling barriers on %s, "
-		       "not supported by device\n", journal->j_devname);
-		write_lock(&journal->j_state_lock);
-		journal->j_flags &= ~JBD2_BARRIER;
-		write_unlock(&journal->j_state_lock);
+		ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
+		if (ret == -EOPNOTSUPP) {
+			printk(KERN_WARNING
+			       "JBD2: Disabling barriers on %s, "
+			       "not supported by device\n", journal->j_devname);
+			write_lock(&journal->j_state_lock);
+			journal->j_flags &= ~JBD2_BARRIER;
+			write_unlock(&journal->j_state_lock);
 
-		/* And try again, without the barrier */
-		lock_buffer(bh);
-		set_buffer_uptodate(bh);
-		clear_buffer_dirty(bh);
+			/* And try again, without the barrier */
+			lock_buffer(bh);
+			set_buffer_uptodate(bh);
+			clear_buffer_dirty(bh);
+			ret = submit_bh(WRITE_SYNC_PLUG, bh);
+		}
+	} else {
 		ret = submit_bh(WRITE_SYNC_PLUG, bh);
 	}
 	*cbh = bh;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1fa86b9df73b..68345430fb48 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
 {
 	struct the_nilfs *nilfs = sbi->s_nilfs;
 	int err;
-	int barrier_done = 0;
 
-	if (nilfs_test_opt(sbi, BARRIER)) {
-		set_buffer_ordered(nilfs->ns_sbh[0]);
-		barrier_done = 1;
-	}
  retry:
 	set_buffer_dirty(nilfs->ns_sbh[0]);
-	err = sync_dirty_buffer(nilfs->ns_sbh[0]);
-	if (err == -EOPNOTSUPP && barrier_done) {
-		nilfs_warning(sbi->s_super, __func__,
-			      "barrier-based sync failed. "
-			      "disabling barriers\n");
-		nilfs_clear_opt(sbi, BARRIER);
-		barrier_done = 0;
-		clear_buffer_ordered(nilfs->ns_sbh[0]);
-		goto retry;
+
+	if (nilfs_test_opt(sbi, BARRIER)) {
+		err = __sync_dirty_buffer(nilfs->ns_sbh[0],
+					  WRITE_SYNC | WRITE_BARRIER);
+		if (err == -EOPNOTSUPP) {
+			nilfs_warning(sbi->s_super, __func__,
+				      "barrier-based sync failed. "
+				      "disabling barriers\n");
+			nilfs_clear_opt(sbi, BARRIER);
+			goto retry;
+		}
+	} else {
+		err = sync_dirty_buffer(nilfs->ns_sbh[0]);
 	}
+
 	if (unlikely(err)) {
 		printk(KERN_ERR
 		       "NILFS: unable to write superblock (err=%d)\n", err);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 43e649a72529..72c1cf83eb85 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -32,7 +32,6 @@ enum bh_state_bits {
 	BH_Delay,	/* Buffer is not yet allocated on disk */
 	BH_Boundary,	/* Block is followed by a discontiguity */
 	BH_Write_EIO,	/* I/O error on write */
-	BH_Ordered,	/* ordered write */
 	BH_Eopnotsupp,	/* operation not supported (barrier) */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
 	BH_Quiet,	/* Buffer Error Prinks to be quiet */
@@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write)
 BUFFER_FNS(Delay, delay)
 BUFFER_FNS(Boundary, boundary)
 BUFFER_FNS(Write_EIO, write_io_error)
-BUFFER_FNS(Ordered, ordered)
 BUFFER_FNS(Eopnotsupp, eopnotsupp)
 BUFFER_FNS(Unwritten, unwritten)
 
@@ -183,6 +181,7 @@ void unlock_buffer(struct buffer_head *bh);
 void __lock_buffer(struct buffer_head *bh);
 void ll_rw_block(int, int, struct buffer_head * bh[]);
 int sync_dirty_buffer(struct buffer_head *bh);
+int __sync_dirty_buffer(struct buffer_head *bh, int rw);
 int submit_bh(int, struct buffer_head *);
 void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
-- 
cgit v1.2.3-59-g8ed1b


From 9cb569d601e0b93e01c20a22872270ec663b75f6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Aug 2010 17:06:24 +0200
Subject: remove SWRITE* I/O types

These flags aren't real I/O types, but tell ll_rw_block to always
lock the buffer instead of giving up on a failed trylock.

Instead add a new write_dirty_buffer helper that implements this semantic
and use it from the existing SWRITE* callers.  Note that the ll_rw_block
code had a bug where it didn't promote WRITE_SYNC_PLUG properly, which
this patch fixes.

In the ufs code clean up the helper that used to call ll_rw_block
to mirror sync_dirty_buffer, which is the function it implements for
compound buffers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/buffer.c                 | 52 +++++++++++++++++++++++++--------------------
 fs/fat/misc.c               |  4 +++-
 fs/jbd/checkpoint.c         |  4 +++-
 fs/jbd/journal.c            |  2 +-
 fs/jbd/revoke.c             |  2 +-
 fs/jbd2/checkpoint.c        |  4 +++-
 fs/jbd2/journal.c           |  2 +-
 fs/jbd2/revoke.c            |  2 +-
 fs/reiserfs/journal.c       |  2 +-
 fs/ufs/balloc.c             | 24 +++++++--------------
 fs/ufs/ialloc.c             | 18 ++++++----------
 fs/ufs/truncate.c           | 18 ++++++----------
 fs/ufs/util.c               | 20 +++++++----------
 fs/ufs/util.h               |  3 +--
 include/linux/buffer_head.h |  1 +
 include/linux/fs.h          |  9 --------
 16 files changed, 73 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6c8ad977f3d4..3e7dca279d1c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				spin_unlock(lock);
 				/*
 				 * Ensure any pending I/O completes so that
-				 * ll_rw_block() actually writes the current
-				 * contents - it is a noop if I/O is still in
-				 * flight on potentially older contents.
+				 * write_dirty_buffer() actually writes the
+				 * current contents - it is a noop if I/O is
+				 * still in flight on potentially older
+				 * contents.
 				 */
-				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -2949,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
  * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
  * requests an I/O operation on them, either a %READ or a %WRITE.  The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
  *
  * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request.  Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request.  Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
  *
  * ll_rw_block sets b_end_io to simple completion handler that marks
  * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2980,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
-			lock_buffer(bh);
-		else if (!trylock_buffer(bh))
+		if (!trylock_buffer(bh))
 			continue;
-
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
-		    rw == SWRITE_SYNC_PLUG) {
+		if (rw == WRITE) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
-				if (rw == SWRITE_SYNC)
-					submit_bh(WRITE_SYNC, bh);
-				else
-					submit_bh(WRITE, bh);
+				submit_bh(WRITE, bh);
 				continue;
 			}
 		} else {
@@ -3009,6 +3002,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 }
 EXPORT_SYMBOL(ll_rw_block);
 
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+	lock_buffer(bh);
+	if (!test_clear_buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return;
+	}
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
 /*
  * For a data-integrity writeout, we need to wait upon any in-progress I/O
  * and then start new I/O and then wait upon it.  The caller must have a ref on
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 1fa23f6ffba5..1736f2356388 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
 {
 	int i, err = 0;
 
-	ll_rw_block(SWRITE, nr_bhs, bhs);
+	for (i = 0; i < nr_bhs; i++)
+		write_dirty_buffer(bhs[i], WRITE);
+
 	for (i = 0; i < nr_bhs; i++) {
 		wait_on_buffer(bhs[i]);
 		if (buffer_eopnotsupp(bhs[i])) {
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index b0435dd0654d..05a38b9c4c0e 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
-	ll_rw_block(SWRITE, *batch_count, bhs);
+	for (i = 0; i < *batch_count; i++)
+		write_dirty_buffer(bhs[i], WRITE);
+
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f19ce94693d8..2c4b1f109da9 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait)
 	if (wait)
 		sync_dirty_buffer(bh);
 	else
-		ll_rw_block(SWRITE, 1, &bh);
+		write_dirty_buffer(bh, WRITE);
 
 out:
 	/* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad717328343a..d29018307e2e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
 	set_buffer_jwrite(bh);
 	BUFFER_TRACE(bh, "write");
 	set_buffer_dirty(bh);
-	ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
+	write_dirty_buffer(bh, write_op);
 }
 #endif
 
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c23a0f4e8a3..5247e7ffdcb4 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count)
 {
 	int i;
 
-	ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs);
+	for (i = 0; i < *batch_count; i++)
+		write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
+
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = journal->j_chkpt_bhs[i];
 		clear_buffer_jwrite(bh);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ad5866aaf0f9..0e8014ea6b94 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
 			set_buffer_uptodate(bh);
 		}
 	} else
-		ll_rw_block(SWRITE, 1, &bh);
+		write_dirty_buffer(bh, WRITE);
 
 out:
 	/* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index a360b06af2e3..9ad321fd63fd 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
 	set_buffer_jwrite(bh);
 	BUFFER_TRACE(bh, "write");
 	set_buffer_dirty(bh);
-	ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
+	write_dirty_buffer(bh, write_op);
 }
 #endif
 
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1ec952b1f036..812e2c05aa29 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb,
 	/* flush out the real blocks */
 	for (i = 0; i < get_desc_trans_len(desc); i++) {
 		set_buffer_dirty(real_blocks[i]);
-		ll_rw_block(SWRITE, 1, real_blocks + i);
+		write_dirty_buffer(real_blocks[i], WRITE);
 	}
 	for (i = 0; i < get_desc_trans_len(desc); i++) {
 		wait_on_buffer(real_blocks[i]);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 048484fb10d2..46f7a807bbc1 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
 	
 	ubh_mark_buffer_dirty (USPI_UBH(uspi));
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
-	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
-		ubh_wait_on_buffer (UCPI_UBH(ucpi));
-	}
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		ubh_sync_block(UCPI_UBH(ucpi));
 	sb->s_dirt = 1;
 	
 	unlock_super (sb);
@@ -207,10 +205,8 @@ do_more:
 
 	ubh_mark_buffer_dirty (USPI_UBH(uspi));
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
-	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
-		ubh_wait_on_buffer (UCPI_UBH(ucpi));
-	}
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		ubh_sync_block(UCPI_UBH(ucpi));
 
 	if (overflow) {
 		fragment += count;
@@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 	
 	ubh_mark_buffer_dirty (USPI_UBH(uspi));
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
-	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
-		ubh_wait_on_buffer (UCPI_UBH(ucpi));
-	}
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		ubh_sync_block(UCPI_UBH(ucpi));
 	sb->s_dirt = 1;
 
 	UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
@@ -680,10 +674,8 @@ cg_found:
 succed:
 	ubh_mark_buffer_dirty (USPI_UBH(uspi));
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
-	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
-		ubh_wait_on_buffer (UCPI_UBH(ucpi));
-	}
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		ubh_sync_block(UCPI_UBH(ucpi));
 	sb->s_dirt = 1;
 
 	result += cgno * uspi->s_fpg;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 428017e018fe..2eabf04af3de 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode)
 
 	ubh_mark_buffer_dirty (USPI_UBH(uspi));
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
-	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
-		ubh_wait_on_buffer (UCPI_UBH(ucpi));
-	}
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		ubh_sync_block(UCPI_UBH(ucpi));
 	
 	sb->s_dirt = 1;
 	unlock_super (sb);
@@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
 
 	fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
 	ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
-	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
-		ubh_wait_on_buffer(UCPI_UBH(ucpi));
-	}
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		ubh_sync_block(UCPI_UBH(ucpi));
 
 	UFSD("EXIT\n");
 }
@@ -290,10 +286,8 @@ cg_found:
 	}
 	ubh_mark_buffer_dirty (USPI_UBH(uspi));
 	ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
-	if (sb->s_flags & MS_SYNCHRONOUS) {
-		ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
-		ubh_wait_on_buffer (UCPI_UBH(ucpi));
-	}
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		ubh_sync_block(UCPI_UBH(ucpi));
 	sb->s_dirt = 1;
 
 	inode->i_ino = cg * uspi->s_ipg + bit;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 34d5cb135320..a58f9155fc9a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
 		ubh_bforget(ind_ubh);
 		ind_ubh = NULL;
 	}
-	if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) {
-		ubh_ll_rw_block(SWRITE, ind_ubh);
-		ubh_wait_on_buffer (ind_ubh);
-	}
+	if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh))
+		ubh_sync_block(ind_ubh);
 	ubh_brelse (ind_ubh);
 	
 	UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
 		ubh_bforget(dind_bh);
 		dind_bh = NULL;
 	}
-	if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) {
-		ubh_ll_rw_block(SWRITE, dind_bh);
-		ubh_wait_on_buffer (dind_bh);
-	}
+	if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh))
+		ubh_sync_block(dind_bh);
 	ubh_brelse (dind_bh);
 	
 	UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode)
 		ubh_bforget(tind_bh);
 		tind_bh = NULL;
 	}
-	if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) {
-		ubh_ll_rw_block(SWRITE, tind_bh);
-		ubh_wait_on_buffer (tind_bh);
-	}
+	if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh))
+		ubh_sync_block(tind_bh);
 	ubh_brelse (tind_bh);
 	
 	UFSD("EXIT: ino %lu\n", inode->i_ino);
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 85a7fc9e4a4e..d2c36d53fe66 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag)
 	}
 }
 
-void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh)
+void ubh_sync_block(struct ufs_buffer_head *ubh)
 {
-	if (!ubh)
-		return;
+	if (ubh) {
+		unsigned i;
 
-	ll_rw_block(rw, ubh->count, ubh->bh);
-}
+		for (i = 0; i < ubh->count; i++)
+			write_dirty_buffer(ubh->bh[i], WRITE);
 
-void ubh_wait_on_buffer (struct ufs_buffer_head * ubh)
-{
-	unsigned i;
-	if (!ubh)
-		return;
-	for ( i = 0; i < ubh->count; i++ )
-		wait_on_buffer (ubh->bh[i]);
+		for (i = 0; i < ubh->count; i++)
+			wait_on_buffer(ubh->bh[i]);
+	}
 }
 
 void ubh_bforget (struct ufs_buffer_head * ubh)
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 0466036912f1..9f8775ce381c 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *);
 extern void ubh_brelse_uspi (struct ufs_sb_private_info *);
 extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *);
 extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int);
-extern void ubh_ll_rw_block(int, struct ufs_buffer_head *);
-extern void ubh_wait_on_buffer (struct ufs_buffer_head *);
+extern void ubh_sync_block(struct ufs_buffer_head *);
 extern void ubh_bforget (struct ufs_buffer_head *);
 extern int  ubh_buffer_dirty (struct ufs_buffer_head *);
 #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size)
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 72c1cf83eb85..ec94c12f21da 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -182,6 +182,7 @@ void __lock_buffer(struct buffer_head *bh);
 void ll_rw_block(int, int, struct buffer_head * bh[]);
 int sync_dirty_buffer(struct buffer_head *bh);
 int __sync_dirty_buffer(struct buffer_head *bh, int rw);
+void write_dirty_buffer(struct buffer_head *bh, int rw);
 int submit_bh(int, struct buffer_head *);
 void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9a96b4d83fc1..29f7c975304c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -125,9 +125,6 @@ struct inodes_stat_t {
  *			block layer could (in theory) choose to ignore this
  *			request if it runs into resource problems.
  * WRITE		A normal async write. Device will be plugged.
- * SWRITE		Like WRITE, but a special case for ll_rw_block() that
- *			tells it to lock the buffer first. Normally a buffer
- *			must be locked before doing IO.
  * WRITE_SYNC_PLUG	Synchronous write. Identical to WRITE, but passes down
  *			the hint that someone will be waiting on this IO
  *			shortly. The device must still be unplugged explicitly,
@@ -138,9 +135,6 @@ struct inodes_stat_t {
  *			immediately after submission. The write equivalent
  *			of READ_SYNC.
  * WRITE_ODIRECT_PLUG	Special case write for O_DIRECT only.
- * SWRITE_SYNC
- * SWRITE_SYNC_PLUG	Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
- *			See SWRITE.
  * WRITE_BARRIER	Like WRITE_SYNC, but tells the block layer that all
  *			previously submitted writes must be safely on storage
  *			before this one is started. Also guarantees that when
@@ -155,7 +149,6 @@ struct inodes_stat_t {
 #define READ			0
 #define WRITE			RW_MASK
 #define READA			RWA_MASK
-#define SWRITE			(WRITE | READA)
 
 #define READ_SYNC		(READ | REQ_SYNC | REQ_UNPLUG)
 #define READ_META		(READ | REQ_META)
@@ -165,8 +158,6 @@ struct inodes_stat_t {
 #define WRITE_META		(WRITE | REQ_META)
 #define WRITE_BARRIER		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
 				 REQ_HARDBARRIER)
-#define SWRITE_SYNC_PLUG	(SWRITE | REQ_SYNC | REQ_NOIDLE)
-#define SWRITE_SYNC		(SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
 
 /*
  * These aren't really reads or writes, they pass down information about
-- 
cgit v1.2.3-59-g8ed1b


From 2a4419b5b2a77f3f4537c14f7ad7df95770655dd Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Wed, 18 Aug 2010 04:37:33 +1000
Subject: fs: fs_struct rwlock to spinlock

fs: fs_struct rwlock to spinlock

struct fs_struct.lock is an rwlock with the read-side used to protect root and
pwd members while taking references to them. Taking a reference to a path
typically requires just 2 atomic ops, so the critical section is very small.
Parallel read-side operations would have cacheline contention on the lock, the
dentry, and the vfsmount cachelines, so the rwlock is unlikely to ever give a
real parallelism increase.

Replace it with a spinlock to avoid one or two atomic operations in typical
path lookup fastpath.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/pohmelfs/path_entry.c |  8 ++++----
 fs/exec.c                             |  4 ++--
 fs/fs_struct.c                        | 32 ++++++++++++++++----------------
 include/linux/fs_struct.h             | 14 +++++++-------
 kernel/fork.c                         | 10 +++++-----
 5 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index cdc4dd50d638..8ec83d2dffb7 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le
 		return -ENOENT;
 	}
 
-	read_lock(&current->fs->lock);
+	spin_lock(&current->fs->lock);
 	path.mnt = mntget(current->fs->root.mnt);
-	read_unlock(&current->fs->lock);
+	spin_unlock(&current->fs->lock);
 
 	path.dentry = d;
 
@@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
 		return -ENOENT;
 	}
 
-	read_lock(&current->fs->lock);
+	spin_lock(&current->fs->lock);
 	root = dget(current->fs->root.dentry);
-	read_unlock(&current->fs->lock);
+	spin_unlock(&current->fs->lock);
 
 	spin_lock(&dcache_lock);
 
diff --git a/fs/exec.c b/fs/exec.c
index 7761837e4500..5adab2c93eca 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1117,7 +1117,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
 	bprm->unsafe = tracehook_unsafe_exec(p);
 
 	n_fs = 1;
-	write_lock(&p->fs->lock);
+	spin_lock(&p->fs->lock);
 	rcu_read_lock();
 	for (t = next_thread(p); t != p; t = next_thread(t)) {
 		if (t->fs == p->fs)
@@ -1134,7 +1134,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
 			res = 1;
 		}
 	}
-	write_unlock(&p->fs->lock);
+	spin_unlock(&p->fs->lock);
 
 	return res;
 }
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 1ee40eb9a2c0..ed45a9cf5f3d 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 {
 	struct path old_root;
 
-	write_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	old_root = fs->root;
 	fs->root = *path;
 	path_get(path);
-	write_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 	if (old_root.dentry)
 		path_put(&old_root);
 }
@@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 {
 	struct path old_pwd;
 
-	write_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	old_pwd = fs->pwd;
 	fs->pwd = *path;
 	path_get(path);
-	write_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 
 	if (old_pwd.dentry)
 		path_put(&old_pwd);
@@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 		task_lock(p);
 		fs = p->fs;
 		if (fs) {
-			write_lock(&fs->lock);
+			spin_lock(&fs->lock);
 			if (fs->root.dentry == old_root->dentry
 			    && fs->root.mnt == old_root->mnt) {
 				path_get(new_root);
@@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 				fs->pwd = *new_root;
 				count++;
 			}
-			write_unlock(&fs->lock);
+			spin_unlock(&fs->lock);
 		}
 		task_unlock(p);
 	} while_each_thread(g, p);
@@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk)
 	if (fs) {
 		int kill;
 		task_lock(tsk);
-		write_lock(&fs->lock);
+		spin_lock(&fs->lock);
 		tsk->fs = NULL;
 		kill = !--fs->users;
-		write_unlock(&fs->lock);
+		spin_unlock(&fs->lock);
 		task_unlock(tsk);
 		if (kill)
 			free_fs_struct(fs);
@@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 	if (fs) {
 		fs->users = 1;
 		fs->in_exec = 0;
-		rwlock_init(&fs->lock);
+		spin_lock_init(&fs->lock);
 		fs->umask = old->umask;
 		get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
 	}
@@ -121,10 +121,10 @@ int unshare_fs_struct(void)
 		return -ENOMEM;
 
 	task_lock(current);
-	write_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	kill = !--fs->users;
 	current->fs = new_fs;
-	write_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 	task_unlock(current);
 
 	if (kill)
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask);
 /* to be mentioned only in INIT_TASK */
 struct fs_struct init_fs = {
 	.users		= 1,
-	.lock		= __RW_LOCK_UNLOCKED(init_fs.lock),
+	.lock		= __SPIN_LOCK_UNLOCKED(init_fs.lock),
 	.umask		= 0022,
 };
 
@@ -156,14 +156,14 @@ void daemonize_fs_struct(void)
 
 		task_lock(current);
 
-		write_lock(&init_fs.lock);
+		spin_lock(&init_fs.lock);
 		init_fs.users++;
-		write_unlock(&init_fs.lock);
+		spin_unlock(&init_fs.lock);
 
-		write_lock(&fs->lock);
+		spin_lock(&fs->lock);
 		current->fs = &init_fs;
 		kill = !--fs->users;
-		write_unlock(&fs->lock);
+		spin_unlock(&fs->lock);
 
 		task_unlock(current);
 		if (kill)
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index eca3d5202138..a42b5bf02f8b 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -5,7 +5,7 @@
 
 struct fs_struct {
 	int users;
-	rwlock_t lock;
+	spinlock_t lock;
 	int umask;
 	int in_exec;
 	struct path root, pwd;
@@ -23,29 +23,29 @@ extern int unshare_fs_struct(void);
 
 static inline void get_fs_root(struct fs_struct *fs, struct path *root)
 {
-	read_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	*root = fs->root;
 	path_get(root);
-	read_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 }
 
 static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
 {
-	read_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	*pwd = fs->pwd;
 	path_get(pwd);
-	read_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 }
 
 static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root,
 				       struct path *pwd)
 {
-	read_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	*root = fs->root;
 	path_get(root);
 	*pwd = fs->pwd;
 	path_get(pwd);
-	read_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 }
 
 #endif /* _LINUX_FS_STRUCT_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 98b450876f93..856eac3ec52e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 	struct fs_struct *fs = current->fs;
 	if (clone_flags & CLONE_FS) {
 		/* tsk->fs is already what we want */
-		write_lock(&fs->lock);
+		spin_lock(&fs->lock);
 		if (fs->in_exec) {
-			write_unlock(&fs->lock);
+			spin_unlock(&fs->lock);
 			return -EAGAIN;
 		}
 		fs->users++;
-		write_unlock(&fs->lock);
+		spin_unlock(&fs->lock);
 		return 0;
 	}
 	tsk->fs = copy_fs_struct(fs);
@@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 
 		if (new_fs) {
 			fs = current->fs;
-			write_lock(&fs->lock);
+			spin_lock(&fs->lock);
 			current->fs = new_fs;
 			if (--fs->users)
 				new_fs = NULL;
 			else
 				new_fs = fs;
-			write_unlock(&fs->lock);
+			spin_unlock(&fs->lock);
 		}
 
 		if (new_mm) {
-- 
cgit v1.2.3-59-g8ed1b


From ee2ffa0dfdd2db19705f2ba1c6a4c0bfe8122dd8 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Wed, 18 Aug 2010 04:37:35 +1000
Subject: fs: cleanup files_lock locking

fs: cleanup files_lock locking

Lock tty_files with a new spinlock, tty_files_lock; provide helpers to
manipulate the per-sb files list; unexport the files_lock spinlock.

Cc: linux-kernel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/char/pty.c       |  6 +++++-
 drivers/char/tty_io.c    | 26 ++++++++++++++++++--------
 fs/file_table.c          | 42 ++++++++++++++++++------------------------
 fs/open.c                |  4 ++--
 include/linux/fs.h       |  7 ++-----
 include/linux/tty.h      |  1 +
 security/selinux/hooks.c |  4 ++--
 7 files changed, 48 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index ad46eae1f9bb..2c64faa8efa4 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
 	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+
+	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+	spin_lock(&tty_files_lock);
+	list_add(&filp->f_u.fu_list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
 
 	retval = devpts_pty_new(inode, tty->link);
 	if (retval)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 0350c42375a2..cd5b829634ea 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
 DEFINE_MUTEX(tty_mutex);
 EXPORT_SYMBOL(tty_mutex);
 
+/* Spinlock to protect the tty->tty_files list */
+DEFINE_SPINLOCK(tty_files_lock);
+
 static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
 static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
 ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -235,11 +238,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
 	struct list_head *p;
 	int count = 0;
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	list_for_each(p, &tty->tty_files) {
 		count++;
 	}
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_SLAVE &&
 	    tty->link && tty->link->count)
@@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty)
 	   workqueue with the lock held */
 	check_tty_count(tty, "tty_hangup");
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	/* This breaks for file handles being sent over AF_UNIX sockets ? */
 	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
 		if (filp->f_op->write == redirected_tty_write)
@@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty)
 		__tty_fasync(-1, filp, 0);	/* can't block */
 		filp->f_op = &hung_up_tty_fops;
 	}
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 
 	tty_ldisc_hangup(tty);
 
@@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_struct *work)
 	tty_driver_kref_put(driver);
 	module_put(driver->owner);
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	list_del_init(&tty->tty_files);
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 
 	put_pid(tty->pgrp);
 	put_pid(tty->session);
@@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, struct file *filp)
 	 *  - do_tty_hangup no longer sees this file descriptor as
 	 *    something that needs to be handled for hangups.
 	 */
-	file_kill(filp);
+	spin_lock(&tty_files_lock);
+	BUG_ON(list_empty(&filp->f_u.fu_list));
+	list_del_init(&filp->f_u.fu_list);
+	spin_unlock(&tty_files_lock);
 	filp->private_data = NULL;
 
 	/*
@@ -1840,7 +1846,11 @@ got_driver:
 	}
 
 	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+	BUG_ON(list_empty(&filp->f_u.fu_list));
+	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+	spin_lock(&tty_files_lock);
+	list_add(&filp->f_u.fu_list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
 	check_tty_count(tty, "tty_open");
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
diff --git a/fs/file_table.c b/fs/file_table.c
index edecd36fed9b..6f0e62ecfddd 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -32,8 +32,7 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +248,7 @@ static void __fput(struct file *file)
 		cdev_put(inode->i_cdev);
 	fops_put(file->f_op);
 	put_pid(file->f_owner.pid);
-	file_kill(file);
+	file_sb_list_del(file);
 	if (file->f_mode & FMODE_WRITE)
 		drop_file_write_access(file);
 	file->f_path.dentry = NULL;
@@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 	return file;
 }
 
-
 void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
-		file_kill(file);
+		file_sb_list_del(file);
 		file_free(file);
 	}
 }
 
-void file_move(struct file *file, struct list_head *list)
+void file_sb_list_add(struct file *file, struct super_block *sb)
 {
-	if (!list)
-		return;
-	file_list_lock();
-	list_move(&file->f_u.fu_list, list);
-	file_list_unlock();
+	spin_lock(&files_lock);
+	BUG_ON(!list_empty(&file->f_u.fu_list));
+	list_add(&file->f_u.fu_list, &sb->s_files);
+	spin_unlock(&files_lock);
 }
 
-void file_kill(struct file *file)
+void file_sb_list_del(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		file_list_lock();
+		spin_lock(&files_lock);
 		list_del_init(&file->f_u.fu_list);
-		file_list_unlock();
+		spin_unlock(&files_lock);
 	}
 }
 
@@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block *sb)
 	struct file *file;
 
 	/* Check that no files are currently opened for writing. */
-	file_list_lock();
+	spin_lock(&files_lock);
 	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
 		struct inode *inode = file->f_path.dentry->d_inode;
 
@@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block *sb)
 		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
 			goto too_bad;
 	}
-	file_list_unlock();
+	spin_unlock(&files_lock);
 	return 1; /* Tis' cool bro. */
 too_bad:
-	file_list_unlock();
+	spin_unlock(&files_lock);
 	return 0;
 }
 
@@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *sb)
 	struct file *f;
 
 retry:
-	file_list_lock();
+	spin_lock(&files_lock);
 	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
 		struct vfsmount *mnt;
 		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
@@ -408,16 +405,13 @@ retry:
 			continue;
 		file_release_write(f);
 		mnt = mntget(f->f_path.mnt);
-		file_list_unlock();
-		/*
-		 * This can sleep, so we can't hold
-		 * the file_list_lock() spinlock.
-		 */
+		/* This can sleep, so we can't hold the spinlock. */
+		spin_unlock(&files_lock);
 		mnt_drop_write(mnt);
 		mntput(mnt);
 		goto retry;
 	}
-	file_list_unlock();
+	spin_unlock(&files_lock);
 }
 
 void __init files_init(unsigned long mempages)
diff --git a/fs/open.c b/fs/open.c
index 630715f9f73d..d74e1983e8dc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	f->f_path.mnt = mnt;
 	f->f_pos = 0;
 	f->f_op = fops_get(inode->i_fop);
-	file_move(f, &inode->i_sb->s_files);
+	file_sb_list_add(f, inode->i_sb);
 
 	error = security_dentry_open(f, cred);
 	if (error)
@@ -721,7 +721,7 @@ cleanup_all:
 			mnt_drop_write(mnt);
 		}
 	}
-	file_kill(f);
+	file_sb_list_del(f);
 	f->f_path.dentry = NULL;
 	f->f_path.mnt = NULL;
 cleanup_file:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 29f7c975304c..5a9a9e5a3705 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -944,9 +944,6 @@ struct file {
 	unsigned long f_mnt_write_state;
 #endif
 };
-extern spinlock_t files_lock;
-#define file_list_lock() spin_lock(&files_lock);
-#define file_list_unlock() spin_unlock(&files_lock);
 
 #define get_file(x)	atomic_long_inc(&(x)->f_count)
 #define fput_atomic(x)	atomic_long_add_unless(&(x)->f_count, -1, 1)
@@ -2188,8 +2185,8 @@ static inline void insert_inode_hash(struct inode *inode) {
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
-extern void file_move(struct file *f, struct list_head *list);
-extern void file_kill(struct file *f);
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
 #ifdef CONFIG_BLOCK
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1437da3ddc62..f6b371a2514e 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
 extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
 
 extern struct mutex tty_mutex;
+extern spinlock_t tty_files_lock;
 
 extern void tty_write_unlock(struct tty_struct *tty);
 extern int tty_write_lock(struct tty_struct *tty, int ndelay);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 42043f96e54f..bd7da0f0ccf3 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 
 	tty = get_current_tty();
 	if (tty) {
-		file_list_lock();
+		spin_lock(&tty_files_lock);
 		if (!list_empty(&tty->tty_files)) {
 			struct inode *inode;
 
@@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 				drop_tty = 1;
 			}
 		}
-		file_list_unlock();
+		spin_unlock(&tty_files_lock);
 		tty_kref_put(tty);
 	}
 	/* Reset controlling tty. */
-- 
cgit v1.2.3-59-g8ed1b


From d996b62a8df1d935b01319bf8defb95b5709f7b8 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Wed, 18 Aug 2010 04:37:36 +1000
Subject: tty: fix fu_list abuse

tty: fix fu_list abuse

tty code abuses fu_list, which causes a bug in remount,ro handling.

If a tty device node is opened on a filesystem, then the last link to the inode
removed, the filesystem will be allowed to be remounted readonly. This is
because fs_may_remount_ro does not find the 0 link tty inode on the file sb
list (because the tty code incorrectly removed it to use for its own purpose).
This can result in a filesystem with errors after it is marked "clean".

Taking idea from Christoph's initial patch, allocate a tty private struct
at file->private_data and put our required list fields in there, linking
file and tty. This makes tty nodes behave the same way as other device nodes
and avoid meddling with the vfs, and avoids this bug.

The error handling is not trivial in the tty code, so for this bugfix, I take
the simple approach of using __GFP_NOFAIL and don't worry about memory errors.
This is not a problem because our allocator doesn't fail small allocs as a rule
anyway. So proper error handling is left as an exercise for tty hackers.

[ Arguably filesystem's device inode would ideally be divorced from the
driver's pseudo inode when it is opened, but in practice it's not clear whether
that will ever be worth implementing. ]

Cc: linux-kernel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/char/pty.c       |  6 +---
 drivers/char/tty_io.c    | 84 +++++++++++++++++++++++++++++++-----------------
 fs/internal.h            |  2 ++
 include/linux/fs.h       |  2 --
 include/linux/tty.h      |  8 +++++
 security/selinux/hooks.c |  5 ++-
 6 files changed, 69 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 2c64faa8efa4..c350d01716bd 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -675,12 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 	}
 
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
-	filp->private_data = tty;
 
-	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
-	spin_lock(&tty_files_lock);
-	list_add(&filp->f_u.fu_list, &tty->tty_files);
-	spin_unlock(&tty_files_lock);
+	tty_add_file(tty, filp);
 
 	retval = devpts_pty_new(inode, tty->link);
 	if (retval)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index cd5b829634ea..949067a0bd47 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -188,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty)
 	kfree(tty);
 }
 
+static inline struct tty_struct *file_tty(struct file *file)
+{
+	return ((struct tty_file_private *)file->private_data)->tty;
+}
+
+/* Associate a new file with the tty structure */
+void tty_add_file(struct tty_struct *tty, struct file *file)
+{
+	struct tty_file_private *priv;
+
+	/* XXX: must implement proper error handling in callers */
+	priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL);
+
+	priv->tty = tty;
+	priv->file = file;
+	file->private_data = priv;
+
+	spin_lock(&tty_files_lock);
+	list_add(&priv->list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
+}
+
+/* Delete file from its tty */
+void tty_del_file(struct file *file)
+{
+	struct tty_file_private *priv = file->private_data;
+
+	spin_lock(&tty_files_lock);
+	list_del(&priv->list);
+	spin_unlock(&tty_files_lock);
+	file->private_data = NULL;
+	kfree(priv);
+}
+
+
 #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
 
 /**
@@ -500,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty)
 	struct file *cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
+	struct tty_file_private *priv;
 	int    closecount = 0, n;
 	unsigned long flags;
 	int refs = 0;
@@ -509,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty)
 
 
 	spin_lock(&redirect_lock);
-	if (redirect && redirect->private_data == tty) {
+	if (redirect && file_tty(redirect) == tty) {
 		f = redirect;
 		redirect = NULL;
 	}
@@ -524,7 +560,8 @@ void __tty_hangup(struct tty_struct *tty)
 
 	spin_lock(&tty_files_lock);
 	/* This breaks for file handles being sent over AF_UNIX sockets ? */
-	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
+	list_for_each_entry(priv, &tty->tty_files, list) {
+		filp = priv->file;
 		if (filp->f_op->write == redirected_tty_write)
 			cons_filp = filp;
 		if (filp->f_op->write != tty_write)
@@ -892,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
 			loff_t *ppos)
 {
 	int i;
-	struct tty_struct *tty;
-	struct inode *inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 
-	tty = file->private_data;
-	inode = file->f_path.dentry->d_inode;
 	if (tty_paranoia_check(tty, inode, "tty_read"))
 		return -EIO;
 	if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
@@ -1068,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 static ssize_t tty_write(struct file *file, const char __user *buf,
 						size_t count, loff_t *ppos)
 {
-	struct tty_struct *tty;
 	struct inode *inode = file->f_path.dentry->d_inode;
+	struct tty_struct *tty = file_tty(file);
+ 	struct tty_ldisc *ld;
 	ssize_t ret;
-	struct tty_ldisc *ld;
 
-	tty = file->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_write"))
 		return -EIO;
 	if (!tty || !tty->ops->write ||
@@ -1510,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx)
 
 int tty_release(struct inode *inode, struct file *filp)
 {
-	struct tty_struct *tty, *o_tty;
+	struct tty_struct *tty = file_tty(filp);
+	struct tty_struct *o_tty;
 	int	pty_master, tty_closing, o_tty_closing, do_sleep;
 	int	devpts;
 	int	idx;
 	char	buf[64];
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_release_dev"))
 		return 0;
 
@@ -1674,11 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp)
 	 *  - do_tty_hangup no longer sees this file descriptor as
 	 *    something that needs to be handled for hangups.
 	 */
-	spin_lock(&tty_files_lock);
-	BUG_ON(list_empty(&filp->f_u.fu_list));
-	list_del_init(&filp->f_u.fu_list);
-	spin_unlock(&tty_files_lock);
-	filp->private_data = NULL;
+	tty_del_file(filp);
 
 	/*
 	 * Perform some housekeeping before deciding whether to return.
@@ -1845,12 +1875,8 @@ got_driver:
 		return PTR_ERR(tty);
 	}
 
-	filp->private_data = tty;
-	BUG_ON(list_empty(&filp->f_u.fu_list));
-	file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
-	spin_lock(&tty_files_lock);
-	list_add(&filp->f_u.fu_list, &tty->tty_files);
-	spin_unlock(&tty_files_lock);
+	tty_add_file(tty, filp);
+
 	check_tty_count(tty, "tty_open");
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
@@ -1926,11 +1952,10 @@ got_driver:
 
 static unsigned int tty_poll(struct file *filp, poll_table *wait)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty = file_tty(filp);
 	struct tty_ldisc *ld;
 	int ret = 0;
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
 		return 0;
 
@@ -1943,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait)
 
 static int __tty_fasync(int fd, struct file *filp, int on)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty = file_tty(filp);
 	unsigned long flags;
 	int retval = 0;
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
 		goto out;
 
@@ -2501,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty);
  */
 long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct tty_struct *tty, *real_tty;
+	struct tty_struct *tty = file_tty(file);
+	struct tty_struct *real_tty;
 	void __user *p = (void __user *)arg;
 	int retval;
 	struct tty_ldisc *ld;
 	struct inode *inode = file->f_dentry->d_inode;
 
-	tty = file->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
 		return -EINVAL;
 
@@ -2629,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct tty_struct *tty = file->private_data;
+	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 	int retval = -ENOIOCTLCMD;
 
@@ -2721,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty)
 				if (!filp)
 					continue;
 				if (filp->f_op->read == tty_read &&
-				    filp->private_data == tty) {
+				    file_tty(filp) == tty) {
 					printk(KERN_NOTICE "SAK: killed process %d"
 					    " (%s): fd#%d opened to the tty\n",
 					    task_pid_nr(p), p->comm, i);
diff --git a/fs/internal.h b/fs/internal.h
index 6b706bc60a66..6a5c13a80660 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
 /*
  * file_table.c
  */
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
 extern void mark_files_ro(struct super_block *);
 extern struct file *get_empty_filp(void);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5a9a9e5a3705..5e65add0f163 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2185,8 +2185,6 @@ static inline void insert_inode_hash(struct inode *inode) {
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
 #ifdef CONFIG_BLOCK
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f6b371a2514e..67d64e6efe7a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -329,6 +329,13 @@ struct tty_struct {
 	struct tty_port *port;
 };
 
+/* Each of a tty's open files has private_data pointing to tty_file_private */
+struct tty_file_private {
+	struct tty_struct *tty;
+	struct file *file;
+	struct list_head list;
+};
+
 /* tty magic number */
 #define TTY_MAGIC		0x5401
 
@@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p);
 extern struct tty_struct *get_current_tty(void);
 extern void tty_default_fops(struct file_operations *fops);
 extern struct tty_struct *alloc_tty_struct(void);
+extern void tty_add_file(struct tty_struct *tty, struct file *file);
 extern void free_tty_struct(struct tty_struct *tty);
 extern void initialize_tty_struct(struct tty_struct *tty,
 		struct tty_driver *driver, int idx);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index bd7da0f0ccf3..4796ddd4e721 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2172,6 +2172,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 	if (tty) {
 		spin_lock(&tty_files_lock);
 		if (!list_empty(&tty->tty_files)) {
+			struct tty_file_private *file_priv;
 			struct inode *inode;
 
 			/* Revalidate access to controlling tty.
@@ -2179,7 +2180,9 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 			   than using file_has_perm, as this particular open
 			   file may belong to another process and we are only
 			   interested in the inode-based check here. */
-			file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list);
+			file_priv = list_first_entry(&tty->tty_files,
+						struct tty_file_private, list);
+			file = file_priv->file;
 			inode = file->f_path.dentry->d_inode;
 			if (inode_has_perm(cred, inode,
 					   FILE__READ | FILE__WRITE, NULL)) {
-- 
cgit v1.2.3-59-g8ed1b


From 2dc91abe03d8ce6dd7f9251faffafca5f6b9e85d Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Wed, 18 Aug 2010 04:37:37 +1000
Subject: lglock: introduce special lglock and brlock spin locks

lglock: introduce special lglock and brlock spin locks

This patch introduces "local-global" locks (lglocks). These can be used to:

- Provide fast exclusive access to per-CPU data, with exclusive access to
  another CPU's data allowed but possibly subject to contention, and to provide
  very slow exclusive access to all per-CPU data.
- Or to provide very fast and scalable read serialisation, and to provide
  very slow exclusive serialisation of data (not necessarily per-CPU data).

Brlocks are also implemented as a short-hand notation for the latter use
case.

Thanks to Paul for local/global naming convention.

Cc: linux-kernel@vger.kernel.org
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lglock.h | 172 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 include/linux/lglock.h

(limited to 'include')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
new file mode 100644
index 000000000000..b288cb713b90
--- /dev/null
+++ b/include/linux/lglock.h
@@ -0,0 +1,172 @@
+/*
+ * Specialised local-global spinlock. Can only be declared as global variables
+ * to avoid overhead and keep things simple (and we don't want to start using
+ * these inside dynamically allocated structures).
+ *
+ * "local/global locks" (lglocks) can be used to:
+ *
+ * - Provide fast exclusive access to per-CPU data, with exclusive access to
+ *   another CPU's data allowed but possibly subject to contention, and to
+ *   provide very slow exclusive access to all per-CPU data.
+ * - Or to provide very fast and scalable read serialisation, and to provide
+ *   very slow exclusive serialisation of data (not necessarily per-CPU data).
+ *
+ * Brlocks are also implemented as a short-hand notation for the latter use
+ * case.
+ *
+ * Copyright 2009, 2010, Nick Piggin, Novell Inc.
+ */
+#ifndef __LINUX_LGLOCK_H
+#define __LINUX_LGLOCK_H
+
+#include <linux/spinlock.h>
+#include <linux/lockdep.h>
+#include <linux/percpu.h>
+
+/* can make br locks by using local lock for read side, global lock for write */
+#define br_lock_init(name)	name##_lock_init()
+#define br_read_lock(name)	name##_local_lock()
+#define br_read_unlock(name)	name##_local_unlock()
+#define br_write_lock(name)	name##_global_lock_online()
+#define br_write_unlock(name)	name##_global_unlock_online()
+
+#define DECLARE_BRLOCK(name)	DECLARE_LGLOCK(name)
+#define DEFINE_BRLOCK(name)	DEFINE_LGLOCK(name)
+
+
+#define lg_lock_init(name)	name##_lock_init()
+#define lg_local_lock(name)	name##_local_lock()
+#define lg_local_unlock(name)	name##_local_unlock()
+#define lg_local_lock_cpu(name, cpu)	name##_local_lock_cpu(cpu)
+#define lg_local_unlock_cpu(name, cpu)	name##_local_unlock_cpu(cpu)
+#define lg_global_lock(name)	name##_global_lock()
+#define lg_global_unlock(name)	name##_global_unlock()
+#define lg_global_lock_online(name) name##_global_lock_online()
+#define lg_global_unlock_online(name) name##_global_unlock_online()
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define LOCKDEP_INIT_MAP lockdep_init_map
+
+#define DEFINE_LGLOCK_LOCKDEP(name)					\
+ struct lock_class_key name##_lock_key;					\
+ struct lockdep_map name##_lock_dep_map;				\
+ EXPORT_SYMBOL(name##_lock_dep_map)
+
+#else
+#define LOCKDEP_INIT_MAP(a, b, c, d)
+
+#define DEFINE_LGLOCK_LOCKDEP(name)
+#endif
+
+
+#define DECLARE_LGLOCK(name)						\
+ extern void name##_lock_init(void);					\
+ extern void name##_local_lock(void);					\
+ extern void name##_local_unlock(void);					\
+ extern void name##_local_lock_cpu(int cpu);				\
+ extern void name##_local_unlock_cpu(int cpu);				\
+ extern void name##_global_lock(void);					\
+ extern void name##_global_unlock(void);				\
+ extern void name##_global_lock_online(void);				\
+ extern void name##_global_unlock_online(void);				\
+
+#define DEFINE_LGLOCK(name)						\
+									\
+ DEFINE_PER_CPU(arch_spinlock_t, name##_lock);				\
+ DEFINE_LGLOCK_LOCKDEP(name);						\
+									\
+ void name##_lock_init(void) {						\
+	int i;								\
+	LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
+	for_each_possible_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;	\
+	}								\
+ }									\
+ EXPORT_SYMBOL(name##_lock_init);					\
+									\
+ void name##_local_lock(void) {						\
+	arch_spinlock_t *lock;						\
+	preempt_disable();						\
+	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
+	lock = &__get_cpu_var(name##_lock);				\
+	arch_spin_lock(lock);						\
+ }									\
+ EXPORT_SYMBOL(name##_local_lock);					\
+									\
+ void name##_local_unlock(void) {					\
+	arch_spinlock_t *lock;						\
+	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
+	lock = &__get_cpu_var(name##_lock);				\
+	arch_spin_unlock(lock);						\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_local_unlock);					\
+									\
+ void name##_local_lock_cpu(int cpu) {					\
+	arch_spinlock_t *lock;						\
+	preempt_disable();						\
+	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
+	lock = &per_cpu(name##_lock, cpu);				\
+	arch_spin_lock(lock);						\
+ }									\
+ EXPORT_SYMBOL(name##_local_lock_cpu);					\
+									\
+ void name##_local_unlock_cpu(int cpu) {				\
+	arch_spinlock_t *lock;						\
+	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
+	lock = &per_cpu(name##_lock, cpu);				\
+	arch_spin_unlock(lock);						\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_local_unlock_cpu);				\
+									\
+ void name##_global_lock_online(void) {					\
+	int i;								\
+	preempt_disable();						\
+	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_lock(lock);					\
+	}								\
+ }									\
+ EXPORT_SYMBOL(name##_global_lock_online);				\
+									\
+ void name##_global_unlock_online(void) {				\
+	int i;								\
+	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_unlock(lock);					\
+	}								\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_global_unlock_online);				\
+									\
+ void name##_global_lock(void) {					\
+	int i;								\
+	preempt_disable();						\
+	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_lock(lock);					\
+	}								\
+ }									\
+ EXPORT_SYMBOL(name##_global_lock);					\
+									\
+ void name##_global_unlock(void) {					\
+	int i;								\
+	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
+	for_each_online_cpu(i) {					\
+		arch_spinlock_t *lock;					\
+		lock = &per_cpu(name##_lock, i);			\
+		arch_spin_unlock(lock);					\
+	}								\
+	preempt_enable();						\
+ }									\
+ EXPORT_SYMBOL(name##_global_unlock);
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 6416ccb7899960868f5016751fb81bf25213d24f Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Wed, 18 Aug 2010 04:37:38 +1000
Subject: fs: scale files_lock

fs: scale files_lock

Improve scalability of files_lock by adding per-cpu, per-sb files lists,
protected with an lglock. The lglock provides fast access to the per-cpu lists
to add and remove files. It also provides a snapshot of all the per-cpu lists
(although this is very slow).

One difficulty with this approach is that a file can be removed from the list
by another CPU. We must track which per-cpu list the file is on with a new
variale in the file struct (packed into a hole on 64-bit archs). Scalability
could suffer if files are frequently removed from different cpu's list.

However loads with frequent removal of files imply short interval between
adding and removing the files, and the scheduler attempts to avoid moving
processes too far away. Also, even in the case of cross-CPU removal, the
hardware has much more opportunity to parallelise cacheline transfers with N
cachelines than with 1.

A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs
degenerates to contending on a single lock, which is no worse than before. When
more than one CPU are allocating files, even if they are always freed by
different CPUs, there will be more parallelism than the single-lock case.

Testing results:

On a 2 socket, 8 core opteron, I measure the number of times the lock is taken
to remove the file, the number of times it is removed by the same CPU that
added it, and the number of times it is removed by the same node that added it.

Booting:    locks=  25049 cpu-hits=  23174 (92.5%) node-hits=  23945 (95.6%)
kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%)
dbench 64   locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%)

So a file is removed from the same CPU it was added by over 90% of the time.
It remains within the same node 95% of the time.

Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile.

                throughput
2.6.34-rc2      24.5
+patch          24.9

                us      sys     idle    IO wait (in %)
2.6.34-rc2      51.25   28.25   17.25   3.25
+patch          53.75   18.5    19      8.75

So significantly less CPU time spent in kernel code, higher idle time and
slightly higher throughput.

Single threaded performance difference was within the noise of microbenchmarks.
That is not to say penalty does not exist, the code is larger and more memory
accesses required so it will be slightly slower.

Cc: linux-kernel@vger.kernel.org
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 108 ++++++++++++++++++++++++++++++++++++++++++++---------
 fs/super.c         |  18 +++++++++
 include/linux/fs.h |   7 ++++
 3 files changed, 115 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/fs/file_table.c b/fs/file_table.c
index 6f0e62ecfddd..a04bdd81c11c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
 #include <linux/sysctl.h>
+#include <linux/lglock.h>
 #include <linux/percpu_counter.h>
+#include <linux/percpu.h>
 #include <linux/ima.h>
 
 #include <asm/atomic.h>
@@ -32,7 +34,8 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+DECLARE_LGLOCK(files_lglock);
+DEFINE_LGLOCK(files_lglock);
 
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
@@ -336,30 +339,98 @@ void put_filp(struct file *file)
 	}
 }
 
+static inline int file_list_cpu(struct file *file)
+{
+#ifdef CONFIG_SMP
+	return file->f_sb_list_cpu;
+#else
+	return smp_processor_id();
+#endif
+}
+
+/* helper for file_sb_list_add to reduce ifdefs */
+static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
+{
+	struct list_head *list;
+#ifdef CONFIG_SMP
+	int cpu;
+	cpu = smp_processor_id();
+	file->f_sb_list_cpu = cpu;
+	list = per_cpu_ptr(sb->s_files, cpu);
+#else
+	list = &sb->s_files;
+#endif
+	list_add(&file->f_u.fu_list, list);
+}
+
+/**
+ * file_sb_list_add - add a file to the sb's file list
+ * @file: file to add
+ * @sb: sb to add it to
+ *
+ * Use this function to associate a file with the superblock of the inode it
+ * refers to.
+ */
 void file_sb_list_add(struct file *file, struct super_block *sb)
 {
-	spin_lock(&files_lock);
-	BUG_ON(!list_empty(&file->f_u.fu_list));
-	list_add(&file->f_u.fu_list, &sb->s_files);
-	spin_unlock(&files_lock);
+	lg_local_lock(files_lglock);
+	__file_sb_list_add(file, sb);
+	lg_local_unlock(files_lglock);
 }
 
+/**
+ * file_sb_list_del - remove a file from the sb's file list
+ * @file: file to remove
+ * @sb: sb to remove it from
+ *
+ * Use this function to remove a file from its superblock.
+ */
 void file_sb_list_del(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		spin_lock(&files_lock);
+		lg_local_lock_cpu(files_lglock, file_list_cpu(file));
 		list_del_init(&file->f_u.fu_list);
-		spin_unlock(&files_lock);
+		lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
 	}
 }
 
+#ifdef CONFIG_SMP
+
+/*
+ * These macros iterate all files on all CPUs for a given superblock.
+ * files_lglock must be held globally.
+ */
+#define do_file_list_for_each_entry(__sb, __file)		\
+{								\
+	int i;							\
+	for_each_possible_cpu(i) {				\
+		struct list_head *list;				\
+		list = per_cpu_ptr((__sb)->s_files, i);		\
+		list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry				\
+	}							\
+}
+
+#else
+
+#define do_file_list_for_each_entry(__sb, __file)		\
+{								\
+	struct list_head *list;					\
+	list = &(sb)->s_files;					\
+	list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry				\
+}
+
+#endif
+
 int fs_may_remount_ro(struct super_block *sb)
 {
 	struct file *file;
-
 	/* Check that no files are currently opened for writing. */
-	spin_lock(&files_lock);
-	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
+	lg_global_lock(files_lglock);
+	do_file_list_for_each_entry(sb, file) {
 		struct inode *inode = file->f_path.dentry->d_inode;
 
 		/* File with pending delete? */
@@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb)
 		/* Writeable file? */
 		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
 			goto too_bad;
-	}
-	spin_unlock(&files_lock);
+	} while_file_list_for_each_entry;
+	lg_global_unlock(files_lglock);
 	return 1; /* Tis' cool bro. */
 too_bad:
-	spin_unlock(&files_lock);
+	lg_global_unlock(files_lglock);
 	return 0;
 }
 
@@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *sb)
 	struct file *f;
 
 retry:
-	spin_lock(&files_lock);
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+	lg_global_lock(files_lglock);
+	do_file_list_for_each_entry(sb, f) {
 		struct vfsmount *mnt;
 		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
 		       continue;
@@ -406,12 +477,12 @@ retry:
 		file_release_write(f);
 		mnt = mntget(f->f_path.mnt);
 		/* This can sleep, so we can't hold the spinlock. */
-		spin_unlock(&files_lock);
+		lg_global_unlock(files_lglock);
 		mnt_drop_write(mnt);
 		mntput(mnt);
 		goto retry;
-	}
-	spin_unlock(&files_lock);
+	} while_file_list_for_each_entry;
+	lg_global_unlock(files_lglock);
 }
 
 void __init files_init(unsigned long mempages)
@@ -431,5 +502,6 @@ void __init files_init(unsigned long mempages)
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
 	files_defer_init();
+	lg_lock_init(files_lglock);
 	percpu_counter_init(&nr_files, 0);
 } 
diff --git a/fs/super.c b/fs/super.c
index 9674ab2c8718..8819e3a7ff20 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
 			s = NULL;
 			goto out;
 		}
+#ifdef CONFIG_SMP
+		s->s_files = alloc_percpu(struct list_head);
+		if (!s->s_files) {
+			security_sb_free(s);
+			kfree(s);
+			s = NULL;
+			goto out;
+		} else {
+			int i;
+
+			for_each_possible_cpu(i)
+				INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
+		}
+#else
 		INIT_LIST_HEAD(&s->s_files);
+#endif
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
@@ -108,6 +123,9 @@ out:
  */
 static inline void destroy_super(struct super_block *s)
 {
+#ifdef CONFIG_SMP
+	free_percpu(s->s_files);
+#endif
 	security_sb_free(s);
 	kfree(s->s_subtype);
 	kfree(s->s_options);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5e65add0f163..76041b614758 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -920,6 +920,9 @@ struct file {
 #define f_vfsmnt	f_path.mnt
 	const struct file_operations	*f_op;
 	spinlock_t		f_lock;  /* f_ep_links, f_flags, no IRQ */
+#ifdef CONFIG_SMP
+	int			f_sb_list_cpu;
+#endif
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1334,7 +1337,11 @@ struct super_block {
 
 	struct list_head	s_inodes;	/* all inodes */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
+#ifdef CONFIG_SMP
+	struct list_head __percpu *s_files;
+#else
 	struct list_head	s_files;
+#endif
 	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
 	struct list_head	s_dentry_lru;	/* unused dentry lru */
 	int			s_nr_dentry_unused;	/* # of dentry on lru */
-- 
cgit v1.2.3-59-g8ed1b


From 56385a12d9bb9e173751f74b6c430742018cafc0 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Wed, 18 Aug 2010 14:08:17 +0200
Subject: ALSA: emu10k1 - delay the PCM interrupts (add pcm_irq_delay
 parameter)

With some hardware combinations, the PCM interrupts are acknowledged
before the period boundary from the emu10k1 chip. The midlevel PCM code
gets confused and the playback stream is interrupted.

It seems that the interrupt processing shift by 2 samples is enough
to fix this issue. This default value does not harm other,
non-affected hardware.

More information: Kernel bugzilla bug#16300

[A copmile warning fixed by tiwai]

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Cc: <stable@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/emu10k1.h     |  1 +
 sound/core/pcm_native.c     |  4 ++++
 sound/pci/emu10k1/emu10k1.c |  4 ++++
 sound/pci/emu10k1/emupcm.c  | 30 ++++++++++++++++++++++++++----
 sound/pci/emu10k1/memory.c  |  4 +++-
 5 files changed, 38 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 6a664c3f7c1e..7dc97d12253c 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1707,6 +1707,7 @@ struct snd_emu10k1 {
 	unsigned int card_type;			/* EMU10K1_CARD_* */
 	unsigned int ecard_ctrl;		/* ecard control bits */
 	unsigned long dma_mask;			/* PCI DMA mask */
+	unsigned int delay_pcm_irq;		/* in samples */
 	int max_cache_pages;			/* max memory size / PAGE_SIZE */
 	struct snd_dma_buffer silent_page;	/* silent page */
 	struct snd_dma_buffer ptb_pages;	/* page table pages */
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index a3b2a6479246..134fc6c2e08d 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -978,6 +978,10 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push)
 {
 	if (substream->runtime->trigger_master != substream)
 		return 0;
+	/* some drivers might use hw_ptr to recover from the pause -
+	   update the hw_ptr now */
+	if (push)
+		snd_pcm_update_hw_ptr(substream);
 	/* The jiffies check in snd_pcm_update_hw_ptr*() is done by
 	 * a delta betwen the current jiffies, this gives a large enough
 	 * delta, effectively to skip the check once.
diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c
index 4203782d7cb7..aff8387c45cf 100644
--- a/sound/pci/emu10k1/emu10k1.c
+++ b/sound/pci/emu10k1/emu10k1.c
@@ -52,6 +52,7 @@ static int max_synth_voices[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 64};
 static int max_buffer_size[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 128};
 static int enable_ir[SNDRV_CARDS];
 static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */
+static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 2};
 
 module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard.");
@@ -73,6 +74,8 @@ module_param_array(enable_ir, bool, NULL, 0444);
 MODULE_PARM_DESC(enable_ir, "Enable IR.");
 module_param_array(subsystem, uint, NULL, 0444);
 MODULE_PARM_DESC(subsystem, "Force card subsystem model.");
+module_param_array(delay_pcm_irq, uint, NULL, 0444);
+MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 0).");
 /*
  * Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value  Model:SB0400
  */
@@ -127,6 +130,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci,
 				      &emu)) < 0)
 		goto error;
 	card->private_data = emu;
+	emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f;
 	if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0)
 		goto error;
 	if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0)
diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index 55b83ef73c63..622bace148e3 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -332,7 +332,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
 		evoice->epcm->ccca_start_addr = start_addr + ccis;
 		if (extra) {
 			start_addr += ccis;
-			end_addr += ccis;
+			end_addr += ccis + emu->delay_pcm_irq;
 		}
 		if (stereo && !extra) {
 			snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK);
@@ -360,7 +360,9 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
 	/* Assumption that PT is already 0 so no harm overwriting */
 	snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]);
 	snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24));
-	snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24));
+	snd_emu10k1_ptr_write(emu, PSST, voice,
+			(start_addr + (extra ? emu->delay_pcm_irq : 0)) |
+			(send_amount[2] << 24));
 	if (emu->card_capabilities->emu_model)
 		pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */
 	else 
@@ -732,6 +734,23 @@ static void snd_emu10k1_playback_stop_voice(struct snd_emu10k1 *emu, struct snd_
 	snd_emu10k1_ptr_write(emu, IP, voice, 0);
 }
 
+static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu,
+		struct snd_emu10k1_pcm *epcm,
+		struct snd_pcm_substream *substream,
+		struct snd_pcm_runtime *runtime)
+{
+	unsigned int ptr, period_pos;
+
+	/* try to sychronize the current position for the interrupt
+	   source voice */
+	period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt;
+	period_pos %= runtime->period_size;
+	ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number);
+	ptr &= ~0x00ffffff;
+	ptr |= epcm->ccca_start_addr + period_pos;
+	snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr);
+}
+
 static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream,
 				        int cmd)
 {
@@ -753,6 +772,8 @@ static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream,
 		/* follow thru */
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
 	case SNDRV_PCM_TRIGGER_RESUME:
+		if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE)
+			snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime);
 		mix = &emu->pcm_mixer[substream->number];
 		snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix);
 		snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix);
@@ -869,8 +890,9 @@ static snd_pcm_uframes_t snd_emu10k1_playback_pointer(struct snd_pcm_substream *
 #endif
 	/*
 	printk(KERN_DEBUG
-	       "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n",
-	       ptr, runtime->buffer_size, runtime->period_size);
+	       "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n",
+	       (long)ptr, (long)runtime->buffer_size,
+	       (long)runtime->period_size);
 	*/
 	return ptr;
 }
diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c
index ffb1ddb8dc28..957a311514c8 100644
--- a/sound/pci/emu10k1/memory.c
+++ b/sound/pci/emu10k1/memory.c
@@ -310,8 +310,10 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst
 	if (snd_BUG_ON(!hdr))
 		return NULL;
 
+	idx = runtime->period_size >= runtime->buffer_size ?
+					(emu->delay_pcm_irq * 2) : 0;
 	mutex_lock(&hdr->block_mutex);
-	blk = search_empty(emu, runtime->dma_bytes);
+	blk = search_empty(emu, runtime->dma_bytes + idx);
 	if (blk == NULL) {
 		mutex_unlock(&hdr->block_mutex);
 		return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From bd76af0f87f7a1815b311bde269a3f18305b3169 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Wed, 18 Aug 2010 14:16:54 +0200
Subject: ALSA: pcm midlevel code - add time check for double interrupt
 acknowledge

The current code in pcm_lib.c do all checks using only the position
in the ring buffer. Unfortunately, where the interrupts gets delayed or
merged into one, we need another timing source to check when the
buffer size boundary overlaps to avoid the wrong updating of the
ring buffer pointers.

This code uses jiffies to check the right time window without any
performance impact.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h     |  1 +
 sound/core/pcm_lib.c    | 14 +++++++++-----
 sound/core/pcm_native.c |  2 ++
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 85f1c6bf8566..dfd9b76b1853 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -278,6 +278,7 @@ struct snd_pcm_runtime {
 	snd_pcm_uframes_t hw_ptr_base;	/* Position at buffer restart */
 	snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */
 	unsigned long hw_ptr_jiffies;	/* Time when hw_ptr is updated */
+	unsigned long hw_ptr_buffer_jiffies; /* buffer time in jiffies */
 	snd_pcm_sframes_t delay;	/* extra delay; typically FIFO size */
 
 	/* -- HW params -- */
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index e23e0e7ab26f..a1707cca9c66 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -334,11 +334,15 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream,
 		/* delta = "expected next hw_ptr" for in_interrupt != 0 */
 		delta = runtime->hw_ptr_interrupt + runtime->period_size;
 		if (delta > new_hw_ptr) {
-			hw_base += runtime->buffer_size;
-			if (hw_base >= runtime->boundary)
-				hw_base = 0;
-			new_hw_ptr = hw_base + pos;
-			goto __delta;
+			/* check for double acknowledged interrupts */
+			hdelta = jiffies - runtime->hw_ptr_jiffies;
+			if (hdelta > runtime->hw_ptr_buffer_jiffies/2) {
+				hw_base += runtime->buffer_size;
+				if (hw_base >= runtime->boundary)
+					hw_base = 0;
+				new_hw_ptr = hw_base + pos;
+				goto __delta;
+			}
 		}
 	}
 	/* new_hw_ptr might be lower than old_hw_ptr in case when */
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 134fc6c2e08d..e2e73895db12 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -864,6 +864,8 @@ static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state)
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	snd_pcm_trigger_tstamp(substream);
 	runtime->hw_ptr_jiffies = jiffies;
+	runtime->hw_ptr_buffer_jiffies = (runtime->buffer_size * HZ) / 
+							    runtime->rate;
 	runtime->status->state = state;
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    runtime->silence_size > 0)
-- 
cgit v1.2.3-59-g8ed1b


From d15ca3203754359cfe5d18910722d3089b204cc4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 18 Aug 2010 18:55:33 +0100
Subject: Fix the declaration of sys_execve() in asm-generic/syscalls.h

Fix the declaration of sys_execve() in asm-generic/syscalls.h to have
various consts applied to its pointers.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/syscalls.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index df84e3b04555..d89dec864d42 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -23,8 +23,10 @@ asmlinkage long sys_vfork(struct pt_regs *regs);
 #endif
 
 #ifndef sys_execve
-asmlinkage long sys_execve(char __user *filename, char __user * __user *argv,
-			char __user * __user *envp, struct pt_regs *regs);
+asmlinkage long sys_execve(const char __user *filename,
+			   const char __user *const __user *argv,
+			   const char __user *const __user *envp,
+			   struct pt_regs *regs);
 #endif
 
 #ifndef sys_mmap2
-- 
cgit v1.2.3-59-g8ed1b


From a49f37eed22b74221f271811ea41323654e40dad Mon Sep 17 00:00:00 2001
From: Sachin Sanap <ssanap@marvell.com>
Date: Fri, 13 Aug 2010 21:22:49 +0000
Subject: net: add Fast Ethernet driver for PXA168.

Signed-off-by: Sachin Sanap <ssanap@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig        |   10 +
 drivers/net/Makefile       |    1 +
 drivers/net/pxa168_eth.c   | 1666 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pxa168_eth.h |   30 +
 4 files changed, 1707 insertions(+)
 create mode 100644 drivers/net/pxa168_eth.c
 create mode 100644 include/linux/pxa168_eth.h

(limited to 'include')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index ebe68395ecf8..fe581566cb26 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -928,6 +928,16 @@ config SMC91X
 	  The module will be called smc91x.  If you want to compile it as a
 	  module, say M here and read <file:Documentation/kbuild/modules.txt>.
 
+config PXA168_ETH
+	tristate "Marvell pxa168 ethernet support"
+	depends on CPU_PXA168
+	select PHYLIB
+	help
+	  This driver supports the pxa168 Ethernet ports.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called pxa168_eth.
+
 config NET_NETX
 	tristate "NetX Ethernet support"
 	select MII
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 56e8c27f77ce..3e8f150c4b14 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -244,6 +244,7 @@ obj-$(CONFIG_MYRI10GE) += myri10ge/
 obj-$(CONFIG_SMC91X) += smc91x.o
 obj-$(CONFIG_SMC911X) += smc911x.o
 obj-$(CONFIG_SMSC911X) += smsc911x.o
+obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o
 obj-$(CONFIG_BFIN_MAC) += bfin_mac.o
 obj-$(CONFIG_DM9000) += dm9000.o
 obj-$(CONFIG_PASEMI_MAC) += pasemi_mac_driver.o
diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c
new file mode 100644
index 000000000000..ecc64d750cce
--- /dev/null
+++ b/drivers/net/pxa168_eth.c
@@ -0,0 +1,1666 @@
+/*
+ * PXA168 ethernet driver.
+ * Most of the code is derived from mv643xx ethernet driver.
+ *
+ * Copyright (C) 2010 Marvell International Ltd.
+ *		Sachin Sanap <ssanap@marvell.com>
+ *		Philip Rakity <prakity@marvell.com>
+ *		Mark Brown <markb@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/etherdevice.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/clk.h>
+#include <linux/phy.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <asm/cacheflush.h>
+#include <linux/pxa168_eth.h>
+
+#define DRIVER_NAME	"pxa168-eth"
+#define DRIVER_VERSION	"0.3"
+
+/*
+ * Registers
+ */
+
+#define PHY_ADDRESS		0x0000
+#define SMI			0x0010
+#define PORT_CONFIG		0x0400
+#define PORT_CONFIG_EXT		0x0408
+#define PORT_COMMAND		0x0410
+#define PORT_STATUS		0x0418
+#define HTPR			0x0428
+#define SDMA_CONFIG		0x0440
+#define SDMA_CMD		0x0448
+#define INT_CAUSE		0x0450
+#define INT_W_CLEAR		0x0454
+#define INT_MASK		0x0458
+#define ETH_F_RX_DESC_0		0x0480
+#define ETH_C_RX_DESC_0		0x04A0
+#define ETH_C_TX_DESC_1		0x04E4
+
+/* smi register */
+#define SMI_BUSY		(1 << 28)	/* 0 - Write, 1 - Read  */
+#define SMI_R_VALID		(1 << 27)	/* 0 - Write, 1 - Read  */
+#define SMI_OP_W		(0 << 26)	/* Write operation      */
+#define SMI_OP_R		(1 << 26)	/* Read operation */
+
+#define PHY_WAIT_ITERATIONS	10
+
+#define PXA168_ETH_PHY_ADDR_DEFAULT	0
+/* RX & TX descriptor command */
+#define BUF_OWNED_BY_DMA	(1 << 31)
+
+/* RX descriptor status */
+#define RX_EN_INT		(1 << 23)
+#define RX_FIRST_DESC		(1 << 17)
+#define RX_LAST_DESC		(1 << 16)
+#define RX_ERROR		(1 << 15)
+
+/* TX descriptor command */
+#define TX_EN_INT		(1 << 23)
+#define TX_GEN_CRC		(1 << 22)
+#define TX_ZERO_PADDING		(1 << 18)
+#define TX_FIRST_DESC		(1 << 17)
+#define TX_LAST_DESC		(1 << 16)
+#define TX_ERROR		(1 << 15)
+
+/* SDMA_CMD */
+#define SDMA_CMD_AT		(1 << 31)
+#define SDMA_CMD_TXDL		(1 << 24)
+#define SDMA_CMD_TXDH		(1 << 23)
+#define SDMA_CMD_AR		(1 << 15)
+#define SDMA_CMD_ERD		(1 << 7)
+
+/* Bit definitions of the Port Config Reg */
+#define PCR_HS			(1 << 12)
+#define PCR_EN			(1 << 7)
+#define PCR_PM			(1 << 0)
+
+/* Bit definitions of the Port Config Extend Reg */
+#define PCXR_2BSM		(1 << 28)
+#define PCXR_DSCP_EN		(1 << 21)
+#define PCXR_MFL_1518		(0 << 14)
+#define PCXR_MFL_1536		(1 << 14)
+#define PCXR_MFL_2048		(2 << 14)
+#define PCXR_MFL_64K		(3 << 14)
+#define PCXR_FLP		(1 << 11)
+#define PCXR_PRIO_TX_OFF	3
+#define PCXR_TX_HIGH_PRI	(7 << PCXR_PRIO_TX_OFF)
+
+/* Bit definitions of the SDMA Config Reg */
+#define SDCR_BSZ_OFF		12
+#define SDCR_BSZ8		(3 << SDCR_BSZ_OFF)
+#define SDCR_BSZ4		(2 << SDCR_BSZ_OFF)
+#define SDCR_BSZ2		(1 << SDCR_BSZ_OFF)
+#define SDCR_BSZ1		(0 << SDCR_BSZ_OFF)
+#define SDCR_BLMR		(1 << 6)
+#define SDCR_BLMT		(1 << 7)
+#define SDCR_RIFB		(1 << 9)
+#define SDCR_RC_OFF		2
+#define SDCR_RC_MAX_RETRANS	(0xf << SDCR_RC_OFF)
+
+/*
+ * Bit definitions of the Interrupt Cause Reg
+ * and Interrupt MASK Reg is the same
+ */
+#define ICR_RXBUF		(1 << 0)
+#define ICR_TXBUF_H		(1 << 2)
+#define ICR_TXBUF_L		(1 << 3)
+#define ICR_TXEND_H		(1 << 6)
+#define ICR_TXEND_L		(1 << 7)
+#define ICR_RXERR		(1 << 8)
+#define ICR_TXERR_H		(1 << 10)
+#define ICR_TXERR_L		(1 << 11)
+#define ICR_TX_UDR		(1 << 13)
+#define ICR_MII_CH		(1 << 28)
+
+#define ALL_INTS (ICR_TXBUF_H  | ICR_TXBUF_L  | ICR_TX_UDR |\
+				ICR_TXERR_H  | ICR_TXERR_L |\
+				ICR_TXEND_H  | ICR_TXEND_L |\
+				ICR_RXBUF | ICR_RXERR  | ICR_MII_CH)
+
+#define ETH_HW_IP_ALIGN		2	/* hw aligns IP header */
+
+#define NUM_RX_DESCS		64
+#define NUM_TX_DESCS		64
+
+#define HASH_ADD		0
+#define HASH_DELETE		1
+#define HASH_ADDR_TABLE_SIZE	0x4000	/* 16K (1/2K address - PCR_HS == 1) */
+#define HOP_NUMBER		12
+
+/* Bit definitions for Port status */
+#define PORT_SPEED_100		(1 << 0)
+#define FULL_DUPLEX		(1 << 1)
+#define FLOW_CONTROL_ENABLED	(1 << 2)
+#define LINK_UP			(1 << 3)
+
+/* Bit definitions for work to be done */
+#define WORK_LINK		(1 << 0)
+#define WORK_TX_DONE		(1 << 1)
+
+/*
+ * Misc definitions.
+ */
+#define SKB_DMA_REALIGN		((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
+
+struct rx_desc {
+	u32 cmd_sts;		/* Descriptor command status            */
+	u16 byte_cnt;		/* Descriptor buffer byte count         */
+	u16 buf_size;		/* Buffer size                          */
+	u32 buf_ptr;		/* Descriptor buffer pointer            */
+	u32 next_desc_ptr;	/* Next descriptor pointer              */
+};
+
+struct tx_desc {
+	u32 cmd_sts;		/* Command/status field                 */
+	u16 reserved;
+	u16 byte_cnt;		/* buffer byte count                    */
+	u32 buf_ptr;		/* pointer to buffer for this descriptor */
+	u32 next_desc_ptr;	/* Pointer to next descriptor           */
+};
+
+struct pxa168_eth_private {
+	int port_num;		/* User Ethernet port number    */
+
+	int rx_resource_err;	/* Rx ring resource error flag */
+
+	/* Next available and first returning Rx resource */
+	int rx_curr_desc_q, rx_used_desc_q;
+
+	/* Next available and first returning Tx resource */
+	int tx_curr_desc_q, tx_used_desc_q;
+
+	struct rx_desc *p_rx_desc_area;
+	dma_addr_t rx_desc_dma;
+	int rx_desc_area_size;
+	struct sk_buff **rx_skb;
+
+	struct tx_desc *p_tx_desc_area;
+	dma_addr_t tx_desc_dma;
+	int tx_desc_area_size;
+	struct sk_buff **tx_skb;
+
+	struct work_struct tx_timeout_task;
+
+	struct net_device *dev;
+	struct napi_struct napi;
+	u8 work_todo;
+	int skb_size;
+
+	struct net_device_stats stats;
+	/* Size of Tx Ring per queue */
+	int tx_ring_size;
+	/* Number of tx descriptors in use */
+	int tx_desc_count;
+	/* Size of Rx Ring per queue */
+	int rx_ring_size;
+	/* Number of rx descriptors in use */
+	int rx_desc_count;
+
+	/*
+	 * Used in case RX Ring is empty, which can occur when
+	 * system does not have resources (skb's)
+	 */
+	struct timer_list timeout;
+	struct mii_bus *smi_bus;
+	struct phy_device *phy;
+
+	/* clock */
+	struct clk *clk;
+	struct pxa168_eth_platform_data *pd;
+	/*
+	 * Ethernet controller base address.
+	 */
+	void __iomem *base;
+
+	/* Pointer to the hardware address filter table */
+	void *htpr;
+	dma_addr_t htpr_dma;
+};
+
+struct addr_table_entry {
+	__le32 lo;
+	__le32 hi;
+};
+
+/* Bit fields of a Hash Table Entry */
+enum hash_table_entry {
+	HASH_ENTRY_VALID = 1,
+	SKIP = 2,
+	HASH_ENTRY_RECEIVE_DISCARD = 4,
+	HASH_ENTRY_RECEIVE_DISCARD_BIT = 2
+};
+
+static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
+static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd);
+static int pxa168_init_hw(struct pxa168_eth_private *pep);
+static void eth_port_reset(struct net_device *dev);
+static void eth_port_start(struct net_device *dev);
+static int pxa168_eth_open(struct net_device *dev);
+static int pxa168_eth_stop(struct net_device *dev);
+static int ethernet_phy_setup(struct net_device *dev);
+
+static inline u32 rdl(struct pxa168_eth_private *pep, int offset)
+{
+	return readl(pep->base + offset);
+}
+
+static inline void wrl(struct pxa168_eth_private *pep, int offset, u32 data)
+{
+	writel(data, pep->base + offset);
+}
+
+static void abort_dma(struct pxa168_eth_private *pep)
+{
+	int delay;
+	int max_retries = 40;
+
+	do {
+		wrl(pep, SDMA_CMD, SDMA_CMD_AR | SDMA_CMD_AT);
+		udelay(100);
+
+		delay = 10;
+		while ((rdl(pep, SDMA_CMD) & (SDMA_CMD_AR | SDMA_CMD_AT))
+		       && delay-- > 0) {
+			udelay(10);
+		}
+	} while (max_retries-- > 0 && delay <= 0);
+
+	if (max_retries <= 0)
+		printk(KERN_ERR "%s : DMA Stuck\n", __func__);
+}
+
+static int ethernet_phy_get(struct pxa168_eth_private *pep)
+{
+	unsigned int reg_data;
+
+	reg_data = rdl(pep, PHY_ADDRESS);
+
+	return (reg_data >> (5 * pep->port_num)) & 0x1f;
+}
+
+static void ethernet_phy_set_addr(struct pxa168_eth_private *pep, int phy_addr)
+{
+	u32 reg_data;
+	int addr_shift = 5 * pep->port_num;
+
+	reg_data = rdl(pep, PHY_ADDRESS);
+	reg_data &= ~(0x1f << addr_shift);
+	reg_data |= (phy_addr & 0x1f) << addr_shift;
+	wrl(pep, PHY_ADDRESS, reg_data);
+}
+
+static void ethernet_phy_reset(struct pxa168_eth_private *pep)
+{
+	int data;
+
+	data = phy_read(pep->phy, MII_BMCR);
+	if (data < 0)
+		return;
+
+	data |= BMCR_RESET;
+	if (phy_write(pep->phy, MII_BMCR, data) < 0)
+		return;
+
+	do {
+		data = phy_read(pep->phy, MII_BMCR);
+	} while (data >= 0 && data & BMCR_RESET);
+}
+
+static void rxq_refill(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	struct sk_buff *skb;
+	struct rx_desc *p_used_rx_desc;
+	int used_rx_desc;
+
+	while (pep->rx_desc_count < pep->rx_ring_size) {
+		int size;
+
+		skb = dev_alloc_skb(pep->skb_size);
+		if (!skb)
+			break;
+		if (SKB_DMA_REALIGN)
+			skb_reserve(skb, SKB_DMA_REALIGN);
+		pep->rx_desc_count++;
+		/* Get 'used' Rx descriptor */
+		used_rx_desc = pep->rx_used_desc_q;
+		p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc];
+		size = skb->end - skb->data;
+		p_used_rx_desc->buf_ptr = dma_map_single(NULL,
+							 skb->data,
+							 size,
+							 DMA_FROM_DEVICE);
+		p_used_rx_desc->buf_size = size;
+		pep->rx_skb[used_rx_desc] = skb;
+
+		/* Return the descriptor to DMA ownership */
+		wmb();
+		p_used_rx_desc->cmd_sts = BUF_OWNED_BY_DMA | RX_EN_INT;
+		wmb();
+
+		/* Move the used descriptor pointer to the next descriptor */
+		pep->rx_used_desc_q = (used_rx_desc + 1) % pep->rx_ring_size;
+
+		/* Any Rx return cancels the Rx resource error status */
+		pep->rx_resource_err = 0;
+
+		skb_reserve(skb, ETH_HW_IP_ALIGN);
+	}
+
+	/*
+	 * If RX ring is empty of SKB, set a timer to try allocating
+	 * again at a later time.
+	 */
+	if (pep->rx_desc_count == 0) {
+		pep->timeout.expires = jiffies + (HZ / 10);
+		add_timer(&pep->timeout);
+	}
+}
+
+static inline void rxq_refill_timer_wrapper(unsigned long data)
+{
+	struct pxa168_eth_private *pep = (void *)data;
+	napi_schedule(&pep->napi);
+}
+
+static inline u8 flip_8_bits(u8 x)
+{
+	return (((x) & 0x01) << 3) | (((x) & 0x02) << 1)
+	    | (((x) & 0x04) >> 1) | (((x) & 0x08) >> 3)
+	    | (((x) & 0x10) << 3) | (((x) & 0x20) << 1)
+	    | (((x) & 0x40) >> 1) | (((x) & 0x80) >> 3);
+}
+
+static void nibble_swap_every_byte(unsigned char *mac_addr)
+{
+	int i;
+	for (i = 0; i < ETH_ALEN; i++) {
+		mac_addr[i] = ((mac_addr[i] & 0x0f) << 4) |
+				((mac_addr[i] & 0xf0) >> 4);
+	}
+}
+
+static void inverse_every_nibble(unsigned char *mac_addr)
+{
+	int i;
+	for (i = 0; i < ETH_ALEN; i++)
+		mac_addr[i] = flip_8_bits(mac_addr[i]);
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * This function will calculate the hash function of the address.
+ * Inputs
+ * mac_addr_orig    - MAC address.
+ * Outputs
+ * return the calculated entry.
+ */
+static u32 hash_function(unsigned char *mac_addr_orig)
+{
+	u32 hash_result;
+	u32 addr0;
+	u32 addr1;
+	u32 addr2;
+	u32 addr3;
+	unsigned char mac_addr[ETH_ALEN];
+
+	/* Make a copy of MAC address since we are going to performe bit
+	 * operations on it
+	 */
+	memcpy(mac_addr, mac_addr_orig, ETH_ALEN);
+
+	nibble_swap_every_byte(mac_addr);
+	inverse_every_nibble(mac_addr);
+
+	addr0 = (mac_addr[5] >> 2) & 0x3f;
+	addr1 = (mac_addr[5] & 0x03) | (((mac_addr[4] & 0x7f)) << 2);
+	addr2 = ((mac_addr[4] & 0x80) >> 7) | mac_addr[3] << 1;
+	addr3 = (mac_addr[2] & 0xff) | ((mac_addr[1] & 1) << 8);
+
+	hash_result = (addr0 << 9) | (addr1 ^ addr2 ^ addr3);
+	hash_result = hash_result & 0x07ff;
+	return hash_result;
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * This function will add/del an entry to the address table.
+ * Inputs
+ * pep - ETHERNET .
+ * mac_addr - MAC address.
+ * skip - if 1, skip this address.Used in case of deleting an entry which is a
+ *	  part of chain in the hash table.We cant just delete the entry since
+ *	  that will break the chain.We need to defragment the tables time to
+ *	  time.
+ * rd   - 0 Discard packet upon match.
+ *	- 1 Receive packet upon match.
+ * Outputs
+ * address table entry is added/deleted.
+ * 0 if success.
+ * -ENOSPC if table full
+ */
+static int add_del_hash_entry(struct pxa168_eth_private *pep,
+			      unsigned char *mac_addr,
+			      u32 rd, u32 skip, int del)
+{
+	struct addr_table_entry *entry, *start;
+	u32 new_high;
+	u32 new_low;
+	u32 i;
+
+	new_low = (((mac_addr[1] >> 4) & 0xf) << 15)
+	    | (((mac_addr[1] >> 0) & 0xf) << 11)
+	    | (((mac_addr[0] >> 4) & 0xf) << 7)
+	    | (((mac_addr[0] >> 0) & 0xf) << 3)
+	    | (((mac_addr[3] >> 4) & 0x1) << 31)
+	    | (((mac_addr[3] >> 0) & 0xf) << 27)
+	    | (((mac_addr[2] >> 4) & 0xf) << 23)
+	    | (((mac_addr[2] >> 0) & 0xf) << 19)
+	    | (skip << SKIP) | (rd << HASH_ENTRY_RECEIVE_DISCARD_BIT)
+	    | HASH_ENTRY_VALID;
+
+	new_high = (((mac_addr[5] >> 4) & 0xf) << 15)
+	    | (((mac_addr[5] >> 0) & 0xf) << 11)
+	    | (((mac_addr[4] >> 4) & 0xf) << 7)
+	    | (((mac_addr[4] >> 0) & 0xf) << 3)
+	    | (((mac_addr[3] >> 5) & 0x7) << 0);
+
+	/*
+	 * Pick the appropriate table, start scanning for free/reusable
+	 * entries at the index obtained by hashing the specified MAC address
+	 */
+	start = (struct addr_table_entry *)(pep->htpr);
+	entry = start + hash_function(mac_addr);
+	for (i = 0; i < HOP_NUMBER; i++) {
+		if (!(le32_to_cpu(entry->lo) & HASH_ENTRY_VALID)) {
+			break;
+		} else {
+			/* if same address put in same position */
+			if (((le32_to_cpu(entry->lo) & 0xfffffff8) ==
+				(new_low & 0xfffffff8)) &&
+				(le32_to_cpu(entry->hi) == new_high)) {
+				break;
+			}
+		}
+		if (entry == start + 0x7ff)
+			entry = start;
+		else
+			entry++;
+	}
+
+	if (((le32_to_cpu(entry->lo) & 0xfffffff8) != (new_low & 0xfffffff8)) &&
+	    (le32_to_cpu(entry->hi) != new_high) && del)
+		return 0;
+
+	if (i == HOP_NUMBER) {
+		if (!del) {
+			printk(KERN_INFO "%s: table section is full, need to "
+					"move to 16kB implementation?\n",
+					 __FILE__);
+			return -ENOSPC;
+		} else
+			return 0;
+	}
+
+	/*
+	 * Update the selected entry
+	 */
+	if (del) {
+		entry->hi = 0;
+		entry->lo = 0;
+	} else {
+		entry->hi = cpu_to_le32(new_high);
+		entry->lo = cpu_to_le32(new_low);
+	}
+
+	return 0;
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ *  Create an addressTable entry from MAC address info
+ *  found in the specifed net_device struct
+ *
+ *  Input : pointer to ethernet interface network device structure
+ *  Output : N/A
+ */
+static void update_hash_table_mac_address(struct pxa168_eth_private *pep,
+					  unsigned char *oaddr,
+					  unsigned char *addr)
+{
+	/* Delete old entry */
+	if (oaddr)
+		add_del_hash_entry(pep, oaddr, 1, 0, HASH_DELETE);
+	/* Add new entry */
+	add_del_hash_entry(pep, addr, 1, 0, HASH_ADD);
+}
+
+static int init_hash_table(struct pxa168_eth_private *pep)
+{
+	/*
+	 * Hardware expects CPU to build a hash table based on a predefined
+	 * hash function and populate it based on hardware address. The
+	 * location of the hash table is identified by 32-bit pointer stored
+	 * in HTPR internal register. Two possible sizes exists for the hash
+	 * table 8kB (256kB of DRAM required (4 x 64 kB banks)) and 1/2kB
+	 * (16kB of DRAM required (4 x 4 kB banks)).We currently only support
+	 * 1/2kB.
+	 */
+	/* TODO: Add support for 8kB hash table and alternative hash
+	 * function.Driver can dynamically switch to them if the 1/2kB hash
+	 * table is full.
+	 */
+	if (pep->htpr == NULL) {
+		pep->htpr = dma_alloc_coherent(pep->dev->dev.parent,
+					      HASH_ADDR_TABLE_SIZE,
+					      &pep->htpr_dma, GFP_KERNEL);
+		if (pep->htpr == NULL)
+			return -ENOMEM;
+	}
+	memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE);
+	wrl(pep, HTPR, pep->htpr_dma);
+	return 0;
+}
+
+static void pxa168_eth_set_rx_mode(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	struct netdev_hw_addr *ha;
+	u32 val;
+
+	val = rdl(pep, PORT_CONFIG);
+	if (dev->flags & IFF_PROMISC)
+		val |= PCR_PM;
+	else
+		val &= ~PCR_PM;
+	wrl(pep, PORT_CONFIG, val);
+
+	/*
+	 * Remove the old list of MAC address and add dev->addr
+	 * and multicast address.
+	 */
+	memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE);
+	update_hash_table_mac_address(pep, NULL, dev->dev_addr);
+
+	netdev_for_each_mc_addr(ha, dev)
+		update_hash_table_mac_address(pep, NULL, ha->addr);
+}
+
+static int pxa168_eth_set_mac_address(struct net_device *dev, void *addr)
+{
+	struct sockaddr *sa = addr;
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	unsigned char oldMac[ETH_ALEN];
+
+	if (!is_valid_ether_addr(sa->sa_data))
+		return -EINVAL;
+	memcpy(oldMac, dev->dev_addr, ETH_ALEN);
+	memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN);
+	netif_addr_lock_bh(dev);
+	update_hash_table_mac_address(pep, oldMac, dev->dev_addr);
+	netif_addr_unlock_bh(dev);
+	return 0;
+}
+
+static void eth_port_start(struct net_device *dev)
+{
+	unsigned int val = 0;
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	int tx_curr_desc, rx_curr_desc;
+
+	/* Perform PHY reset, if there is a PHY. */
+	if (pep->phy != NULL) {
+		struct ethtool_cmd cmd;
+
+		pxa168_get_settings(pep->dev, &cmd);
+		ethernet_phy_reset(pep);
+		pxa168_set_settings(pep->dev, &cmd);
+	}
+
+	/* Assignment of Tx CTRP of given queue */
+	tx_curr_desc = pep->tx_curr_desc_q;
+	wrl(pep, ETH_C_TX_DESC_1,
+	    (u32) ((struct tx_desc *)pep->tx_desc_dma + tx_curr_desc));
+
+	/* Assignment of Rx CRDP of given queue */
+	rx_curr_desc = pep->rx_curr_desc_q;
+	wrl(pep, ETH_C_RX_DESC_0,
+	    (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc));
+
+	wrl(pep, ETH_F_RX_DESC_0,
+	    (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc));
+
+	/* Clear all interrupts */
+	wrl(pep, INT_CAUSE, 0);
+
+	/* Enable all interrupts for receive, transmit and error. */
+	wrl(pep, INT_MASK, ALL_INTS);
+
+	val = rdl(pep, PORT_CONFIG);
+	val |= PCR_EN;
+	wrl(pep, PORT_CONFIG, val);
+
+	/* Start RX DMA engine */
+	val = rdl(pep, SDMA_CMD);
+	val |= SDMA_CMD_ERD;
+	wrl(pep, SDMA_CMD, val);
+}
+
+static void eth_port_reset(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	unsigned int val = 0;
+
+	/* Stop all interrupts for receive, transmit and error. */
+	wrl(pep, INT_MASK, 0);
+
+	/* Clear all interrupts */
+	wrl(pep, INT_CAUSE, 0);
+
+	/* Stop RX DMA */
+	val = rdl(pep, SDMA_CMD);
+	val &= ~SDMA_CMD_ERD;	/* abort dma command */
+
+	/* Abort any transmit and receive operations and put DMA
+	 * in idle state.
+	 */
+	abort_dma(pep);
+
+	/* Disable port */
+	val = rdl(pep, PORT_CONFIG);
+	val &= ~PCR_EN;
+	wrl(pep, PORT_CONFIG, val);
+}
+
+/*
+ * txq_reclaim - Free the tx desc data for completed descriptors
+ * If force is non-zero, frees uncompleted descriptors as well
+ */
+static int txq_reclaim(struct net_device *dev, int force)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	struct tx_desc *desc;
+	u32 cmd_sts;
+	struct sk_buff *skb;
+	int tx_index;
+	dma_addr_t addr;
+	int count;
+	int released = 0;
+
+	netif_tx_lock(dev);
+
+	pep->work_todo &= ~WORK_TX_DONE;
+	while (pep->tx_desc_count > 0) {
+		tx_index = pep->tx_used_desc_q;
+		desc = &pep->p_tx_desc_area[tx_index];
+		cmd_sts = desc->cmd_sts;
+		if (!force && (cmd_sts & BUF_OWNED_BY_DMA)) {
+			if (released > 0) {
+				goto txq_reclaim_end;
+			} else {
+				released = -1;
+				goto txq_reclaim_end;
+			}
+		}
+		pep->tx_used_desc_q = (tx_index + 1) % pep->tx_ring_size;
+		pep->tx_desc_count--;
+		addr = desc->buf_ptr;
+		count = desc->byte_cnt;
+		skb = pep->tx_skb[tx_index];
+		if (skb)
+			pep->tx_skb[tx_index] = NULL;
+
+		if (cmd_sts & TX_ERROR) {
+			if (net_ratelimit())
+				printk(KERN_ERR "%s: Error in TX\n", dev->name);
+			dev->stats.tx_errors++;
+		}
+		dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE);
+		if (skb)
+			dev_kfree_skb_irq(skb);
+		released++;
+	}
+txq_reclaim_end:
+	netif_tx_unlock(dev);
+	return released;
+}
+
+static void pxa168_eth_tx_timeout(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+
+	printk(KERN_INFO "%s: TX timeout  desc_count %d\n",
+	       dev->name, pep->tx_desc_count);
+
+	schedule_work(&pep->tx_timeout_task);
+}
+
+static void pxa168_eth_tx_timeout_task(struct work_struct *work)
+{
+	struct pxa168_eth_private *pep = container_of(work,
+						 struct pxa168_eth_private,
+						 tx_timeout_task);
+	struct net_device *dev = pep->dev;
+	pxa168_eth_stop(dev);
+	pxa168_eth_open(dev);
+}
+
+static int rxq_process(struct net_device *dev, int budget)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	unsigned int received_packets = 0;
+	struct sk_buff *skb;
+
+	while (budget-- > 0) {
+		int rx_next_curr_desc, rx_curr_desc, rx_used_desc;
+		struct rx_desc *rx_desc;
+		unsigned int cmd_sts;
+
+		/* Do not process Rx ring in case of Rx ring resource error */
+		if (pep->rx_resource_err)
+			break;
+		rx_curr_desc = pep->rx_curr_desc_q;
+		rx_used_desc = pep->rx_used_desc_q;
+		rx_desc = &pep->p_rx_desc_area[rx_curr_desc];
+		cmd_sts = rx_desc->cmd_sts;
+		rmb();
+		if (cmd_sts & (BUF_OWNED_BY_DMA))
+			break;
+		skb = pep->rx_skb[rx_curr_desc];
+		pep->rx_skb[rx_curr_desc] = NULL;
+
+		rx_next_curr_desc = (rx_curr_desc + 1) % pep->rx_ring_size;
+		pep->rx_curr_desc_q = rx_next_curr_desc;
+
+		/* Rx descriptors exhausted. */
+		/* Set the Rx ring resource error flag */
+		if (rx_next_curr_desc == rx_used_desc)
+			pep->rx_resource_err = 1;
+		pep->rx_desc_count--;
+		dma_unmap_single(NULL, rx_desc->buf_ptr,
+				 rx_desc->buf_size,
+				 DMA_FROM_DEVICE);
+		received_packets++;
+		/*
+		 * Update statistics.
+		 * Note byte count includes 4 byte CRC count
+		 */
+		stats->rx_packets++;
+		stats->rx_bytes += rx_desc->byte_cnt;
+		/*
+		 * In case received a packet without first / last bits on OR
+		 * the error summary bit is on, the packets needs to be droped.
+		 */
+		if (((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) !=
+		     (RX_FIRST_DESC | RX_LAST_DESC))
+		    || (cmd_sts & RX_ERROR)) {
+
+			stats->rx_dropped++;
+			if ((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) !=
+			    (RX_FIRST_DESC | RX_LAST_DESC)) {
+				if (net_ratelimit())
+					printk(KERN_ERR
+					       "%s: Rx pkt on multiple desc\n",
+					       dev->name);
+			}
+			if (cmd_sts & RX_ERROR)
+				stats->rx_errors++;
+			dev_kfree_skb_irq(skb);
+		} else {
+			/*
+			 * The -4 is for the CRC in the trailer of the
+			 * received packet
+			 */
+			skb_put(skb, rx_desc->byte_cnt - 4);
+			skb->protocol = eth_type_trans(skb, dev);
+			netif_receive_skb(skb);
+		}
+		dev->last_rx = jiffies;
+	}
+	/* Fill RX ring with skb's */
+	rxq_refill(dev);
+	return received_packets;
+}
+
+static int pxa168_eth_collect_events(struct pxa168_eth_private *pep,
+				     struct net_device *dev)
+{
+	u32 icr;
+	int ret = 0;
+
+	icr = rdl(pep, INT_CAUSE);
+	if (icr == 0)
+		return IRQ_NONE;
+
+	wrl(pep, INT_CAUSE, ~icr);
+	if (icr & (ICR_TXBUF_H | ICR_TXBUF_L)) {
+		pep->work_todo |= WORK_TX_DONE;
+		ret = 1;
+	}
+	if (icr & ICR_RXBUF)
+		ret = 1;
+	if (icr & ICR_MII_CH) {
+		pep->work_todo |= WORK_LINK;
+		ret = 1;
+	}
+	return ret;
+}
+
+static void handle_link_event(struct pxa168_eth_private *pep)
+{
+	struct net_device *dev = pep->dev;
+	u32 port_status;
+	int speed;
+	int duplex;
+	int fc;
+
+	port_status = rdl(pep, PORT_STATUS);
+	if (!(port_status & LINK_UP)) {
+		if (netif_carrier_ok(dev)) {
+			printk(KERN_INFO "%s: link down\n", dev->name);
+			netif_carrier_off(dev);
+			txq_reclaim(dev, 1);
+		}
+		return;
+	}
+	if (port_status & PORT_SPEED_100)
+		speed = 100;
+	else
+		speed = 10;
+
+	duplex = (port_status & FULL_DUPLEX) ? 1 : 0;
+	fc = (port_status & FLOW_CONTROL_ENABLED) ? 1 : 0;
+	printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
+	       "flow control %sabled\n", dev->name,
+	       speed, duplex ? "full" : "half", fc ? "en" : "dis");
+	if (!netif_carrier_ok(dev))
+		netif_carrier_on(dev);
+}
+
+static irqreturn_t pxa168_eth_int_handler(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+
+	if (unlikely(!pxa168_eth_collect_events(pep, dev)))
+		return IRQ_NONE;
+	/* Disable interrupts */
+	wrl(pep, INT_MASK, 0);
+	napi_schedule(&pep->napi);
+	return IRQ_HANDLED;
+}
+
+static void pxa168_eth_recalc_skb_size(struct pxa168_eth_private *pep)
+{
+	int skb_size;
+
+	/*
+	 * Reserve 2+14 bytes for an ethernet header (the hardware
+	 * automatically prepends 2 bytes of dummy data to each
+	 * received packet), 16 bytes for up to four VLAN tags, and
+	 * 4 bytes for the trailing FCS -- 36 bytes total.
+	 */
+	skb_size = pep->dev->mtu + 36;
+
+	/*
+	 * Make sure that the skb size is a multiple of 8 bytes, as
+	 * the lower three bits of the receive descriptor's buffer
+	 * size field are ignored by the hardware.
+	 */
+	pep->skb_size = (skb_size + 7) & ~7;
+
+	/*
+	 * If NET_SKB_PAD is smaller than a cache line,
+	 * netdev_alloc_skb() will cause skb->data to be misaligned
+	 * to a cache line boundary.  If this is the case, include
+	 * some extra space to allow re-aligning the data area.
+	 */
+	pep->skb_size += SKB_DMA_REALIGN;
+
+}
+
+static int set_port_config_ext(struct pxa168_eth_private *pep)
+{
+	int skb_size;
+
+	pxa168_eth_recalc_skb_size(pep);
+	if  (pep->skb_size <= 1518)
+		skb_size = PCXR_MFL_1518;
+	else if (pep->skb_size <= 1536)
+		skb_size = PCXR_MFL_1536;
+	else if (pep->skb_size <= 2048)
+		skb_size = PCXR_MFL_2048;
+	else
+		skb_size = PCXR_MFL_64K;
+
+	/* Extended Port Configuration */
+	wrl(pep,
+	    PORT_CONFIG_EXT, PCXR_2BSM | /* Two byte prefix aligns IP hdr */
+	    PCXR_DSCP_EN |		 /* Enable DSCP in IP */
+	    skb_size | PCXR_FLP |	 /* do not force link pass */
+	    PCXR_TX_HIGH_PRI);		 /* Transmit - high priority queue */
+
+	return 0;
+}
+
+static int pxa168_init_hw(struct pxa168_eth_private *pep)
+{
+	int err = 0;
+
+	/* Disable interrupts */
+	wrl(pep, INT_MASK, 0);
+	wrl(pep, INT_CAUSE, 0);
+	/* Write to ICR to clear interrupts. */
+	wrl(pep, INT_W_CLEAR, 0);
+	/* Abort any transmit and receive operations and put DMA
+	 * in idle state.
+	 */
+	abort_dma(pep);
+	/* Initialize address hash table */
+	err = init_hash_table(pep);
+	if (err)
+		return err;
+	/* SDMA configuration */
+	wrl(pep, SDMA_CONFIG, SDCR_BSZ8 |	/* Burst size = 32 bytes */
+	    SDCR_RIFB |				/* Rx interrupt on frame */
+	    SDCR_BLMT |				/* Little endian transmit */
+	    SDCR_BLMR |				/* Little endian receive */
+	    SDCR_RC_MAX_RETRANS);		/* Max retransmit count */
+	/* Port Configuration */
+	wrl(pep, PORT_CONFIG, PCR_HS);		/* Hash size is 1/2kb */
+	set_port_config_ext(pep);
+
+	return err;
+}
+
+static int rxq_init(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	struct rx_desc *p_rx_desc;
+	int size = 0, i = 0;
+	int rx_desc_num = pep->rx_ring_size;
+
+	/* Allocate RX skb rings */
+	pep->rx_skb = kmalloc(sizeof(*pep->rx_skb) * pep->rx_ring_size,
+			     GFP_KERNEL);
+	if (!pep->rx_skb) {
+		printk(KERN_ERR "%s: Cannot alloc RX skb ring\n", dev->name);
+		return -ENOMEM;
+	}
+	/* Allocate RX ring */
+	pep->rx_desc_count = 0;
+	size = pep->rx_ring_size * sizeof(struct rx_desc);
+	pep->rx_desc_area_size = size;
+	pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size,
+						&pep->rx_desc_dma, GFP_KERNEL);
+	if (!pep->p_rx_desc_area) {
+		printk(KERN_ERR "%s: Cannot alloc RX ring (size %d bytes)\n",
+		       dev->name, size);
+		goto out;
+	}
+	memset((void *)pep->p_rx_desc_area, 0, size);
+	/* initialize the next_desc_ptr links in the Rx descriptors ring */
+	p_rx_desc = (struct rx_desc *)pep->p_rx_desc_area;
+	for (i = 0; i < rx_desc_num; i++) {
+		p_rx_desc[i].next_desc_ptr = pep->rx_desc_dma +
+		    ((i + 1) % rx_desc_num) * sizeof(struct rx_desc);
+	}
+	/* Save Rx desc pointer to driver struct. */
+	pep->rx_curr_desc_q = 0;
+	pep->rx_used_desc_q = 0;
+	pep->rx_desc_area_size = rx_desc_num * sizeof(struct rx_desc);
+	return 0;
+out:
+	kfree(pep->rx_skb);
+	return -ENOMEM;
+}
+
+static void rxq_deinit(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	int curr;
+
+	/* Free preallocated skb's on RX rings */
+	for (curr = 0; pep->rx_desc_count && curr < pep->rx_ring_size; curr++) {
+		if (pep->rx_skb[curr]) {
+			dev_kfree_skb(pep->rx_skb[curr]);
+			pep->rx_desc_count--;
+		}
+	}
+	if (pep->rx_desc_count)
+		printk(KERN_ERR
+		       "Error in freeing Rx Ring. %d skb's still\n",
+		       pep->rx_desc_count);
+	/* Free RX ring */
+	if (pep->p_rx_desc_area)
+		dma_free_coherent(pep->dev->dev.parent, pep->rx_desc_area_size,
+				  pep->p_rx_desc_area, pep->rx_desc_dma);
+	kfree(pep->rx_skb);
+}
+
+static int txq_init(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	struct tx_desc *p_tx_desc;
+	int size = 0, i = 0;
+	int tx_desc_num = pep->tx_ring_size;
+
+	pep->tx_skb = kmalloc(sizeof(*pep->tx_skb) * pep->tx_ring_size,
+			     GFP_KERNEL);
+	if (!pep->tx_skb) {
+		printk(KERN_ERR "%s: Cannot alloc TX skb ring\n", dev->name);
+		return -ENOMEM;
+	}
+	/* Allocate TX ring */
+	pep->tx_desc_count = 0;
+	size = pep->tx_ring_size * sizeof(struct tx_desc);
+	pep->tx_desc_area_size = size;
+	pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size,
+						&pep->tx_desc_dma, GFP_KERNEL);
+	if (!pep->p_tx_desc_area) {
+		printk(KERN_ERR "%s: Cannot allocate Tx Ring (size %d bytes)\n",
+		       dev->name, size);
+		goto out;
+	}
+	memset((void *)pep->p_tx_desc_area, 0, pep->tx_desc_area_size);
+	/* Initialize the next_desc_ptr links in the Tx descriptors ring */
+	p_tx_desc = (struct tx_desc *)pep->p_tx_desc_area;
+	for (i = 0; i < tx_desc_num; i++) {
+		p_tx_desc[i].next_desc_ptr = pep->tx_desc_dma +
+		    ((i + 1) % tx_desc_num) * sizeof(struct tx_desc);
+	}
+	pep->tx_curr_desc_q = 0;
+	pep->tx_used_desc_q = 0;
+	pep->tx_desc_area_size = tx_desc_num * sizeof(struct tx_desc);
+	return 0;
+out:
+	kfree(pep->tx_skb);
+	return -ENOMEM;
+}
+
+static void txq_deinit(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+
+	/* Free outstanding skb's on TX ring */
+	txq_reclaim(dev, 1);
+	BUG_ON(pep->tx_used_desc_q != pep->tx_curr_desc_q);
+	/* Free TX ring */
+	if (pep->p_tx_desc_area)
+		dma_free_coherent(pep->dev->dev.parent, pep->tx_desc_area_size,
+				  pep->p_tx_desc_area, pep->tx_desc_dma);
+	kfree(pep->tx_skb);
+}
+
+static int pxa168_eth_open(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	int err;
+
+	err = request_irq(dev->irq, pxa168_eth_int_handler,
+			  IRQF_DISABLED, dev->name, dev);
+	if (err) {
+		dev_printk(KERN_ERR, &dev->dev, "can't assign irq\n");
+		return -EAGAIN;
+	}
+	pep->rx_resource_err = 0;
+	err = rxq_init(dev);
+	if (err != 0)
+		goto out_free_irq;
+	err = txq_init(dev);
+	if (err != 0)
+		goto out_free_rx_skb;
+	pep->rx_used_desc_q = 0;
+	pep->rx_curr_desc_q = 0;
+
+	/* Fill RX ring with skb's */
+	rxq_refill(dev);
+	pep->rx_used_desc_q = 0;
+	pep->rx_curr_desc_q = 0;
+	netif_carrier_off(dev);
+	eth_port_start(dev);
+	napi_enable(&pep->napi);
+	return 0;
+out_free_rx_skb:
+	rxq_deinit(dev);
+out_free_irq:
+	free_irq(dev->irq, dev);
+	return err;
+}
+
+static int pxa168_eth_stop(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	eth_port_reset(dev);
+
+	/* Disable interrupts */
+	wrl(pep, INT_MASK, 0);
+	wrl(pep, INT_CAUSE, 0);
+	/* Write to ICR to clear interrupts. */
+	wrl(pep, INT_W_CLEAR, 0);
+	napi_disable(&pep->napi);
+	del_timer_sync(&pep->timeout);
+	netif_carrier_off(dev);
+	free_irq(dev->irq, dev);
+	rxq_deinit(dev);
+	txq_deinit(dev);
+
+	return 0;
+}
+
+static int pxa168_eth_change_mtu(struct net_device *dev, int mtu)
+{
+	int retval;
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+
+	if ((mtu > 9500) || (mtu < 68))
+		return -EINVAL;
+
+	dev->mtu = mtu;
+	retval = set_port_config_ext(pep);
+
+	if (!netif_running(dev))
+		return 0;
+
+	/*
+	 * Stop and then re-open the interface. This will allocate RX
+	 * skbs of the new MTU.
+	 * There is a possible danger that the open will not succeed,
+	 * due to memory being full.
+	 */
+	pxa168_eth_stop(dev);
+	if (pxa168_eth_open(dev)) {
+		dev_printk(KERN_ERR, &dev->dev,
+			   "fatal error on re-opening device after "
+			   "MTU change\n");
+	}
+
+	return 0;
+}
+
+static int eth_alloc_tx_desc_index(struct pxa168_eth_private *pep)
+{
+	int tx_desc_curr;
+
+	tx_desc_curr = pep->tx_curr_desc_q;
+	pep->tx_curr_desc_q = (tx_desc_curr + 1) % pep->tx_ring_size;
+	BUG_ON(pep->tx_curr_desc_q == pep->tx_used_desc_q);
+	pep->tx_desc_count++;
+
+	return tx_desc_curr;
+}
+
+static int pxa168_rx_poll(struct napi_struct *napi, int budget)
+{
+	struct pxa168_eth_private *pep =
+	    container_of(napi, struct pxa168_eth_private, napi);
+	struct net_device *dev = pep->dev;
+	int work_done = 0;
+
+	if (unlikely(pep->work_todo & WORK_LINK)) {
+		pep->work_todo &= ~(WORK_LINK);
+		handle_link_event(pep);
+	}
+	/*
+	 * We call txq_reclaim every time since in NAPI interupts are disabled
+	 * and due to this we miss the TX_DONE interrupt,which is not updated in
+	 * interrupt status register.
+	 */
+	txq_reclaim(dev, 0);
+	if (netif_queue_stopped(dev)
+	    && pep->tx_ring_size - pep->tx_desc_count > 1) {
+		netif_wake_queue(dev);
+	}
+	work_done = rxq_process(dev, budget);
+	if (work_done < budget) {
+		napi_complete(napi);
+		wrl(pep, INT_MASK, ALL_INTS);
+	}
+
+	return work_done;
+}
+
+static int pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct tx_desc *desc;
+	int tx_index;
+	int length;
+
+	tx_index = eth_alloc_tx_desc_index(pep);
+	desc = &pep->p_tx_desc_area[tx_index];
+	length = skb->len;
+	pep->tx_skb[tx_index] = skb;
+	desc->byte_cnt = length;
+	desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
+	wmb();
+	desc->cmd_sts = BUF_OWNED_BY_DMA | TX_GEN_CRC | TX_FIRST_DESC |
+			TX_ZERO_PADDING | TX_LAST_DESC | TX_EN_INT;
+	wmb();
+	wrl(pep, SDMA_CMD, SDMA_CMD_TXDH | SDMA_CMD_ERD);
+
+	stats->tx_bytes += skb->len;
+	stats->tx_packets++;
+	dev->trans_start = jiffies;
+	if (pep->tx_ring_size - pep->tx_desc_count <= 1) {
+		/* We handled the current skb, but now we are out of space.*/
+		netif_stop_queue(dev);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static int smi_wait_ready(struct pxa168_eth_private *pep)
+{
+	int i = 0;
+
+	/* wait for the SMI register to become available */
+	for (i = 0; rdl(pep, SMI) & SMI_BUSY; i++) {
+		if (i == PHY_WAIT_ITERATIONS)
+			return -ETIMEDOUT;
+		msleep(10);
+	}
+
+	return 0;
+}
+
+static int pxa168_smi_read(struct mii_bus *bus, int phy_addr, int regnum)
+{
+	struct pxa168_eth_private *pep = bus->priv;
+	int i = 0;
+	int val;
+
+	if (smi_wait_ready(pep)) {
+		printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n");
+		return -ETIMEDOUT;
+	}
+	wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) | SMI_OP_R);
+	/* now wait for the data to be valid */
+	for (i = 0; !((val = rdl(pep, SMI)) & SMI_R_VALID); i++) {
+		if (i == PHY_WAIT_ITERATIONS) {
+			printk(KERN_WARNING
+				"pxa168_eth: SMI bus read not valid\n");
+			return -ENODEV;
+		}
+		msleep(10);
+	}
+
+	return val & 0xffff;
+}
+
+static int pxa168_smi_write(struct mii_bus *bus, int phy_addr, int regnum,
+			    u16 value)
+{
+	struct pxa168_eth_private *pep = bus->priv;
+
+	if (smi_wait_ready(pep)) {
+		printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) |
+	    SMI_OP_W | (value & 0xffff));
+
+	if (smi_wait_ready(pep)) {
+		printk(KERN_ERR "pxa168_eth: SMI bus busy timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr,
+			       int cmd)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	if (pep->phy != NULL)
+		return phy_mii_ioctl(pep->phy, if_mii(ifr), cmd);
+
+	return -EOPNOTSUPP;
+}
+
+static struct phy_device *phy_scan(struct pxa168_eth_private *pep, int phy_addr)
+{
+	struct mii_bus *bus = pep->smi_bus;
+	struct phy_device *phydev;
+	int start;
+	int num;
+	int i;
+
+	if (phy_addr == PXA168_ETH_PHY_ADDR_DEFAULT) {
+		/* Scan entire range */
+		start = ethernet_phy_get(pep);
+		num = 32;
+	} else {
+		/* Use phy addr specific to platform */
+		start = phy_addr & 0x1f;
+		num = 1;
+	}
+	phydev = NULL;
+	for (i = 0; i < num; i++) {
+		int addr = (start + i) & 0x1f;
+		if (bus->phy_map[addr] == NULL)
+			mdiobus_scan(bus, addr);
+
+		if (phydev == NULL) {
+			phydev = bus->phy_map[addr];
+			if (phydev != NULL)
+				ethernet_phy_set_addr(pep, addr);
+		}
+	}
+
+	return phydev;
+}
+
+static void phy_init(struct pxa168_eth_private *pep, int speed, int duplex)
+{
+	struct phy_device *phy = pep->phy;
+	ethernet_phy_reset(pep);
+
+	phy_attach(pep->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_MII);
+
+	if (speed == 0) {
+		phy->autoneg = AUTONEG_ENABLE;
+		phy->speed = 0;
+		phy->duplex = 0;
+		phy->supported &= PHY_BASIC_FEATURES;
+		phy->advertising = phy->supported | ADVERTISED_Autoneg;
+	} else {
+		phy->autoneg = AUTONEG_DISABLE;
+		phy->advertising = 0;
+		phy->speed = speed;
+		phy->duplex = duplex;
+	}
+	phy_start_aneg(phy);
+}
+
+static int ethernet_phy_setup(struct net_device *dev)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+
+	if (pep->pd != NULL) {
+		if (pep->pd->init)
+			pep->pd->init();
+	}
+	pep->phy = phy_scan(pep, pep->pd->phy_addr & 0x1f);
+	if (pep->phy != NULL)
+		phy_init(pep, pep->pd->speed, pep->pd->duplex);
+	update_hash_table_mac_address(pep, NULL, dev->dev_addr);
+
+	return 0;
+}
+
+static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+	int err;
+
+	err = phy_read_status(pep->phy);
+	if (err == 0)
+		err = phy_ethtool_gset(pep->phy, cmd);
+
+	return err;
+}
+
+static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+
+	return phy_ethtool_sset(pep->phy, cmd);
+}
+
+static void pxa168_get_drvinfo(struct net_device *dev,
+			       struct ethtool_drvinfo *info)
+{
+	strncpy(info->driver, DRIVER_NAME, 32);
+	strncpy(info->version, DRIVER_VERSION, 32);
+	strncpy(info->fw_version, "N/A", 32);
+	strncpy(info->bus_info, "N/A", 32);
+}
+
+static u32 pxa168_get_link(struct net_device *dev)
+{
+	return !!netif_carrier_ok(dev);
+}
+
+static const struct ethtool_ops pxa168_ethtool_ops = {
+	.get_settings = pxa168_get_settings,
+	.set_settings = pxa168_set_settings,
+	.get_drvinfo = pxa168_get_drvinfo,
+	.get_link = pxa168_get_link,
+};
+
+static const struct net_device_ops pxa168_eth_netdev_ops = {
+	.ndo_open = pxa168_eth_open,
+	.ndo_stop = pxa168_eth_stop,
+	.ndo_start_xmit = pxa168_eth_start_xmit,
+	.ndo_set_rx_mode = pxa168_eth_set_rx_mode,
+	.ndo_set_mac_address = pxa168_eth_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_do_ioctl = pxa168_eth_do_ioctl,
+	.ndo_change_mtu = pxa168_eth_change_mtu,
+	.ndo_tx_timeout = pxa168_eth_tx_timeout,
+};
+
+static int pxa168_eth_probe(struct platform_device *pdev)
+{
+	struct pxa168_eth_private *pep = NULL;
+	struct net_device *dev = NULL;
+	struct resource *res;
+	struct clk *clk;
+	int err;
+
+	printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n");
+
+	clk = clk_get(&pdev->dev, "MFUCLK");
+	if (IS_ERR(clk)) {
+		printk(KERN_ERR "%s: Fast Ethernet failed to get clock\n",
+			DRIVER_NAME);
+		return -ENODEV;
+	}
+	clk_enable(clk);
+
+	dev = alloc_etherdev(sizeof(struct pxa168_eth_private));
+	if (!dev) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	platform_set_drvdata(pdev, dev);
+	pep = netdev_priv(dev);
+	pep->dev = dev;
+	pep->clk = clk;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		err = -ENODEV;
+		goto out;
+	}
+	pep->base = ioremap(res->start, res->end - res->start + 1);
+	if (pep->base == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	BUG_ON(!res);
+	dev->irq = res->start;
+	dev->netdev_ops = &pxa168_eth_netdev_ops;
+	dev->watchdog_timeo = 2 * HZ;
+	dev->base_addr = 0;
+	SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops);
+
+	INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task);
+
+	printk(KERN_INFO "%s:Using random mac address\n", DRIVER_NAME);
+	random_ether_addr(dev->dev_addr);
+
+	pep->pd = pdev->dev.platform_data;
+	pep->rx_ring_size = NUM_RX_DESCS;
+	if (pep->pd->rx_queue_size)
+		pep->rx_ring_size = pep->pd->rx_queue_size;
+
+	pep->tx_ring_size = NUM_TX_DESCS;
+	if (pep->pd->tx_queue_size)
+		pep->tx_ring_size = pep->pd->tx_queue_size;
+
+	pep->port_num = pep->pd->port_number;
+	/* Hardware supports only 3 ports */
+	BUG_ON(pep->port_num > 2);
+	netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size);
+
+	memset(&pep->timeout, 0, sizeof(struct timer_list));
+	init_timer(&pep->timeout);
+	pep->timeout.function = rxq_refill_timer_wrapper;
+	pep->timeout.data = (unsigned long)pep;
+
+	pep->smi_bus = mdiobus_alloc();
+	if (pep->smi_bus == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+	pep->smi_bus->priv = pep;
+	pep->smi_bus->name = "pxa168_eth smi";
+	pep->smi_bus->read = pxa168_smi_read;
+	pep->smi_bus->write = pxa168_smi_write;
+	snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id);
+	pep->smi_bus->parent = &pdev->dev;
+	pep->smi_bus->phy_mask = 0xffffffff;
+	if (mdiobus_register(pep->smi_bus) < 0) {
+		err = -ENOMEM;
+		goto out;
+	}
+	pxa168_init_hw(pep);
+	err = ethernet_phy_setup(dev);
+	if (err)
+		goto out;
+	SET_NETDEV_DEV(dev, &pdev->dev);
+	err = register_netdev(dev);
+	if (err)
+		goto out;
+	return 0;
+out:
+	if (pep->clk) {
+		clk_disable(pep->clk);
+		clk_put(pep->clk);
+		pep->clk = NULL;
+	}
+	if (pep->base) {
+		iounmap(pep->base);
+		pep->base = NULL;
+	}
+	if (dev)
+		free_netdev(dev);
+	return err;
+}
+
+static int pxa168_eth_remove(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+	struct pxa168_eth_private *pep = netdev_priv(dev);
+
+	if (pep->htpr) {
+		dma_free_coherent(pep->dev->dev.parent, HASH_ADDR_TABLE_SIZE,
+				  pep->htpr, pep->htpr_dma);
+		pep->htpr = NULL;
+	}
+	if (pep->clk) {
+		clk_disable(pep->clk);
+		clk_put(pep->clk);
+		pep->clk = NULL;
+	}
+	if (pep->phy != NULL)
+		phy_detach(pep->phy);
+
+	iounmap(pep->base);
+	pep->base = NULL;
+	unregister_netdev(dev);
+	flush_scheduled_work();
+	free_netdev(dev);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static void pxa168_eth_shutdown(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+	eth_port_reset(dev);
+}
+
+#ifdef CONFIG_PM
+static int pxa168_eth_resume(struct platform_device *pdev)
+{
+	return -ENOSYS;
+}
+
+static int pxa168_eth_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	return -ENOSYS;
+}
+
+#else
+#define pxa168_eth_resume NULL
+#define pxa168_eth_suspend NULL
+#endif
+
+static struct platform_driver pxa168_eth_driver = {
+	.probe = pxa168_eth_probe,
+	.remove = pxa168_eth_remove,
+	.shutdown = pxa168_eth_shutdown,
+	.resume = pxa168_eth_resume,
+	.suspend = pxa168_eth_suspend,
+	.driver = {
+		   .name = DRIVER_NAME,
+		   },
+};
+
+static int __init pxa168_init_module(void)
+{
+	return platform_driver_register(&pxa168_eth_driver);
+}
+
+static void __exit pxa168_cleanup_module(void)
+{
+	platform_driver_unregister(&pxa168_eth_driver);
+}
+
+module_init(pxa168_init_module);
+module_exit(pxa168_cleanup_module);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Ethernet driver for Marvell PXA168");
+MODULE_ALIAS("platform:pxa168_eth");
diff --git a/include/linux/pxa168_eth.h b/include/linux/pxa168_eth.h
new file mode 100644
index 000000000000..18d75e795606
--- /dev/null
+++ b/include/linux/pxa168_eth.h
@@ -0,0 +1,30 @@
+/*
+ *pxa168 ethernet platform device data definition file.
+ */
+#ifndef __LINUX_PXA168_ETH_H
+#define __LINUX_PXA168_ETH_H
+
+struct pxa168_eth_platform_data {
+	int	port_number;
+	int	phy_addr;
+
+	/*
+	 * If speed is 0, then speed and duplex are autonegotiated.
+	 */
+	int	speed;		/* 0, SPEED_10, SPEED_100 */
+	int	duplex;		/* DUPLEX_HALF or DUPLEX_FULL */
+
+	/*
+	 * Override default RX/TX queue sizes if nonzero.
+	 */
+	int	rx_queue_size;
+	int	tx_queue_size;
+
+	/*
+	 * init callback is used for board specific initialization
+	 * e.g on Aspenite its used to initialize the PHY transceiver.
+	 */
+	int (*init)(void);
+};
+
+#endif /* __LINUX_PXA168_ETH_H */
-- 
cgit v1.2.3-59-g8ed1b


From e243f5b6de35b6fc394bc2e1e1737afe538e7e0c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 15 Aug 2010 10:03:57 +0000
Subject: netfilter: fix userspace header warning

"make headers_check" issued the following warning:

  CHECK   include/linux/netfilter (64 files)
usr/include/linux/netfilter/xt_ipvs.h:19: found __[us]{8,16,32,64} type without #include <linux/types.h>

Fix this by as suggested including linux/types.h.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/xt_ipvs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h
index 1167aeb7a347..eff34ac18808 100644
--- a/include/linux/netfilter/xt_ipvs.h
+++ b/include/linux/netfilter/xt_ipvs.h
@@ -1,6 +1,8 @@
 #ifndef _XT_IPVS_H
 #define _XT_IPVS_H
 
+#include <linux/types.h>
+
 enum {
 	XT_IPVS_IPVS_PROPERTY =	1 << 0, /* all other options imply this one */
 	XT_IPVS_PROTO =		1 << 1,
-- 
cgit v1.2.3-59-g8ed1b


From ede1b4290781ae82ccf0f2ecc6dada8d3dd35779 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 18 Aug 2010 15:33:13 -0700
Subject: tracing: Fix timer tracing

PowerTOP would like to be able to trace timers.

Unfortunately, the current timer tracing is not very useful: the
actual timer function is not recorded in the trace at the start
of timer execution.

Although this is recorded for timer "start" time (when it gets
armed), this is not useful; most timers get started early, and a
tracer like PowerTOP will never see this event, but will only
see the actual running of the  timer.

This patch just adds the function to the timer tracing; I've
verified with PowerTOP that now it can get useful information
about timers.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: xiaoguangrong@cn.fujitsu.com
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org> # .35.x, .34.x, .33.x
LKML-Reference: <4C6C5FA9.3000405@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/timer.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index c624126a9c8a..425bcfe56c62 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -81,14 +81,16 @@ TRACE_EVENT(timer_expire_entry,
 	TP_STRUCT__entry(
 		__field( void *,	timer	)
 		__field( unsigned long,	now	)
+		__field( void *,	function)
 	),
 
 	TP_fast_assign(
 		__entry->timer		= timer;
 		__entry->now		= jiffies;
+		__entry->function	= timer->function;
 	),
 
-	TP_printk("timer=%p now=%lu", __entry->timer, __entry->now)
+	TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now)
 );
 
 /**
@@ -200,14 +202,16 @@ TRACE_EVENT(hrtimer_expire_entry,
 	TP_STRUCT__entry(
 		__field( void *,	hrtimer	)
 		__field( s64,		now	)
+		__field( void *,	function)
 	),
 
 	TP_fast_assign(
 		__entry->hrtimer	= hrtimer;
 		__entry->now		= now->tv64;
+		__entry->function	= hrtimer->function;
 	),
 
-	TP_printk("hrtimer=%p now=%llu", __entry->hrtimer,
+	TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function,
 		  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
 
-- 
cgit v1.2.3-59-g8ed1b


From 1495cc9df4e81f5a8fa9b0b8f1034b14d24b7d8c Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 17 Aug 2010 21:15:46 -0700
Subject: Input: sysrq - drop tty argument from sysrq ops handlers

Noone is using tty argument so let's get rid of it.

Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 arch/arm/kernel/etm.c           |  2 +-
 arch/powerpc/xmon/xmon.c        |  5 ++---
 arch/sparc/kernel/process_64.c  |  2 +-
 drivers/char/sysrq.c            | 42 ++++++++++++++++++++---------------------
 drivers/gpu/drm/drm_fb_helper.c |  2 +-
 drivers/net/ibm_newemac/debug.c |  2 +-
 include/linux/sysrq.h           |  6 +++++-
 kernel/debug/debug_core.c       |  2 +-
 kernel/power/poweroff.c         |  2 +-
 9 files changed, 34 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c
index 56418f98cd01..33c7077174db 100644
--- a/arch/arm/kernel/etm.c
+++ b/arch/arm/kernel/etm.c
@@ -230,7 +230,7 @@ static void etm_dump(void)
 	etb_lock(t);
 }
 
-static void sysrq_etm_dump(int key, struct tty_struct *tty)
+static void sysrq_etm_dump(int key)
 {
 	dev_dbg(tracer.dev, "Dumping ETB buffer\n");
 	etm_dump();
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0554445200bf..d17d04cfb2cd 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2880,15 +2880,14 @@ static void xmon_init(int enable)
 }
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_xmon(int key, struct tty_struct *tty) 
+static void sysrq_handle_xmon(int key)
 {
 	/* ensure xmon is enabled */
 	xmon_init(1);
 	debugger(get_irq_regs());
 }
 
-static struct sysrq_key_op sysrq_xmon_op = 
-{
+static struct sysrq_key_op sysrq_xmon_op = {
 	.handler =	sysrq_handle_xmon,
 	.help_msg =	"Xmon",
 	.action_msg =	"Entering xmon",
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index dbe81a368b45..25b01b43b40d 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -303,7 +303,7 @@ void arch_trigger_all_cpu_backtrace(void)
 
 #ifdef CONFIG_MAGIC_SYSRQ
 
-static void sysrq_handle_globreg(int key, struct tty_struct *tty)
+static void sysrq_handle_globreg(int key)
 {
 	arch_trigger_all_cpu_backtrace();
 }
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 878ac0c2cc68..a892a3c249dd 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -76,7 +76,7 @@ static int __init sysrq_always_enabled_setup(char *str)
 __setup("sysrq_always_enabled", sysrq_always_enabled_setup);
 
 
-static void sysrq_handle_loglevel(int key, struct tty_struct *tty)
+static void sysrq_handle_loglevel(int key)
 {
 	int i;
 
@@ -93,7 +93,7 @@ static struct sysrq_key_op sysrq_loglevel_op = {
 };
 
 #ifdef CONFIG_VT
-static void sysrq_handle_SAK(int key, struct tty_struct *tty)
+static void sysrq_handle_SAK(int key)
 {
 	struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work;
 	schedule_work(SAK_work);
@@ -109,7 +109,7 @@ static struct sysrq_key_op sysrq_SAK_op = {
 #endif
 
 #ifdef CONFIG_VT
-static void sysrq_handle_unraw(int key, struct tty_struct *tty)
+static void sysrq_handle_unraw(int key)
 {
 	struct kbd_struct *kbd = &kbd_table[fg_console];
 
@@ -126,7 +126,7 @@ static struct sysrq_key_op sysrq_unraw_op = {
 #define sysrq_unraw_op (*(struct sysrq_key_op *)NULL)
 #endif /* CONFIG_VT */
 
-static void sysrq_handle_crash(int key, struct tty_struct *tty)
+static void sysrq_handle_crash(int key)
 {
 	char *killer = NULL;
 
@@ -141,7 +141,7 @@ static struct sysrq_key_op sysrq_crash_op = {
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
 
-static void sysrq_handle_reboot(int key, struct tty_struct *tty)
+static void sysrq_handle_reboot(int key)
 {
 	lockdep_off();
 	local_irq_enable();
@@ -154,7 +154,7 @@ static struct sysrq_key_op sysrq_reboot_op = {
 	.enable_mask	= SYSRQ_ENABLE_BOOT,
 };
 
-static void sysrq_handle_sync(int key, struct tty_struct *tty)
+static void sysrq_handle_sync(int key)
 {
 	emergency_sync();
 }
@@ -165,7 +165,7 @@ static struct sysrq_key_op sysrq_sync_op = {
 	.enable_mask	= SYSRQ_ENABLE_SYNC,
 };
 
-static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
+static void sysrq_handle_show_timers(int key)
 {
 	sysrq_timer_list_show();
 }
@@ -176,7 +176,7 @@ static struct sysrq_key_op sysrq_show_timers_op = {
 	.action_msg	= "Show clockevent devices & pending hrtimers (no others)",
 };
 
-static void sysrq_handle_mountro(int key, struct tty_struct *tty)
+static void sysrq_handle_mountro(int key)
 {
 	emergency_remount();
 }
@@ -188,7 +188,7 @@ static struct sysrq_key_op sysrq_mountro_op = {
 };
 
 #ifdef CONFIG_LOCKDEP
-static void sysrq_handle_showlocks(int key, struct tty_struct *tty)
+static void sysrq_handle_showlocks(int key)
 {
 	debug_show_all_locks();
 }
@@ -226,7 +226,7 @@ static void sysrq_showregs_othercpus(struct work_struct *dummy)
 
 static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus);
 
-static void sysrq_handle_showallcpus(int key, struct tty_struct *tty)
+static void sysrq_handle_showallcpus(int key)
 {
 	/*
 	 * Fall back to the workqueue based printing if the
@@ -252,7 +252,7 @@ static struct sysrq_key_op sysrq_showallcpus_op = {
 };
 #endif
 
-static void sysrq_handle_showregs(int key, struct tty_struct *tty)
+static void sysrq_handle_showregs(int key)
 {
 	struct pt_regs *regs = get_irq_regs();
 	if (regs)
@@ -266,7 +266,7 @@ static struct sysrq_key_op sysrq_showregs_op = {
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
 
-static void sysrq_handle_showstate(int key, struct tty_struct *tty)
+static void sysrq_handle_showstate(int key)
 {
 	show_state();
 }
@@ -277,7 +277,7 @@ static struct sysrq_key_op sysrq_showstate_op = {
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
 
-static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty)
+static void sysrq_handle_showstate_blocked(int key)
 {
 	show_state_filter(TASK_UNINTERRUPTIBLE);
 }
@@ -291,7 +291,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = {
 #ifdef CONFIG_TRACING
 #include <linux/ftrace.h>
 
-static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
+static void sysrq_ftrace_dump(int key)
 {
 	ftrace_dump(DUMP_ALL);
 }
@@ -305,7 +305,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
 #define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)NULL)
 #endif
 
-static void sysrq_handle_showmem(int key, struct tty_struct *tty)
+static void sysrq_handle_showmem(int key)
 {
 	show_mem();
 }
@@ -330,7 +330,7 @@ static void send_sig_all(int sig)
 	}
 }
 
-static void sysrq_handle_term(int key, struct tty_struct *tty)
+static void sysrq_handle_term(int key)
 {
 	send_sig_all(SIGTERM);
 	console_loglevel = 8;
@@ -349,7 +349,7 @@ static void moom_callback(struct work_struct *ignored)
 
 static DECLARE_WORK(moom_work, moom_callback);
 
-static void sysrq_handle_moom(int key, struct tty_struct *tty)
+static void sysrq_handle_moom(int key)
 {
 	schedule_work(&moom_work);
 }
@@ -361,7 +361,7 @@ static struct sysrq_key_op sysrq_moom_op = {
 };
 
 #ifdef CONFIG_BLOCK
-static void sysrq_handle_thaw(int key, struct tty_struct *tty)
+static void sysrq_handle_thaw(int key)
 {
 	emergency_thaw_all();
 }
@@ -373,7 +373,7 @@ static struct sysrq_key_op sysrq_thaw_op = {
 };
 #endif
 
-static void sysrq_handle_kill(int key, struct tty_struct *tty)
+static void sysrq_handle_kill(int key)
 {
 	send_sig_all(SIGKILL);
 	console_loglevel = 8;
@@ -385,7 +385,7 @@ static struct sysrq_key_op sysrq_kill_op = {
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
 
-static void sysrq_handle_unrt(int key, struct tty_struct *tty)
+static void sysrq_handle_unrt(int key)
 {
 	normalize_rt_tasks();
 }
@@ -520,7 +520,7 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
 		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
 			printk("%s\n", op_p->action_msg);
 			console_loglevel = orig_log_level;
-			op_p->handler(key, tty);
+			op_p->handler(key);
 		} else {
 			printk("This sysrq operation is disabled.\n");
 		}
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index de82e201d682..5efd6d6742ec 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -369,7 +369,7 @@ static void drm_fb_helper_restore_work_fn(struct work_struct *ignored)
 }
 static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn);
 
-static void drm_fb_helper_sysrq(int dummy1, struct tty_struct *dummy3)
+static void drm_fb_helper_sysrq(int dummy1)
 {
 	schedule_work(&drm_fb_helper_restore_work);
 }
diff --git a/drivers/net/ibm_newemac/debug.c b/drivers/net/ibm_newemac/debug.c
index 3995fafc1e08..8c6c1e2a8750 100644
--- a/drivers/net/ibm_newemac/debug.c
+++ b/drivers/net/ibm_newemac/debug.c
@@ -238,7 +238,7 @@ void emac_dbg_dump_all(void)
 }
 
 #if defined(CONFIG_MAGIC_SYSRQ)
-static void emac_sysrq_handler(int key, struct tty_struct *tty)
+static void emac_sysrq_handler(int key)
 {
 	emac_dbg_dump_all();
 }
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 609e8ca5f534..4ee650315119 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -31,7 +31,7 @@ struct tty_struct;
 #define SYSRQ_ENABLE_RTNICE	0x0100
 
 struct sysrq_key_op {
-	void (*handler)(int, struct tty_struct *);
+	void (*handler)(int);
 	char *help_msg;
 	char *action_msg;
 	int enable_mask;
@@ -58,6 +58,10 @@ static inline void handle_sysrq(int key, struct tty_struct *tty)
 {
 }
 
+static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask);
+{
+}
+
 static inline int register_sysrq_key(int key, struct sysrq_key_op *op)
 {
 	return -EINVAL;
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 3c2d4972d235..de407c78178d 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -741,7 +741,7 @@ static struct console kgdbcons = {
 };
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_dbg(int key, struct tty_struct *tty)
+static void sysrq_handle_dbg(int key)
 {
 	if (!dbg_io_ops) {
 		printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index e8b337006276..d52359374e85 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -24,7 +24,7 @@ static void do_poweroff(struct work_struct *dummy)
 
 static DECLARE_WORK(poweroff_work, do_poweroff);
 
-static void handle_poweroff(int key, struct tty_struct *tty)
+static void handle_poweroff(int key)
 {
 	/* run sysrq poweroff on boot cpu */
 	schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work);
-- 
cgit v1.2.3-59-g8ed1b


From b35de43b31040828f83046f40fd34ba33146409d Mon Sep 17 00:00:00 2001
From: Andrea Righi <arighi@develer.com>
Date: Thu, 19 Aug 2010 14:13:27 -0700
Subject: kfifo: implement missing __kfifo_skip_r()

kfifo_skip() is currently broken, due to the missing of the internal
helper function.  Add it.

Signed-off-by: Andrea Righi <arighi@develer.com>
Cc: Greg KH <greg@kroah.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 2 ++
 kernel/kfifo.c        | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 311f8753d713..4aa95f203f3e 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -836,6 +836,8 @@ extern void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize);
 
 extern unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize);
 
+extern void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize);
+
 extern unsigned int __kfifo_out_peek_r(struct __kfifo *fifo,
 	void *buf, unsigned int len, size_t recsize);
 
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 4502604ecadf..6b5580c57644 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -503,6 +503,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf,
 }
 EXPORT_SYMBOL(__kfifo_out_r);
 
+void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize)
+{
+	unsigned int n;
+
+	n = __kfifo_peek_n(fifo, recsize);
+	fifo->out += n + recsize;
+}
+EXPORT_SYMBOL(__kfifo_skip_r);
+
 int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from,
 	unsigned long len, unsigned int *copied, size_t recsize)
 {
-- 
cgit v1.2.3-59-g8ed1b


From f335397d177c906256ee1bba28e8c49e8ec63817 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 17 Aug 2010 21:15:47 -0700
Subject: Input: sysrq - drop tty argument form handle_sysrq()

Sysrq operations do not accept tty argument anymore so no need to pass
it to us.

[Stephen Rothwell <sfr@canb.auug.org.au>: fix build breakage in drm code
 caused by sysrq using bool but not including linux/types.h]

[Sachin Sant <sachinp@in.ibm.com>: fix build breakage in s390 keyboadr
 driver]

Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 arch/ia64/hp/sim/simserial.c    |  2 +-
 arch/um/drivers/mconsole_kern.c |  2 +-
 drivers/char/hangcheck-timer.c  |  2 +-
 drivers/char/hvc_console.c      |  2 +-
 drivers/char/hvsi.c             |  2 +-
 drivers/char/sysrq.c            | 11 +++++------
 drivers/s390/char/ctrlchar.c    |  4 +---
 drivers/s390/char/keyboard.c    |  2 +-
 drivers/serial/sn_console.c     |  2 +-
 drivers/usb/serial/generic.c    |  2 +-
 drivers/xen/manage.c            |  2 +-
 include/linux/serial_core.h     |  2 +-
 include/linux/sysrq.h           | 12 +++++-------
 kernel/debug/kdb/kdb_main.c     |  2 +-
 14 files changed, 22 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 2bef5261d96d..1e8d71ad93ef 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -149,7 +149,7 @@ static  void receive_chars(struct tty_struct *tty)
 						ch = ia64_ssc(0, 0, 0, 0,
 							      SSC_GETCHAR);
 					while (!ch);
-					handle_sysrq(ch, NULL);
+					handle_sysrq(ch);
 				}
 #endif
 				seen_esc = 0;
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index de317d0c3294..ebc680717e59 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -690,7 +690,7 @@ static void with_console(struct mc_request *req, void (*proc)(void *),
 static void sysrq_proc(void *arg)
 {
 	char *op = arg;
-	handle_sysrq(*op, NULL);
+	handle_sysrq(*op);
 }
 
 void mconsole_sysrq(struct mc_request *req)
diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
index e0249722d25f..f953c96efc86 100644
--- a/drivers/char/hangcheck-timer.c
+++ b/drivers/char/hangcheck-timer.c
@@ -159,7 +159,7 @@ static void hangcheck_fire(unsigned long data)
 		if (hangcheck_dump_tasks) {
 			printk(KERN_CRIT "Hangcheck: Task state:\n");
 #ifdef CONFIG_MAGIC_SYSRQ
-			handle_sysrq('t', NULL);
+			handle_sysrq('t');
 #endif  /* CONFIG_MAGIC_SYSRQ */
 		}
 		if (hangcheck_reboot) {
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index fa27d1676ee5..3afd62e856eb 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -651,7 +651,7 @@ int hvc_poll(struct hvc_struct *hp)
 					if (sysrq_pressed)
 						continue;
 				} else if (sysrq_pressed) {
-					handle_sysrq(buf[i], tty);
+					handle_sysrq(buf[i]);
 					sysrq_pressed = 0;
 					continue;
 				}
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index 1f4b6de65a2d..a2bc885ce60a 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -403,7 +403,7 @@ static void hvsi_insert_chars(struct hvsi_struct *hp, const char *buf, int len)
 			hp->sysrq = 1;
 			continue;
 		} else if (hp->sysrq) {
-			handle_sysrq(c, hp->tty);
+			handle_sysrq(c);
 			hp->sysrq = 0;
 			continue;
 		}
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index a892a3c249dd..ef31bb81e843 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -18,7 +18,6 @@
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
-#include <linux/tty.h>
 #include <linux/mount.h>
 #include <linux/kdev_t.h>
 #include <linux/major.h>
@@ -493,7 +492,7 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p)
                 sysrq_key_table[i] = op_p;
 }
 
-void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
+void __handle_sysrq(int key, bool check_mask)
 {
 	struct sysrq_key_op *op_p;
 	int orig_log_level;
@@ -545,10 +544,10 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
 	spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
 }
 
-void handle_sysrq(int key, struct tty_struct *tty)
+void handle_sysrq(int key)
 {
 	if (sysrq_on())
-		__handle_sysrq(key, tty, 1);
+		__handle_sysrq(key, true);
 }
 EXPORT_SYMBOL(handle_sysrq);
 
@@ -597,7 +596,7 @@ static bool sysrq_filter(struct input_handle *handle, unsigned int type,
 
 	default:
 		if (sysrq_down && value && value != 2)
-			__handle_sysrq(sysrq_xlate[code], NULL, 1);
+			__handle_sysrq(sysrq_xlate[code], true);
 		break;
 	}
 
@@ -765,7 +764,7 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
 
 		if (get_user(c, buf))
 			return -EFAULT;
-		__handle_sysrq(c, NULL, 0);
+		__handle_sysrq(c, false);
 	}
 
 	return count;
diff --git a/drivers/s390/char/ctrlchar.c b/drivers/s390/char/ctrlchar.c
index c6cbcb3f925e..0e9a309b9669 100644
--- a/drivers/s390/char/ctrlchar.c
+++ b/drivers/s390/char/ctrlchar.c
@@ -16,12 +16,11 @@
 
 #ifdef CONFIG_MAGIC_SYSRQ
 static int ctrlchar_sysrq_key;
-static struct tty_struct *sysrq_tty;
 
 static void
 ctrlchar_handle_sysrq(struct work_struct *work)
 {
-	handle_sysrq(ctrlchar_sysrq_key, sysrq_tty);
+	handle_sysrq(ctrlchar_sysrq_key);
 }
 
 static DECLARE_WORK(ctrlchar_work, ctrlchar_handle_sysrq);
@@ -54,7 +53,6 @@ ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty)
 	/* racy */
 	if (len == 3 && buf[1] == '-') {
 		ctrlchar_sysrq_key = buf[2];
-		sysrq_tty = tty;
 		schedule_work(&ctrlchar_work);
 		return CTRLCHAR_SYSRQ;
 	}
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 18d9a497863b..8cd58e412b5e 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -305,7 +305,7 @@ kbd_keycode(struct kbd_data *kbd, unsigned int keycode)
 		if (kbd->sysrq) {
 			if (kbd->sysrq == K(KT_LATIN, '-')) {
 				kbd->sysrq = 0;
-				handle_sysrq(value, kbd->tty);
+				handle_sysrq(value);
 				return;
 			}
 			if (value == '-') {
diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c
index 7e5e5efea4e2..cff9a306660f 100644
--- a/drivers/serial/sn_console.c
+++ b/drivers/serial/sn_console.c
@@ -492,7 +492,7 @@ sn_receive_chars(struct sn_cons_port *port, unsigned long flags)
                         sysrq_requested = 0;
                         if (ch && time_before(jiffies, sysrq_timeout)) {
                                 spin_unlock_irqrestore(&port->sc_port.lock, flags);
-                                handle_sysrq(ch, NULL);
+                                handle_sysrq(ch);
                                 spin_lock_irqsave(&port->sc_port.lock, flags);
                                 /* ignore actual sysrq command char */
                                 continue;
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index ca92f67747cc..1e846cc3c7a4 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -453,7 +453,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty,
 {
 	if (port->sysrq && port->port.console) {
 		if (ch && time_before(jiffies, port->sysrq)) {
-			handle_sysrq(ch, tty);
+			handle_sysrq(ch);
 			port->sysrq = 0;
 			return 1;
 		}
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 1799bd890315..ef9c7db52077 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -237,7 +237,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
 		goto again;
 
 	if (sysrq_key != '\0')
-		handle_sysrq(sysrq_key, NULL);
+		handle_sysrq(sysrq_key);
 }
 
 static struct xenbus_watch sysrq_watch = {
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 3c2ad99fed34..64458a9a8938 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -465,7 +465,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
 #ifdef SUPPORT_SYSRQ
 	if (port->sysrq) {
 		if (ch && time_before(jiffies, port->sysrq)) {
-			handle_sysrq(ch, port->state->port.tty);
+			handle_sysrq(ch);
 			port->sysrq = 0;
 			return 1;
 		}
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 4ee650315119..387fa7d05c98 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -15,9 +15,7 @@
 #define _LINUX_SYSRQ_H
 
 #include <linux/errno.h>
-
-struct pt_regs;
-struct tty_struct;
+#include <linux/types.h>
 
 /* Possible values of bitmask for enabling sysrq functions */
 /* 0x0001 is reserved for enable everything */
@@ -44,8 +42,8 @@ struct sysrq_key_op {
  * are available -- else NULL's).
  */
 
-void handle_sysrq(int key, struct tty_struct *tty);
-void __handle_sysrq(int key, struct tty_struct *tty, int check_mask);
+void handle_sysrq(int key);
+void __handle_sysrq(int key, bool check_mask);
 int register_sysrq_key(int key, struct sysrq_key_op *op);
 int unregister_sysrq_key(int key, struct sysrq_key_op *op);
 struct sysrq_key_op *__sysrq_get_key_op(int key);
@@ -54,11 +52,11 @@ int sysrq_toggle_support(int enable_mask);
 
 #else
 
-static inline void handle_sysrq(int key, struct tty_struct *tty)
+static inline void handle_sysrq(int key)
 {
 }
 
-static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask);
+static inline void __handle_sysrq(int key, bool check_mask)
 {
 }
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 28b844118bbd..caf057a3de0e 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1929,7 +1929,7 @@ static int kdb_sr(int argc, const char **argv)
 	if (argc != 1)
 		return KDB_ARGCOUNT;
 	kdb_trap_printk++;
-	__handle_sysrq(*argv[1], NULL, 0);
+	__handle_sysrq(*argv[1], false);
 	kdb_trap_printk--;
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 6ee9f4b4affe751d313d2538999aeec134d413a6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 17 Aug 2010 21:15:47 -0700
Subject: USB: drop tty argument from usb_serial_handle_sysrq_char()

Since handle_sysrq() does not take tty as argument anymore we can
drop it from usb_serial_handle_sysrq_char() as well.

Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/usb/serial/ftdi_sio.c | 2 +-
 drivers/usb/serial/generic.c  | 8 +++-----
 drivers/usb/serial/pl2303.c   | 2 +-
 drivers/usb/serial/ssu100.c   | 2 +-
 include/linux/usb/serial.h    | 3 +--
 5 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index eb12d9b096b4..5d47983b533c 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1831,7 +1831,7 @@ static int ftdi_process_packet(struct tty_struct *tty,
 
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < len; i++, ch++) {
-			if (!usb_serial_handle_sysrq_char(tty, port, *ch))
+			if (!usb_serial_handle_sysrq_char(port, *ch))
 				tty_insert_flip_char(tty, *ch, flag);
 		}
 	} else {
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 1e846cc3c7a4..1abe34c38c08 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -343,7 +343,7 @@ void usb_serial_generic_process_read_urb(struct urb *urb)
 		tty_insert_flip_string(tty, ch, urb->actual_length);
 	else {
 		for (i = 0; i < urb->actual_length; i++, ch++) {
-			if (!usb_serial_handle_sysrq_char(tty, port, *ch))
+			if (!usb_serial_handle_sysrq_char(port, *ch))
 				tty_insert_flip_char(tty, *ch, TTY_NORMAL);
 		}
 	}
@@ -448,8 +448,7 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty)
 EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle);
 
 #ifdef CONFIG_MAGIC_SYSRQ
-int usb_serial_handle_sysrq_char(struct tty_struct *tty,
-			struct usb_serial_port *port, unsigned int ch)
+int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch)
 {
 	if (port->sysrq && port->port.console) {
 		if (ch && time_before(jiffies, port->sysrq)) {
@@ -462,8 +461,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty,
 	return 0;
 }
 #else
-int usb_serial_handle_sysrq_char(struct tty_struct *tty,
-			struct usb_serial_port *port, unsigned int ch)
+int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch)
 {
 	return 0;
 }
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 6b6001822279..34ad7b3d5948 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -788,7 +788,7 @@ static void pl2303_process_read_urb(struct urb *urb)
 
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < urb->actual_length; ++i)
-			if (!usb_serial_handle_sysrq_char(tty, port, data[i]))
+			if (!usb_serial_handle_sysrq_char(port, data[i]))
 				tty_insert_flip_char(tty, data[i], tty_flag);
 	} else {
 		tty_insert_flip_string_fixed_flag(tty, data, tty_flag,
diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c
index 6e82d4f54bc8..819de4740388 100644
--- a/drivers/usb/serial/ssu100.c
+++ b/drivers/usb/serial/ssu100.c
@@ -596,7 +596,7 @@ static int ssu100_process_packet(struct tty_struct *tty,
 
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < len; i++, ch++) {
-			if (!usb_serial_handle_sysrq_char(tty, port, *ch))
+			if (!usb_serial_handle_sysrq_char(port, *ch))
 				tty_insert_flip_char(tty, *ch, flag);
 		}
 	} else
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 84a4c44c208b..55675b1efb28 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -342,8 +342,7 @@ extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
 extern void usb_serial_generic_process_read_urb(struct urb *urb);
 extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
 						void *dest, size_t size);
-extern int usb_serial_handle_sysrq_char(struct tty_struct *tty,
-					struct usb_serial_port *port,
+extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port,
 					unsigned int ch);
 extern int usb_serial_handle_break(struct usb_serial_port *port);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8905aaafb4b5d9764c5b4b54c7d03eb41bb0a7e9 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 19 Aug 2010 09:52:28 -0700
Subject: Input: uinput - add devname alias to allow module on-demand load

Recent modprobe and udev versions allow to create device nodes
for modules which are not loaded. Only the first access will cause
the in-kernel module loader to pull-in the module. Systems which
never access the device node will not needlessly load the module,
and no longer need init scripts or other facilities to unconditionally
load it.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/uinput.c | 2 ++
 include/linux/miscdevice.h  | 1 +
 include/linux/uinput.h      | 1 -
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index bb53fd33cd1c..0d4266a533a5 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -811,6 +811,8 @@ static struct miscdevice uinput_misc = {
 	.minor		= UINPUT_MINOR,
 	.name		= UINPUT_NAME,
 };
+MODULE_ALIAS_MISCDEV(UINPUT_MINOR);
+MODULE_ALIAS("devname:" UINPUT_NAME);
 
 static int __init uinput_init(void)
 {
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index bafffc737903..18fd13028ba1 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -33,6 +33,7 @@
 #define MWAVE_MINOR		219	/* ACP/Mwave Modem */
 #define MPT_MINOR		220
 #define MPT2SAS_MINOR		221
+#define UINPUT_MINOR		223
 #define HPET_MINOR		228
 #define FUSE_MINOR		229
 #define KVM_MINOR		232
diff --git a/include/linux/uinput.h b/include/linux/uinput.h
index 60c81da77f0f..05f7fed2b173 100644
--- a/include/linux/uinput.h
+++ b/include/linux/uinput.h
@@ -37,7 +37,6 @@
 #define UINPUT_VERSION		3
 
 #ifdef __KERNEL__
-#define UINPUT_MINOR		223
 #define UINPUT_NAME		"uinput"
 #define UINPUT_BUFFER_SIZE	16
 #define UINPUT_NUM_REQUESTS	16
-- 
cgit v1.2.3-59-g8ed1b


From 297c5eee372478fc32fec5fe8eed711eedb13f3d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 20 Aug 2010 16:24:55 -0700
Subject: mm: make the vma list be doubly linked

It's a really simple list, and several of the users want to go backwards
in it to find the previous vma.  So rather than have to look up the
previous entry with 'find_vma_prev()' or something similar, just make it
doubly linked instead.

Tested-by: Ian Campbell <ijc@hellion.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 +-
 kernel/fork.c            |  7 +++++--
 mm/mmap.c                | 21 +++++++++++++++++----
 mm/nommu.c               |  7 +++++--
 4 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b8bb9a6a1f37..ee7e258627f9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -134,7 +134,7 @@ struct vm_area_struct {
 					   within vm_mm. */
 
 	/* linked list of VM areas per task, sorted by address */
-	struct vm_area_struct *vm_next;
+	struct vm_area_struct *vm_next, *vm_prev;
 
 	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
 	unsigned long vm_flags;		/* Flags, see mm.h. */
diff --git a/kernel/fork.c b/kernel/fork.c
index 856eac3ec52e..b7e9d60a675d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -300,7 +300,7 @@ out:
 #ifdef CONFIG_MMU
 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
-	struct vm_area_struct *mpnt, *tmp, **pprev;
+	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
 	struct rb_node **rb_link, *rb_parent;
 	int retval;
 	unsigned long charge;
@@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	if (retval)
 		goto out;
 
+	prev = NULL;
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
 
@@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
-		tmp->vm_next = NULL;
+		tmp->vm_next = tmp->vm_prev = NULL;
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		 */
 		*pprev = tmp;
 		pprev = &tmp->vm_next;
+		tmp->vm_prev = prev;
+		prev = tmp;
 
 		__vma_link_rb(mm, tmp, rb_link, rb_parent);
 		rb_link = &tmp->vm_rb.rb_right;
diff --git a/mm/mmap.c b/mm/mmap.c
index 31003338b978..331e51af38c9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -388,17 +388,23 @@ static inline void
 __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent)
 {
+	struct vm_area_struct *next;
+
+	vma->vm_prev = prev;
 	if (prev) {
-		vma->vm_next = prev->vm_next;
+		next = prev->vm_next;
 		prev->vm_next = vma;
 	} else {
 		mm->mmap = vma;
 		if (rb_parent)
-			vma->vm_next = rb_entry(rb_parent,
+			next = rb_entry(rb_parent,
 					struct vm_area_struct, vm_rb);
 		else
-			vma->vm_next = NULL;
+			next = NULL;
 	}
+	vma->vm_next = next;
+	if (next)
+		next->vm_prev = vma;
 }
 
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -483,7 +489,11 @@ static inline void
 __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev)
 {
-	prev->vm_next = vma->vm_next;
+	struct vm_area_struct *next = vma->vm_next;
+
+	prev->vm_next = next;
+	if (next)
+		next->vm_prev = prev;
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = prev;
@@ -1915,6 +1925,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long addr;
 
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+	vma->vm_prev = NULL;
 	do {
 		rb_erase(&vma->vm_rb, &mm->mm_rb);
 		mm->map_count--;
@@ -1922,6 +1933,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma = vma->vm_next;
 	} while (vma && vma->vm_start < end);
 	*insertion_point = vma;
+	if (vma)
+		vma->vm_prev = prev;
 	tail_vma->vm_next = NULL;
 	if (mm->unmap_area == arch_unmap_area)
 		addr = prev ? prev->vm_end : mm->mmap_base;
diff --git a/mm/nommu.c b/mm/nommu.c
index efa9a380335e..88ff091eb07a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags)
  */
 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	struct vm_area_struct *pvma, **pp;
+	struct vm_area_struct *pvma, **pp, *next;
 	struct address_space *mapping;
 	struct rb_node **p, *parent;
 
@@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 			break;
 	}
 
-	vma->vm_next = *pp;
+	next = *pp;
 	*pp = vma;
+	vma->vm_next = next;
+	if (next)
+		next->vm_prev = vma;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From e36c886a0f9d624377977fa6cae309cfd7f362fa Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 21 Aug 2010 13:07:26 -0700
Subject: workqueue: Add basic tracepoints to track workqueue execution

With the introduction of the new unified work queue thread pools,
we lost one feature: It's no longer possible to know which worker
is causing the CPU to wake out of idle. The result is that PowerTOP
now reports a lot of "kworker/a:b" instead of more readable results.

This patch adds a pair of tracepoints to the new workqueue code,
similar in style to the timer/hrtimer tracepoints.

With this pair of tracepoints, the next PowerTOP can correctly
report which work item caused the wakeup (and how long it took):

Interrupt (43)            i915      time   3.51ms    wakeups 141
Work      ieee80211_iface_work      time   0.81ms    wakeups  29
Work              do_dbs_timer      time   0.55ms    wakeups  24
Process                   Xorg      time  21.36ms    wakeups   4
Timer    sched_rt_period_timer      time   0.01ms    wakeups   1

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/workqueue.h | 62 ++++++++++++++++++++++++++++++++++++++++
 kernel/workqueue.c               |  9 ++++++
 2 files changed, 71 insertions(+)
 create mode 100644 include/trace/events/workqueue.h

(limited to 'include')

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 000000000000..49682d7e9d60
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,62 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include <linux/tracepoint.h>
+#include <linux/workqueue.h>
+
+/**
+ * workqueue_execute_start - called immediately before the workqueue callback
+ * @work:	pointer to struct work_struct
+ *
+ * Allows to track workqueue execution.
+ */
+TRACE_EVENT(workqueue_execute_start,
+
+	TP_PROTO(struct work_struct *work),
+
+	TP_ARGS(work),
+
+	TP_STRUCT__entry(
+		__field( void *,	work	)
+		__field( void *,	function)
+	),
+
+	TP_fast_assign(
+		__entry->work		= work;
+		__entry->function	= work->func;
+	),
+
+	TP_printk("work struct %p: function %pf", __entry->work, __entry->function)
+);
+
+/**
+ * workqueue_execute_end - called immediately before the workqueue callback
+ * @work:	pointer to struct work_struct
+ *
+ * Allows to track workqueue execution.
+ */
+TRACE_EVENT(workqueue_execute_end,
+
+	TP_PROTO(struct work_struct *work),
+
+	TP_ARGS(work),
+
+	TP_STRUCT__entry(
+		__field( void *,	work	)
+	),
+
+	TP_fast_assign(
+		__entry->work		= work;
+	),
+
+	TP_printk("work struct %p", __entry->work)
+);
+
+
+#endif /*  _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2994a0e3a61c..8bd600c020e5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -35,6 +35,9 @@
 #include <linux/lockdep.h>
 #include <linux/idr.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
+
 #include "workqueue_sched.h"
 
 enum {
@@ -1790,7 +1793,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 	work_clear_pending(work);
 	lock_map_acquire(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
+	trace_workqueue_execute_start(work);
 	f(work);
+	/*
+	 * While we must be careful to not use "work" after this, the trace
+	 * point will only record its address.
+	 */
+	trace_workqueue_execute_end(work);
 	lock_map_release(&lockdep_map);
 	lock_map_release(&cwq->wq->lockdep_map);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2eebf582c9b3106abb9c33f4fc0a347fb9391037 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 18 Aug 2010 12:25:50 -0400
Subject: fanotify: flush outstanding perm requests on group destroy

When an fanotify listener is closing it may cause a deadlock between the
listener and the original task doing an fs operation.  If the original task
is waiting for a permissions response it will be holding the srcu lock.  The
listener cannot clean up and exit until after that srcu lock is syncronized.
Thus deadlock.  The fix introduced here is to stop accepting new permissions
events when a listener is shutting down and to grant permission for all
outstanding events.  Thus the original task will eventually release the srcu
lock and the listener can complete shutdown.

Reported-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 27 +++++++++++++++++++++++++++
 include/linux/fanotify.h           |  7 -------
 include/linux/fsnotify_backend.h   |  1 +
 3 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 032b837fcd11..b966b7230f47 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 	re->fd = fd;
 
 	mutex_lock(&group->fanotify_data.access_mutex);
+
+	if (group->fanotify_data.bypass_perm) {
+		mutex_unlock(&group->fanotify_data.access_mutex);
+		kmem_cache_free(fanotify_response_event_cache, re);
+		event->response = FAN_ALLOW;
+		return 0;
+	}
+		
 	list_add_tail(&re->list, &group->fanotify_data.access_list);
 	mutex_unlock(&group->fanotify_data.access_mutex);
 
@@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
+	struct fanotify_response_event *re, *lre;
 
 	pr_debug("%s: file=%p group=%p\n", __func__, file, group);
 
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	mutex_lock(&group->fanotify_data.access_mutex);
+
+	group->fanotify_data.bypass_perm = true;
+
+	list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
+		pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
+			 re, re->event);
+
+		list_del_init(&re->list);
+		re->event->response = FAN_ALLOW;
+
+		kmem_cache_free(fanotify_response_event_cache, re);
+	}
+	mutex_unlock(&group->fanotify_data.access_mutex);
+
+	wake_up(&group->fanotify_data.access_waitq);
+#endif
 	/* matches the fanotify_init->fsnotify_alloc_group */
 	fsnotify_put_group(group);
 
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index f0949a57ca9d..985435622ecd 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -95,11 +95,4 @@ struct fanotify_response {
 				(long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
 				(long)(meta)->event_len <= (long)(len))
 
-#ifdef __KERNEL__
-
-struct fanotify_wait {
-	struct fsnotify_event *event;
-	__s32 fd;
-};
-#endif /* __KERNEL__ */
 #endif /* _LINUX_FANOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index ed36fb57c426..e40190d16878 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -156,6 +156,7 @@ struct fsnotify_group {
 			struct mutex access_mutex;
 			struct list_head access_list;
 			wait_queue_head_t access_waitq;
+			bool bypass_perm; /* protected by access_mutex */
 #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
 			int f_flags;
 		} fanotify_data;
-- 
cgit v1.2.3-59-g8ed1b


From 09cd2b99c6cdd1e14e84c1febca2fb91e9f4e5ba Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sun, 22 Aug 2010 17:25:05 +0000
Subject: header: fix broken headers for user space

__packed is only defined in kernel space, so we should use
__attribute__((packed)) for the code shared between kernel and user space.

Two __attribute() annotations are replaced with __attribute__() too.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h               |  2 +-
 include/linux/if_fddi.h                |  8 ++++----
 include/linux/if_hippi.h               |  8 ++++----
 include/linux/if_pppox.h               | 10 +++++-----
 include/linux/ipv6.h                   |  4 ++--
 include/linux/nbd.h                    |  2 +-
 include/linux/ncp.h                    | 10 +++++-----
 include/linux/netfilter/xt_IDLETIMER.h |  2 +-
 include/linux/phonet.h                 |  4 ++--
 include/linux/rfkill.h                 |  2 +-
 10 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index c831467774d0..bed7a4682b90 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -119,7 +119,7 @@ struct ethhdr {
 	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
 	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
 	__be16		h_proto;		/* packet type ID field	*/
-} __packed;
+} __attribute__((packed));
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h
index 9947c39e62f6..e6dc11e7f9a5 100644
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -67,7 +67,7 @@ struct fddi_8022_1_hdr {
 	__u8	dsap;					/* destination service access point */
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl;					/* control byte #1 */
-} __packed;
+} __attribute__((packed));
 
 /* Define 802.2 Type 2 header */
 struct fddi_8022_2_hdr {
@@ -75,7 +75,7 @@ struct fddi_8022_2_hdr {
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl_1;					/* control byte #1 */
 	__u8	ctrl_2;					/* control byte #2 */
-} __packed;
+} __attribute__((packed));
 
 /* Define 802.2 SNAP header */
 #define FDDI_K_OUI_LEN	3
@@ -85,7 +85,7 @@ struct fddi_snap_hdr {
 	__u8	ctrl;					/* always 0x03 */
 	__u8	oui[FDDI_K_OUI_LEN];	/* organizational universal id */
 	__be16	ethertype;				/* packet type ID field */
-} __packed;
+} __attribute__((packed));
 
 /* Define FDDI LLC frame header */
 struct fddihdr {
@@ -98,7 +98,7 @@ struct fddihdr {
 		struct fddi_8022_2_hdr		llc_8022_2;
 		struct fddi_snap_hdr		llc_snap;
 		} hdr;
-} __packed;
+} __attribute__((packed));
 
 #ifdef __KERNEL__
 #include <linux/netdevice.h>
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h
index 5fe5f307c6f5..cdc049f1829a 100644
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -104,7 +104,7 @@ struct hippi_fp_hdr {
 	__be32		fixed;
 #endif
 	__be32		d2_size;
-} __packed;
+} __attribute__((packed));
 
 struct hippi_le_hdr {
 #if defined (__BIG_ENDIAN_BITFIELD)
@@ -129,7 +129,7 @@ struct hippi_le_hdr {
 	__u8		daddr[HIPPI_ALEN];
 	__u16		locally_administered;
 	__u8		saddr[HIPPI_ALEN];
-} __packed;
+} __attribute__((packed));
 
 #define HIPPI_OUI_LEN	3
 /*
@@ -142,12 +142,12 @@ struct hippi_snap_hdr {
 	__u8	ctrl;			/* always 0x03 */
 	__u8	oui[HIPPI_OUI_LEN];	/* organizational universal id (zero)*/
 	__be16	ethertype;		/* packet type ID field */
-} __packed;
+} __attribute__((packed));
 
 struct hippi_hdr {
 	struct hippi_fp_hdr	fp;
 	struct hippi_le_hdr	le;
 	struct hippi_snap_hdr	snap;
-} __packed;
+} __attribute__((packed));
 
 #endif	/* _LINUX_IF_HIPPI_H */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 1925e0c3f162..27741e05446f 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -59,7 +59,7 @@ struct sockaddr_pppox {
        union{ 
                struct pppoe_addr       pppoe; 
        }sa_addr; 
-} __packed;
+} __attribute__((packed));
 
 /* The use of the above union isn't viable because the size of this
  * struct must stay fixed over time -- applications use sizeof(struct
@@ -70,7 +70,7 @@ struct sockaddr_pppol2tp {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tp_addr pppol2tp;
-} __packed;
+} __attribute__((packed));
 
 /* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
  * bits. So we need a different sockaddr structure.
@@ -79,7 +79,7 @@ struct sockaddr_pppol2tpv3 {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tpv3_addr pppol2tp;
-} __packed;
+} __attribute__((packed));
 
 /*********************************************************************
  *
@@ -101,7 +101,7 @@ struct pppoe_tag {
 	__be16 tag_type;
 	__be16 tag_len;
 	char tag_data[0];
-} __attribute ((packed));
+} __attribute__ ((packed));
 
 /* Tag identifiers */
 #define PTT_EOL		__cpu_to_be16(0x0000)
@@ -129,7 +129,7 @@ struct pppoe_hdr {
 	__be16 sid;
 	__be16 length;
 	struct pppoe_tag tag[0];
-} __packed;
+} __attribute__((packed));
 
 /* Length of entire PPPoE + PPP header */
 #define PPPOE_SES_HLEN	8
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ab9e9e89e407..e62683ba88e6 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -58,7 +58,7 @@ struct ipv6_opt_hdr {
 	/* 
 	 * TLV encoded option data follows.
 	 */
-} __packed;	/* required for some archs */
+} __attribute__((packed));	/* required for some archs */
 
 #define ipv6_destopt_hdr ipv6_opt_hdr
 #define ipv6_hopopt_hdr  ipv6_opt_hdr
@@ -99,7 +99,7 @@ struct ipv6_destopt_hao {
 	__u8			type;
 	__u8			length;
 	struct in6_addr		addr;
-} __packed;
+} __attribute__((packed));
 
 /*
  *	IPv6 fixed header
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index bb58854a8061..d146ca10c0f5 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -88,7 +88,7 @@ struct nbd_request {
 	char handle[8];
 	__be64 from;
 	__be32 len;
-} __packed;
+} __attribute__((packed));
 
 /*
  * This is the reply packet that nbd-server sends back to the client after
diff --git a/include/linux/ncp.h b/include/linux/ncp.h
index 3ace8370e61e..99f0adeeb3f3 100644
--- a/include/linux/ncp.h
+++ b/include/linux/ncp.h
@@ -27,7 +27,7 @@ struct ncp_request_header {
 	__u8 conn_high;
 	__u8 function;
 	__u8 data[0];
-} __packed;
+} __attribute__((packed));
 
 #define NCP_REPLY                (0x3333)
 #define NCP_WATCHDOG		 (0x3E3E)
@@ -42,7 +42,7 @@ struct ncp_reply_header {
 	__u8 completion_code;
 	__u8 connection_state;
 	__u8 data[0];
-} __packed;
+} __attribute__((packed));
 
 #define NCP_VOLNAME_LEN (16)
 #define NCP_NUMBER_OF_VOLUMES (256)
@@ -158,7 +158,7 @@ struct nw_info_struct {
 #ifdef __KERNEL__
 	struct nw_nfs_info nfs;
 #endif
-} __packed;
+} __attribute__((packed));
 
 /* modify mask - use with MODIFY_DOS_INFO structure */
 #define DM_ATTRIBUTES		  (cpu_to_le32(0x02))
@@ -190,12 +190,12 @@ struct nw_modify_dos_info {
 	__u16 inheritanceGrantMask;
 	__u16 inheritanceRevokeMask;
 	__u32 maximumSpace;
-} __packed;
+} __attribute__((packed));
 
 struct nw_search_sequence {
 	__u8 volNumber;
 	__u32 dirBase;
 	__u32 sequence;
-} __packed;
+} __attribute__((packed));
 
 #endif				/* _LINUX_NCP_H */
diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h
index 3e1aa1be942e..208ae9387331 100644
--- a/include/linux/netfilter/xt_IDLETIMER.h
+++ b/include/linux/netfilter/xt_IDLETIMER.h
@@ -39,7 +39,7 @@ struct idletimer_tg_info {
 	char label[MAX_IDLETIMER_LABEL_SIZE];
 
 	/* for kernel module internal use only */
-	struct idletimer_tg *timer __attribute((aligned(8)));
+	struct idletimer_tg *timer __attribute__((aligned(8)));
 };
 
 #endif
diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index 24426c3d6b5a..76edadf046d3 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -56,7 +56,7 @@ struct phonethdr {
 	__be16	pn_length;
 	__u8	pn_robj;
 	__u8	pn_sobj;
-} __packed;
+} __attribute__((packed));
 
 /* Common Phonet payload header */
 struct phonetmsg {
@@ -98,7 +98,7 @@ struct sockaddr_pn {
 	__u8 spn_dev;
 	__u8 spn_resource;
 	__u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3];
-} __packed;
+} __attribute__((packed));
 
 /* Well known address */
 #define PN_DEV_PC	0x10
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 4f82326eb294..08c32e4f261a 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -81,7 +81,7 @@ struct rfkill_event {
 	__u8  type;
 	__u8  op;
 	__u8  soft, hard;
-} __packed;
+} __attribute__((packed));
 
 /*
  * We are planning to be backward and forward compatible with changes
-- 
cgit v1.2.3-59-g8ed1b


From c93a4dfb31f2c023da3ad1238c352452f2cc0e05 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 23 Aug 2010 11:59:28 +0100
Subject: xen: pvhvm: allow user to request no emulated device unplug

this allows the user to disable pvhvm and revert to emulated devices
in case of a system misconfiguration (e.g. initramfs with only
emulated drivers in it).

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Stefano Stabellini <Stefano.Stabellini@eu.citrix.com>
---
 Documentation/kernel-parameters.txt | 1 +
 arch/x86/xen/platform-pci-unplug.c  | 5 +++++
 include/xen/platform_pci.h          | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2c85c0692b01..8bbe83b9d0b2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2631,6 +2631,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			all -- unplug all emulated devices (NICs and IDE disks)
 			ignore -- continue loading the Xen platform PCI driver even
 				if the version check failed
+			never -- do not unplug even if version check succeeds
 
 	xirc2ps_cs=	[NET,PCMCIA]
 			Format:
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 554c002a1e1a..070dfa0654bd 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -72,6 +72,9 @@ void __init xen_unplug_emulated_devices(void)
 {
 	int r;
 
+	/* user explicitly requested no unplug */
+	if (xen_emul_unplug & XEN_UNPLUG_NEVER)
+		return;
 	/* check the version of the xen platform PCI device */
 	r = check_platform_magic();
 	/* If the version matches enable the Xen platform PCI driver.
@@ -127,6 +130,8 @@ static int __init parse_xen_emul_unplug(char *arg)
 			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
 		else if (!strncmp(p, "ignore", l))
 			xen_emul_unplug |= XEN_UNPLUG_IGNORE;
+		else if (!strncmp(p, "never", l))
+			xen_emul_unplug |= XEN_UNPLUG_NEVER;
 		else
 			printk(KERN_WARNING "unrecognised option '%s' "
 				 "in parameter 'xen_emul_unplug'\n", p);
diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
index ce9d671c636c..123b7752fa6a 100644
--- a/include/xen/platform_pci.h
+++ b/include/xen/platform_pci.h
@@ -21,6 +21,7 @@
 #define XEN_UNPLUG_AUX_IDE_DISKS 4
 #define XEN_UNPLUG_ALL 7
 #define XEN_UNPLUG_IGNORE 8
+#define XEN_UNPLUG_NEVER 16
 
 static inline int xen_must_unplug_nics(void) {
 #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \
-- 
cgit v1.2.3-59-g8ed1b


From 1dc7ce99b091a11cce0f34456c1ffcb928f17edd Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 23 Aug 2010 11:59:29 +0100
Subject: xen: pvhvm: rename xen_emul_unplug=ignore to =unnnecessary

It is not immediately clear what this option causes to become
ignored. The actual meaning is that it is not necessary to unplug the
emulated devices to safely use the PV ones, even if the platform does
not support the unplug protocol. (pressumably the user will only add
this option if they have ensured that their domain configuration is
safe).

I think xen_emul_unplug=unnecessary better captures this.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Stefano Stabellini <Stefano.Stabellini@eu.citrix.com>
---
 Documentation/kernel-parameters.txt |  5 +++--
 arch/x86/xen/platform-pci-unplug.c  | 13 +++++++------
 drivers/block/xen-blkfront.c        |  2 +-
 include/xen/platform_pci.h          |  2 +-
 4 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8bbe83b9d0b2..f084af0cb8e0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2629,8 +2629,9 @@ and is between 256 and 4096 characters. It is defined in the file
 			aux-ide-disks -- unplug non-primary-master IDE devices
 			nics -- unplug network devices
 			all -- unplug all emulated devices (NICs and IDE disks)
-			ignore -- continue loading the Xen platform PCI driver even
-				if the version check failed
+			unnecessary -- unplugging emulated devices is
+				unnecessary even if the host did not respond to
+				the unplug protocol
 			never -- do not unplug even if version check succeeds
 
 	xirc2ps_cs=	[NET,PCMCIA]
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 070dfa0654bd..0f456386cce5 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -78,10 +78,11 @@ void __init xen_unplug_emulated_devices(void)
 	/* check the version of the xen platform PCI device */
 	r = check_platform_magic();
 	/* If the version matches enable the Xen platform PCI driver.
-	 * Also enable the Xen platform PCI driver if the version is really old
-	 * and the user told us to ignore it. */
+	 * Also enable the Xen platform PCI driver if the host does
+	 * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC)
+	 * but the user told us that unplugging is unnecessary. */
 	if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
-			(xen_emul_unplug & XEN_UNPLUG_IGNORE)))
+			(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)))
 		return;
 	/* Set the default value of xen_emul_unplug depending on whether or
 	 * not the Xen PV frontends and the Xen platform PCI driver have
@@ -102,7 +103,7 @@ void __init xen_unplug_emulated_devices(void)
 		}
 	}
 	/* Now unplug the emulated devices */
-	if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE))
+	if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))
 		outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
 	xen_platform_pci_unplug = xen_emul_unplug;
 }
@@ -128,8 +129,8 @@ static int __init parse_xen_emul_unplug(char *arg)
 			xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
 		else if (!strncmp(p, "nics", l))
 			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
-		else if (!strncmp(p, "ignore", l))
-			xen_emul_unplug |= XEN_UNPLUG_IGNORE;
+		else if (!strncmp(p, "unnecessary", l))
+			xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY;
 		else if (!strncmp(p, "never", l))
 			xen_emul_unplug |= XEN_UNPLUG_NEVER;
 		else
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ac1b682edecb..ab735a605cf3 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -834,7 +834,7 @@ static int blkfront_probe(struct xenbus_device *dev,
 		char *type;
 		int len;
 		/* no unplug has been done: do not hook devices != xen vbds */
-		if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) {
+		if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
 			int major;
 
 			if (!VDEV_IS_EXTENDED(vdevice))
diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
index 123b7752fa6a..590ccfd82645 100644
--- a/include/xen/platform_pci.h
+++ b/include/xen/platform_pci.h
@@ -20,7 +20,7 @@
 #define XEN_UNPLUG_ALL_NICS 2
 #define XEN_UNPLUG_AUX_IDE_DISKS 4
 #define XEN_UNPLUG_ALL 7
-#define XEN_UNPLUG_IGNORE 8
+#define XEN_UNPLUG_UNNECESSARY 8
 #define XEN_UNPLUG_NEVER 16
 
 static inline int xen_must_unplug_nics(void) {
-- 
cgit v1.2.3-59-g8ed1b


From 9c35e90c6fcf7f5baf27a63d9565e9f47633f299 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 23 Aug 2010 12:01:35 +0100
Subject: xen: pvhvm: make it clearer that XEN_UNPLUG_* define bits in a
 bitfield

by defining in terms of (1<<N).

XEN_UNPLUG_UNNECESSARY and XEN_UNPLUG_NEVER are only used within the
kernel and are not defined as a bit on the unplug IO port. Therefore
use a bit which is outside the potentially valid range of the 16 bit
IO port.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Stefano Stabellini <Stefano.Stabellini@eu.citrix.com>
---
 include/xen/platform_pci.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
index 590ccfd82645..a785a3b0c8c7 100644
--- a/include/xen/platform_pci.h
+++ b/include/xen/platform_pci.h
@@ -16,12 +16,15 @@
 #define XEN_IOPORT_PROTOVER	(XEN_IOPORT_BASE + 2) /* 1 byte access (R) */
 #define XEN_IOPORT_PRODNUM	(XEN_IOPORT_BASE + 2) /* 2 byte access (W) */
 
-#define XEN_UNPLUG_ALL_IDE_DISKS 1
-#define XEN_UNPLUG_ALL_NICS 2
-#define XEN_UNPLUG_AUX_IDE_DISKS 4
-#define XEN_UNPLUG_ALL 7
-#define XEN_UNPLUG_UNNECESSARY 8
-#define XEN_UNPLUG_NEVER 16
+#define XEN_UNPLUG_ALL_IDE_DISKS	(1<<0)
+#define XEN_UNPLUG_ALL_NICS		(1<<1)
+#define XEN_UNPLUG_AUX_IDE_DISKS	(1<<2)
+#define XEN_UNPLUG_ALL			(XEN_UNPLUG_ALL_IDE_DISKS|\
+					 XEN_UNPLUG_ALL_NICS|\
+					 XEN_UNPLUG_AUX_IDE_DISKS)
+
+#define XEN_UNPLUG_UNNECESSARY 		(1<<16)
+#define XEN_UNPLUG_NEVER	 		(1<<17)
 
 static inline int xen_must_unplug_nics(void) {
 #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \
-- 
cgit v1.2.3-59-g8ed1b


From 5dd531a03ad721b41911ddb32e6e0481404e7aaf Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Mon, 23 Aug 2010 13:52:19 +0200
Subject: block: add function call to switch the IO scheduler from a driver

Currently drivers must do an elevator_exit() + elevator_init()
to switch IO schedulers. There are a few problems with this:

- Since commit 1abec4fdbb142e3ccb6ce99832fae42129134a96,
  elevator_init() requires a zeroed out q->elevator
  pointer. The two existing in-kernel users don't do that.

- It will only work at initialization time, since using the
  above two-staged construct does not properly quisce the queue.

So add elevator_change() which takes care of this, and convert
the elv_iosched_store() sysfs interface to use this helper as well.

Reported-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Reported-by: Kevin Vigor <kevin@vigor.nu>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/elevator.c         | 44 +++++++++++++++++++++++++++++++-------------
 include/linux/elevator.h |  1 +
 2 files changed, 32 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/block/elevator.c b/block/elevator.c
index ec585c9554d3..205b09a5bd9e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1009,18 +1009,19 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
 	struct elevator_queue *old_elevator, *e;
 	void *data;
+	int err;
 
 	/*
 	 * Allocate new elevator
 	 */
 	e = elevator_alloc(q, new_e);
 	if (!e)
-		return 0;
+		return -ENOMEM;
 
 	data = elevator_init_queue(q, e);
 	if (!data) {
 		kobject_put(&e->kobj);
-		return 0;
+		return -ENOMEM;
 	}
 
 	/*
@@ -1043,7 +1044,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
 	__elv_unregister_queue(old_elevator);
 
-	if (elv_register_queue(q))
+	err = elv_register_queue(q);
+	if (err)
 		goto fail_register;
 
 	/*
@@ -1056,7 +1058,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
 	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
 
-	return 1;
+	return 0;
 
 fail_register:
 	/*
@@ -1071,17 +1073,19 @@ fail_register:
 	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
 	spin_unlock_irq(q->queue_lock);
 
-	return 0;
+	return err;
 }
 
-ssize_t elv_iosched_store(struct request_queue *q, const char *name,
-			  size_t count)
+/*
+ * Switch this queue to the given IO scheduler.
+ */
+int elevator_change(struct request_queue *q, const char *name)
 {
 	char elevator_name[ELV_NAME_MAX];
 	struct elevator_type *e;
 
 	if (!q->elevator)
-		return count;
+		return -ENXIO;
 
 	strlcpy(elevator_name, name, sizeof(elevator_name));
 	e = elevator_get(strstrip(elevator_name));
@@ -1092,13 +1096,27 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 
 	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
 		elevator_put(e);
-		return count;
+		return 0;
 	}
 
-	if (!elevator_switch(q, e))
-		printk(KERN_ERR "elevator: switch to %s failed\n",
-							elevator_name);
-	return count;
+	return elevator_switch(q, e);
+}
+EXPORT_SYMBOL(elevator_change);
+
+ssize_t elv_iosched_store(struct request_queue *q, const char *name,
+			  size_t count)
+{
+	int ret;
+
+	if (!q->elevator)
+		return count;
+
+	ret = elevator_change(q, name);
+	if (!ret)
+		return count;
+
+	printk(KERN_ERR "elevator: switch to %s failed\n", name);
+	return ret;
 }
 
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 2c958f4fce1e..926b50322a46 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -136,6 +136,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
 
 extern int elevator_init(struct request_queue *, char *);
 extern void elevator_exit(struct elevator_queue *);
+extern int elevator_change(struct request_queue *, const char *);
 extern int elv_rq_merge_ok(struct request *, struct bio *);
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 8488a38f4d2f43bd55a3e0db4cd57a5bef3af6d6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 11 Aug 2010 15:01:02 +0100
Subject: kobject: Break the kobject namespace defs into their own header

Break the kobject namespace defs into their own header to avoid a header file
inclusion ordering problem between linux/sysfs.h and linux/kobject.h.

This fixes the build breakage on older versions of gcc.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h    | 35 +----------------------------
 include/linux/kobject_ns.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sysfs.h      |  1 +
 3 files changed, 58 insertions(+), 34 deletions(-)
 create mode 100644 include/linux/kobject_ns.h

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index cf343a852534..7950a37a7146 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -22,6 +22,7 @@
 #include <linux/compiler.h>
 #include <linux/spinlock.h>
 #include <linux/kref.h>
+#include <linux/kobject_ns.h>
 #include <linux/kernel.h>
 #include <linux/wait.h>
 #include <asm/atomic.h>
@@ -136,42 +137,8 @@ struct kobj_attribute {
 
 extern const struct sysfs_ops kobj_sysfs_ops;
 
-/*
- * Namespace types which are used to tag kobjects and sysfs entries.
- * Network namespace will likely be the first.
- */
-enum kobj_ns_type {
-	KOBJ_NS_TYPE_NONE = 0,
-	KOBJ_NS_TYPE_NET,
-	KOBJ_NS_TYPES
-};
-
 struct sock;
 
-/*
- * Callbacks so sysfs can determine namespaces
- *   @current_ns: return calling task's namespace
- *   @netlink_ns: return namespace to which a sock belongs (right?)
- *   @initial_ns: return the initial namespace (i.e. init_net_ns)
- */
-struct kobj_ns_type_operations {
-	enum kobj_ns_type type;
-	const void *(*current_ns)(void);
-	const void *(*netlink_ns)(struct sock *sk);
-	const void *(*initial_ns)(void);
-};
-
-int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
-int kobj_ns_type_registered(enum kobj_ns_type type);
-const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
-const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
-
-const void *kobj_ns_current(enum kobj_ns_type type);
-const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
-const void *kobj_ns_initial(enum kobj_ns_type type);
-void kobj_ns_exit(enum kobj_ns_type type, const void *ns);
-
-
 /**
  * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem.
  *
diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
new file mode 100644
index 000000000000..82cb5bf461fb
--- /dev/null
+++ b/include/linux/kobject_ns.h
@@ -0,0 +1,56 @@
+/* Kernel object name space definitions
+ *
+ * Copyright (c) 2002-2003 Patrick Mochel
+ * Copyright (c) 2002-2003 Open Source Development Labs
+ * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (c) 2006-2008 Novell Inc.
+ *
+ * Split from kobject.h by David Howells (dhowells@redhat.com)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Please read Documentation/kobject.txt before using the kobject
+ * interface, ESPECIALLY the parts about reference counts and object
+ * destructors.
+ */
+
+#ifndef _LINUX_KOBJECT_NS_H
+#define _LINUX_KOBJECT_NS_H
+
+struct sock;
+struct kobject;
+
+/*
+ * Namespace types which are used to tag kobjects and sysfs entries.
+ * Network namespace will likely be the first.
+ */
+enum kobj_ns_type {
+	KOBJ_NS_TYPE_NONE = 0,
+	KOBJ_NS_TYPE_NET,
+	KOBJ_NS_TYPES
+};
+
+/*
+ * Callbacks so sysfs can determine namespaces
+ *   @current_ns: return calling task's namespace
+ *   @netlink_ns: return namespace to which a sock belongs (right?)
+ *   @initial_ns: return the initial namespace (i.e. init_net_ns)
+ */
+struct kobj_ns_type_operations {
+	enum kobj_ns_type type;
+	const void *(*current_ns)(void);
+	const void *(*netlink_ns)(struct sock *sk);
+	const void *(*initial_ns)(void);
+};
+
+int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
+int kobj_ns_type_registered(enum kobj_ns_type type);
+const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
+const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
+
+const void *kobj_ns_current(enum kobj_ns_type type);
+const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
+const void *kobj_ns_initial(enum kobj_ns_type type);
+void kobj_ns_exit(enum kobj_ns_type type, const void *ns);
+
+#endif /* _LINUX_KOBJECT_NS_H */
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 3c92121ba9af..96eb576d82fd 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/lockdep.h>
+#include <linux/kobject_ns.h>
 #include <asm/atomic.h>
 
 struct kobject;
-- 
cgit v1.2.3-59-g8ed1b


From d187abb9a83e6c6b6e9f2ca17962bdeafb4bc903 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 11 Aug 2010 12:07:13 -0700
Subject: USB: gadget: fix composite kernel-doc warnings

Warning(include/linux/usb/composite.h:284): No description found for parameter 'disconnect'
Warning(drivers/usb/gadget/composite.c:744): No description found for parameter 'c'
Warning(drivers/usb/gadget/composite.c:744): Excess function parameter 'cdev' description in 'usb_string_ids_n'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/composite.c | 4 ++--
 include/linux/usb/composite.h  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index e483f80822d2..1160c55de7f2 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -723,12 +723,12 @@ int usb_string_ids_tab(struct usb_composite_dev *cdev, struct usb_string *str)
 
 /**
  * usb_string_ids_n() - allocate unused string IDs in batch
- * @cdev: the device whose string descriptor IDs are being allocated
+ * @c: the device whose string descriptor IDs are being allocated
  * @n: number of string IDs to allocate
  * Context: single threaded during gadget setup
  *
  * Returns the first requested ID.  This ID and next @n-1 IDs are now
- * valid IDs.  At least providind that @n is non zore because if it
+ * valid IDs.  At least provided that @n is non-zero because if it
  * is, returns last requested ID which is now very useful information.
  *
  * @usb_string_ids_n() is called from bind() callbacks to allocate
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 890bc1472190..617068134ae8 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -247,6 +247,7 @@ int usb_add_config(struct usb_composite_dev *,
  *	value; it should return zero on successful initialization.
  * @unbind: Reverses @bind(); called as a side effect of unregistering
  *	this driver.
+ * @disconnect: optional driver disconnect method
  * @suspend: Notifies when the host stops sending USB traffic,
  *	after function notifications
  * @resume: Notifies configuration when the host restarts USB traffic,
-- 
cgit v1.2.3-59-g8ed1b


From e41e704bc4f49057fc68b643108366e6e6781aa3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 24 Aug 2010 14:22:47 +0200
Subject: workqueue: improve destroy_workqueue() debuggability

Now that the worklist is global, having works pending after wq
destruction can easily lead to oops and destroy_workqueue() have
several BUG_ON()s to catch these cases.  Unfortunately, BUG_ON()
doesn't tell much about how the work became pending after the final
flush_workqueue().

This patch adds WQ_DYING which is set before the final flush begins.
If a work is requested to be queued on a dying workqueue,
WARN_ON_ONCE() is triggered and the request is ignored.  This clearly
indicates which caller is trying to queue a work on a dying workqueue
and keeps the system working in most cases.

Locking rule comment is updated such that the 'I' rule includes
modifying the field from destruction path.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 2 ++
 kernel/workqueue.c        | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 4f9d277bcd9a..c959666eafca 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -241,6 +241,8 @@ enum {
 	WQ_HIGHPRI		= 1 << 4, /* high priority */
 	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu instensive workqueue */
 
+	WQ_DYING		= 1 << 6, /* internal: workqueue is dying */
+
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
 	WQ_DFL_ACTIVE		= WQ_MAX_ACTIVE / 2,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cc3456f96c56..362b50d092e2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -87,7 +87,8 @@ enum {
 /*
  * Structure fields follow one of the following exclusion rules.
  *
- * I: Set during initialization and read-only afterwards.
+ * I: Modifiable by initialization/destruction paths and read-only for
+ *    everyone else.
  *
  * P: Preemption protected.  Disabling preemption is enough and should
  *    only be modified and accessed from the local cpu.
@@ -944,6 +945,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
 	debug_work_activate(work);
 
+	if (WARN_ON_ONCE(wq->flags & WQ_DYING))
+		return;
+
 	/* determine gcwq to use */
 	if (!(wq->flags & WQ_UNBOUND)) {
 		struct global_cwq *last_gcwq;
@@ -2828,6 +2832,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 {
 	unsigned int cpu;
 
+	wq->flags |= WQ_DYING;
 	flush_workqueue(wq);
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 8ca3eb08097f6839b2206e2242db4179aee3cfb3 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Tue, 24 Aug 2010 11:44:18 -0700
Subject: guard page for stacks that grow upwards

pa-risc and ia64 have stacks that grow upwards. Check that
they do not run into other mappings. By making VM_GROWSUP
0x0 on architectures that do not ever use it, we can avoid
some unpleasant #ifdefs in check_stack_guard_page().

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  8 +++++++-
 mm/memory.c        | 15 +++++++++++----
 mm/mmap.c          |  3 ---
 3 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 709f6728fc90..831c693416b2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -78,7 +78,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_MAYSHARE	0x00000080
 
 #define VM_GROWSDOWN	0x00000100	/* general info on the segment */
+#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
 #define VM_GROWSUP	0x00000200
+#else
+#define VM_GROWSUP	0x00000000
+#endif
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
 
@@ -1330,8 +1334,10 @@ unsigned long ra_submit(struct file_ra_state *ra,
 
 /* Do stack extension */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
-#ifdef CONFIG_IA64
+#if VM_GROWSUP
 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
+#else
+  #define expand_upwards(vma, address) do { } while (0)
 #endif
 extern int expand_stack_downwards(struct vm_area_struct *vma,
 				  unsigned long address);
diff --git a/mm/memory.c b/mm/memory.c
index 2ed2267439df..6b2ab1051851 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2760,11 +2760,9 @@ out_release:
 }
 
 /*
- * This is like a special single-page "expand_downwards()",
- * except we must first make sure that 'address-PAGE_SIZE'
+ * This is like a special single-page "expand_{down|up}wards()",
+ * except we must first make sure that 'address{-|+}PAGE_SIZE'
  * doesn't hit another vma.
- *
- * The "find_vma()" will do the right thing even if we wrap
  */
 static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
 {
@@ -2783,6 +2781,15 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
 
 		expand_stack(vma, address - PAGE_SIZE);
 	}
+	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+		struct vm_area_struct *next = vma->vm_next;
+
+		/* As VM_GROWSDOWN but s/below/above/ */
+		if (next && next->vm_start == address + PAGE_SIZE)
+			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+
+		expand_upwards(vma, address + PAGE_SIZE);
+	}
 	return 0;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 331e51af38c9..6128dc8e5ede 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1716,9 +1716,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
  */
-#ifndef CONFIG_IA64
-static
-#endif
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	int error;
-- 
cgit v1.2.3-59-g8ed1b


From 2b8fd9186d9275b07aef43e5bb4e98cd571f9a7d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 23 Aug 2010 23:55:59 +0200
Subject: ACPI/PCI: Do not preserve _OSC control bits returned by a query

There is the assumption in acpi_pci_osc_control_set() that it is
always sufficient to compare the mask of _OSC control bits to be
requested with the result of an _OSC query where all of the known
control bits have been checked.  However, in general, that need not
be the case.  For example, if an _OSC feature A depends on an _OSC
feature B and control of A, B plus another _OSC feature C is
requested simultaneously, the BIOS may return A, B, C, while it would
only return C if A and C were requested without B.

That may result in passing a wrong mask of _OSC control bits to an
_OSC control request, in which case the BIOS may only grant control
of a subset of the requested features.  Moreover, acpi_pci_run_osc()
will return error code if that happens and the caller of
acpi_pci_osc_control_set() will not know that it's been granted
control of some _OSC features.  Consequently, the system will
generally not work as expected.

Apart from this acpi_pci_osc_control_set() always uses the mask
of _OSC control bits returned by the very first invocation of
acpi_pci_query_osc(), but that is done with the second argument
equal to OSC_PCI_SEGMENT_GROUPS_SUPPORT which generally happens
to affect the returned _OSC control bits.

For these reasons, make acpi_pci_osc_control_set() always check if
control of the requested _OSC features will be granted before making
the final control request.  As a result, the osc_control_qry and
osc_queried members of struct acpi_pci_root are not necessary any
more, so drop them and remove the remaining code referring to them.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c | 20 +++++++-------------
 include/acpi/acpi_bus.h |  3 ---
 2 files changed, 7 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d2ae816df0f5..77cd19697b1e 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -249,12 +249,8 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root,
 	status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
 	if (ACPI_SUCCESS(status)) {
 		root->osc_support_set = support;
-		if (control) {
+		if (control)
 			*control = result;
-		} else {
-			root->osc_control_qry = result;
-			root->osc_queried = 1;
-		}
 	}
 	return status;
 }
@@ -409,14 +405,12 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
 		goto out;
 
 	/* Need to query controls first before requesting them */
-	if (!root->osc_queried) {
-		status = acpi_pci_query_osc(root, root->osc_support_set, NULL);
-		if (ACPI_FAILURE(status))
-			goto out;
-	}
-	if ((root->osc_control_qry & control_req) != control_req) {
-		printk(KERN_DEBUG
-		       "Firmware did not grant requested _OSC control\n");
+	flags = control_req;
+	status = acpi_pci_query_osc(root, root->osc_support_set, &flags);
+	if (ACPI_FAILURE(status))
+		goto out;
+
+	if (flags != control_req) {
 		status = AE_SUPPORT;
 		goto out;
 	}
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index baacd98e7cc6..4de84ce3a927 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -377,9 +377,6 @@ struct acpi_pci_root {
 
 	u32 osc_support_set;	/* _OSC state of support bits */
 	u32 osc_control_set;	/* _OSC state of control bits */
-	u32 osc_control_qry;	/* the latest _OSC query result */
-
-	u32 osc_queried:1;	/* has _OSC control been queried? */
 };
 
 /* helper */
-- 
cgit v1.2.3-59-g8ed1b


From 75fb60f26befb59dbfa05cb122972642b7bdd219 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 23 Aug 2010 23:53:11 +0200
Subject: ACPI/PCI: Negotiate _OSC control bits before requesting them

It is possible that the BIOS will not grant control of all _OSC
features requested via acpi_pci_osc_control_set(), so it is
recommended to negotiate the final set of _OSC features with the
query flag set before calling _OSC to request control of these
features.

To implement it, rework acpi_pci_osc_control_set() so that the caller
can specify the mask of _OSC control bits to negotiate and the mask
of _OSC control bits that are absolutely necessary to it.  Then,
acpi_pci_osc_control_set() will run _OSC queries in a loop until
the mask of _OSC control bits returned by the BIOS is equal to the
mask passed to it.  Also, before running the _OSC request
acpi_pci_osc_control_set() will check if the caller's required
control bits are present in the final mask.

Using this mechanism we will be able to avoid situations in which the
BIOS doesn't grant control of certain _OSC features, because they
depend on some other _OSC features that have not been requested.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c              | 59 +++++++++++++++++++++++-------------
 drivers/pci/hotplug/acpi_pcihp.c     |  2 +-
 drivers/pci/pcie/aer/aerdrv_acpi.c   |  6 ++--
 drivers/pci/pcie/pme/pcie_pme_acpi.c |  8 +++--
 include/linux/acpi.h                 |  4 +--
 5 files changed, 49 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 77cd19697b1e..c34713112520 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -374,21 +374,32 @@ out:
 EXPORT_SYMBOL_GPL(acpi_get_pci_dev);
 
 /**
- * acpi_pci_osc_control_set - commit requested control to Firmware
- * @handle: acpi_handle for the target ACPI object
- * @flags: driver's requested control bits
+ * acpi_pci_osc_control_set - Request control of PCI root _OSC features.
+ * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex).
+ * @mask: Mask of _OSC bits to request control of, place to store control mask.
+ * @req: Mask of _OSC bits the control of is essential to the caller.
  *
- * Attempt to take control from Firmware on requested control bits.
+ * Run _OSC query for @mask and if that is successful, compare the returned
+ * mask of control bits with @req.  If all of the @req bits are set in the
+ * returned mask, run _OSC request for it.
+ *
+ * The variable at the @mask address may be modified regardless of whether or
+ * not the function returns success.  On success it will contain the mask of
+ * _OSC bits the BIOS has granted control of, but its contents are meaningless
+ * on failure.
  **/
-acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
+acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req)
 {
+	struct acpi_pci_root *root;
 	acpi_status status;
-	u32 control_req, result, capbuf[3];
+	u32 ctrl, capbuf[3];
 	acpi_handle tmp;
-	struct acpi_pci_root *root;
 
-	control_req = (flags & OSC_PCI_CONTROL_MASKS);
-	if (!control_req)
+	if (!mask)
+		return AE_BAD_PARAMETER;
+
+	ctrl = *mask & OSC_PCI_CONTROL_MASKS;
+	if ((ctrl & req) != req)
 		return AE_TYPE;
 
 	root = acpi_pci_find_root(handle);
@@ -400,27 +411,33 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
 		return status;
 
 	mutex_lock(&osc_lock);
+
+	*mask = ctrl | root->osc_control_set;
 	/* No need to evaluate _OSC if the control was already granted. */
-	if ((root->osc_control_set & control_req) == control_req)
+	if ((root->osc_control_set & ctrl) == ctrl)
 		goto out;
 
-	/* Need to query controls first before requesting them */
-	flags = control_req;
-	status = acpi_pci_query_osc(root, root->osc_support_set, &flags);
-	if (ACPI_FAILURE(status))
-		goto out;
+	/* Need to check the available controls bits before requesting them. */
+	while (*mask) {
+		status = acpi_pci_query_osc(root, root->osc_support_set, mask);
+		if (ACPI_FAILURE(status))
+			goto out;
+		if (ctrl == *mask)
+			break;
+		ctrl = *mask;
+	}
 
-	if (flags != control_req) {
+	if ((ctrl & req) != req) {
 		status = AE_SUPPORT;
 		goto out;
 	}
 
 	capbuf[OSC_QUERY_TYPE] = 0;
 	capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set;
-	capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req;
-	status = acpi_pci_run_osc(handle, capbuf, &result);
+	capbuf[OSC_CONTROL_TYPE] = ctrl;
+	status = acpi_pci_run_osc(handle, capbuf, mask);
 	if (ACPI_SUCCESS(status))
-		root->osc_control_set = result;
+		root->osc_control_set = *mask;
 out:
 	mutex_unlock(&osc_lock);
 	return status;
@@ -551,8 +568,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	if (flags != base_flags)
 		acpi_pci_osc_support(root, flags);
 
-	status = acpi_pci_osc_control_set(root->device->handle,
-					  OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+	flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL;
+	status = acpi_pci_osc_control_set(root->device->handle, &flags, flags);
 
 	if (ACPI_FAILURE(status)) {
 		printk(KERN_INFO "Unable to assume PCIe control: Disabling ASPM\n");
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 45fcc1e96df9..3d93d529a7bd 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -360,7 +360,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
 		acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
 		dbg("Trying to get hotplug control for %s\n",
 				(char *)string.pointer);
-		status = acpi_pci_osc_control_set(handle, flags);
+		status = acpi_pci_osc_control_set(handle, &flags, flags);
 		if (ACPI_SUCCESS(status))
 			goto got_one;
 		if (status == AE_SUPPORT)
diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c
index f278d7b0d95d..3a276a0cea93 100644
--- a/drivers/pci/pcie/aer/aerdrv_acpi.c
+++ b/drivers/pci/pcie/aer/aerdrv_acpi.c
@@ -39,9 +39,9 @@ int aer_osc_setup(struct pcie_device *pciedev)
 
 	handle = acpi_find_root_bridge_handle(pdev);
 	if (handle) {
-		status = acpi_pci_osc_control_set(handle,
-					OSC_PCI_EXPRESS_AER_CONTROL |
-					OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+		u32 flags = OSC_PCI_EXPRESS_AER_CONTROL |
+				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL;
+		status = acpi_pci_osc_control_set(handle, &flags, flags);
 	}
 
 	if (ACPI_FAILURE(status)) {
diff --git a/drivers/pci/pcie/pme/pcie_pme_acpi.c b/drivers/pci/pcie/pme/pcie_pme_acpi.c
index 83ab2287ae3f..be20222b12d3 100644
--- a/drivers/pci/pcie/pme/pcie_pme_acpi.c
+++ b/drivers/pci/pcie/pme/pcie_pme_acpi.c
@@ -28,6 +28,7 @@ int pcie_pme_acpi_setup(struct pcie_device *srv)
 	acpi_status status = AE_NOT_FOUND;
 	struct pci_dev *port = srv->port;
 	acpi_handle handle;
+	u32 flags;
 	int error = 0;
 
 	if (acpi_pci_disabled)
@@ -39,9 +40,10 @@ int pcie_pme_acpi_setup(struct pcie_device *srv)
 	if (!handle)
 		return -EINVAL;
 
-	status = acpi_pci_osc_control_set(handle,
-			OSC_PCI_EXPRESS_PME_CONTROL |
-			OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+	flags = OSC_PCI_EXPRESS_PME_CONTROL |
+		OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL;
+
+	status = acpi_pci_osc_control_set(handle, &flags, flags);
 	if (ACPI_FAILURE(status)) {
 		dev_info(&port->dev,
 			"Failed to receive control of PCIe PME service: %s\n",
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ccf94dc5acdf..c227757feb06 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -304,8 +304,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 				OSC_PCI_EXPRESS_PME_CONTROL |		\
 				OSC_PCI_EXPRESS_AER_CONTROL |		\
 				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
-
-extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
+extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
+					     u32 *mask, u32 req);
 extern void acpi_early_init(void);
 
 #else	/* !CONFIG_ACPI */
-- 
cgit v1.2.3-59-g8ed1b


From 8a2e8e5dec7e29c56a46ba176c664ab6a3d04118 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 25 Aug 2010 10:33:56 +0200
Subject: workqueue: fix cwq->nr_active underflow

cwq->nr_active is used to keep track of how many work items are active
for the cpu workqueue, where 'active' is defined as either pending on
global worklist or executing.  This is used to implement the
max_active limit and workqueue freezing.  If a work item is queued
after nr_active has already reached max_active, the work item doesn't
increment nr_active and is put on the delayed queue and gets activated
later as previous active work items retire.

try_to_grab_pending() which is used in the cancellation path
unconditionally decremented nr_active whether the work item being
cancelled is currently active or delayed, so cancelling a delayed work
item makes nr_active underflow.  This breaks max_active enforcement
and triggers BUG_ON() in destroy_workqueue() later on.

This patch fixes this bug by adding a flag WORK_STRUCT_DELAYED, which
is set while a work item in on the delayed list and making
try_to_grab_pending() decrement nr_active iff the work item is
currently active.

The addition of the flag enlarges cwq alignment to 256 bytes which is
getting a bit too large.  It's scheduled to be reduced back to 128
bytes by merging WORK_STRUCT_PENDING and WORK_STRUCT_CWQ in the next
devel cycle.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/linux/workqueue.h | 16 +++++++++-------
 kernel/workqueue.c        | 30 ++++++++++++++++++++----------
 2 files changed, 29 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index c959666eafca..f11100f96482 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -25,18 +25,20 @@ typedef void (*work_func_t)(struct work_struct *work);
 
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
-	WORK_STRUCT_CWQ_BIT	= 1,	/* data points to cwq */
-	WORK_STRUCT_LINKED_BIT	= 2,	/* next work is linked to this one */
+	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
+	WORK_STRUCT_CWQ_BIT	= 2,	/* data points to cwq */
+	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
-	WORK_STRUCT_STATIC_BIT	= 3,	/* static initializer (debugobjects) */
-	WORK_STRUCT_COLOR_SHIFT	= 4,	/* color for workqueue flushing */
+	WORK_STRUCT_STATIC_BIT	= 4,	/* static initializer (debugobjects) */
+	WORK_STRUCT_COLOR_SHIFT	= 5,	/* color for workqueue flushing */
 #else
-	WORK_STRUCT_COLOR_SHIFT	= 3,	/* color for workqueue flushing */
+	WORK_STRUCT_COLOR_SHIFT	= 4,	/* color for workqueue flushing */
 #endif
 
 	WORK_STRUCT_COLOR_BITS	= 4,
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
+	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
 	WORK_STRUCT_CWQ		= 1 << WORK_STRUCT_CWQ_BIT,
 	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -59,8 +61,8 @@ enum {
 
 	/*
 	 * Reserve 7 bits off of cwq pointer w/ debugobjects turned
-	 * off.  This makes cwqs aligned to 128 bytes which isn't too
-	 * excessive while allowing 15 workqueue flush colors.
+	 * off.  This makes cwqs aligned to 256 bytes and allows 15
+	 * workqueue flush colors.
 	 */
 	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
 				  WORK_STRUCT_COLOR_BITS,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 362b50d092e2..a2dccfca03ba 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -941,6 +941,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	struct global_cwq *gcwq;
 	struct cpu_workqueue_struct *cwq;
 	struct list_head *worklist;
+	unsigned int work_flags;
 	unsigned long flags;
 
 	debug_work_activate(work);
@@ -990,14 +991,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	BUG_ON(!list_empty(&work->entry));
 
 	cwq->nr_in_flight[cwq->work_color]++;
+	work_flags = work_color_to_flags(cwq->work_color);
 
 	if (likely(cwq->nr_active < cwq->max_active)) {
 		cwq->nr_active++;
 		worklist = gcwq_determine_ins_pos(gcwq, cwq);
-	} else
+	} else {
+		work_flags |= WORK_STRUCT_DELAYED;
 		worklist = &cwq->delayed_works;
+	}
 
-	insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
+	insert_work(cwq, work, worklist, work_flags);
 
 	spin_unlock_irqrestore(&gcwq->lock, flags);
 }
@@ -1666,6 +1670,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
 	struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
 
 	move_linked_works(work, pos, NULL);
+	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
 	cwq->nr_active++;
 }
 
@@ -1673,6 +1678,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
  * @cwq: cwq of interest
  * @color: color of work which left the queue
+ * @delayed: for a delayed work
  *
  * A work either has completed or is removed from pending queue,
  * decrement nr_in_flight of its cwq and handle workqueue flushing.
@@ -1680,19 +1686,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
  * CONTEXT:
  * spin_lock_irq(gcwq->lock).
  */
-static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
+static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
+				 bool delayed)
 {
 	/* ignore uncolored works */
 	if (color == WORK_NO_COLOR)
 		return;
 
 	cwq->nr_in_flight[color]--;
-	cwq->nr_active--;
 
-	if (!list_empty(&cwq->delayed_works)) {
-		/* one down, submit a delayed one */
-		if (cwq->nr_active < cwq->max_active)
-			cwq_activate_first_delayed(cwq);
+	if (!delayed) {
+		cwq->nr_active--;
+		if (!list_empty(&cwq->delayed_works)) {
+			/* one down, submit a delayed one */
+			if (cwq->nr_active < cwq->max_active)
+				cwq_activate_first_delayed(cwq);
+		}
 	}
 
 	/* is flush in progress and are we at the flushing tip? */
@@ -1823,7 +1832,7 @@ __acquires(&gcwq->lock)
 	hlist_del_init(&worker->hentry);
 	worker->current_work = NULL;
 	worker->current_cwq = NULL;
-	cwq_dec_nr_in_flight(cwq, work_color);
+	cwq_dec_nr_in_flight(cwq, work_color, false);
 }
 
 /**
@@ -2388,7 +2397,8 @@ static int try_to_grab_pending(struct work_struct *work)
 			debug_work_deactivate(work);
 			list_del_init(&work->entry);
 			cwq_dec_nr_in_flight(get_work_cwq(work),
-					     get_work_color(work));
+				get_work_color(work),
+				*work_data_bits(work) & WORK_STRUCT_DELAYED);
 			ret = 1;
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 25 Aug 2010 02:27:49 -0700
Subject: tcp: Combat per-cpu skew in orphan tests.

As reported by Anton Blanchard when we use
percpu_counter_read_positive() to make our orphan socket limit checks,
the check can be off by up to num_cpus_online() * batch (which is 32
by default) which on a 128 cpu machine can be as large as the default
orphan limit itself.

Fix this by doing the full expensive sum check if the optimized check
triggers.

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/tcp.h    | 18 ++++++++++++++----
 net/ipv4/tcp.c       |  5 +----
 net/ipv4/tcp_timer.c |  8 ++++----
 3 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index df6a2eb20193..eaa9582779d0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
 	return seq3 - seq2 >= seq1 - seq2;
 }
 
-static inline int tcp_too_many_orphans(struct sock *sk, int num)
+static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
 {
-	return (num > sysctl_tcp_max_orphans) ||
-		(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]);
+	struct percpu_counter *ocp = sk->sk_prot->orphan_count;
+	int orphans = percpu_counter_read_positive(ocp);
+
+	if (orphans << shift > sysctl_tcp_max_orphans) {
+		orphans = percpu_counter_sum_positive(ocp);
+		if (orphans << shift > sysctl_tcp_max_orphans)
+			return true;
+	}
+
+	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+	    atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])
+		return true;
+	return false;
 }
 
 /* syncookies: remember time of last synqueue overflow */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 176e11aaea77..197b9b77fa3e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2011,11 +2011,8 @@ adjudge_to_death:
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
-		int orphan_count = percpu_counter_read_positive(
-						sk->sk_prot->orphan_count);
-
 		sk_mem_reclaim(sk);
-		if (tcp_too_many_orphans(sk, orphan_count)) {
+		if (tcp_too_many_orphans(sk, 0)) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many of orphaned "
 				       "sockets\n");
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 808bb920c9f5..c35b469e851c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk)
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = percpu_counter_read_positive(&tcp_orphan_count);
+	int shift = 0;
 
 	/* If peer does not open window for long time, or did not transmit
 	 * anything for long time, penalize it. */
 	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
-		orphans <<= 1;
+		shift++;
 
 	/* If some dubious ICMP arrived, penalize even more. */
 	if (sk->sk_err_soft)
-		orphans <<= 1;
+		shift++;
 
-	if (tcp_too_many_orphans(sk, orphans)) {
+	if (tcp_too_many_orphans(sk, shift)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "Out of socket memory\n");
 
-- 
cgit v1.2.3-59-g8ed1b


From 04cbe1de6fbda9649a6f25666194e6955d3e717e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 19 Aug 2010 21:29:43 +0100
Subject: vgaarb: Wrap vga_(get|put) in CONFIG_VGA_ARB

Fix link failure without the vga arbitrator.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vgaarb.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index 6228b5b77d35..e9e1524b582c 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -93,8 +93,11 @@ extern void vga_set_legacy_decoding(struct pci_dev *pdev,
  *     Nested calls are supported (a per-resource counter is maintained)
  */
 
-extern int vga_get(struct pci_dev *pdev, unsigned int rsrc,
-											int interruptible);
+#if defined(CONFIG_VGA_ARB)
+extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible);
+#else
+static inline int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { return 0; }
+#endif
 
 /**
  *     vga_get_interruptible
@@ -131,7 +134,11 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev,
  *     are already locked by another card. It can be called in any context
  */
 
+#if defined(CONFIG_VGA_ARB)
 extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc);
+#else
+static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; }
+#endif
 
 /**
  *     vga_put         - release lock on legacy VGA resources
@@ -146,7 +153,11 @@ extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc);
  *     released if the counter reaches 0.
  */
 
+#if defined(CONFIG_VGA_ARB)
 extern void vga_put(struct pci_dev *pdev, unsigned int rsrc);
+#else
+#define vga_put(pdev, rsrc)
+#endif
 
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 0fb85621df4f9f7c663c6c77c302e821a832c95e Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@sophos.com>
Date: Fri, 20 Aug 2010 10:02:15 +0100
Subject: fanotify: resize pid and reorder structure

resize pid and reorder the fanotify_event_metadata so it is naturally
aligned and we can work towards dropping the packed attributed

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@sophos.com>
Cc: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fanotify.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 985435622ecd..63531a6b4d2a 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -65,14 +65,14 @@
 				 FAN_ALL_PERM_EVENTS |\
 				 FAN_Q_OVERFLOW)
 
-#define FANOTIFY_METADATA_VERSION	1
+#define FANOTIFY_METADATA_VERSION	2
 
 struct fanotify_event_metadata {
 	__u32 event_len;
 	__u32 vers;
-	__s32 fd;
 	__u64 mask;
-	__s64 pid;
+	__s32 fd;
+	__s32 pid;
 } __attribute__ ((packed));
 
 struct fanotify_response {
-- 
cgit v1.2.3-59-g8ed1b


From bad849b3dc0fae1297c8d47f846f8d202a6145ed Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Aug 2010 16:00:34 +0100
Subject: NOMMU: Stub out vm_get_page_prot() if there's no MMU

Stub out vm_get_page_prot() if there's no MMU.

This was added by commit 804af2cf6e7a ("[AGPGART] remove private page
protection map") and is used in commit c07fbfd17e61 ("fbmem: VM_IO set,
but not propagated") in the fbmem video driver, but the function doesn't
exist on NOMMU, resulting in an undefined symbol at link time.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 831c693416b2..e6b1210772ce 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1363,7 +1363,15 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+#ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
+#else
+static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+	return __pgprot(0);
+}
+#endif
+
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
-- 
cgit v1.2.3-59-g8ed1b


From a28dec2f26013aad89446b1f708f948617bc28a2 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@mvista.com>
Date: Sun, 8 Aug 2010 18:03:33 +0400
Subject: powerpc/85xx: Add P1021 PCI IDs and quirks

This is needed for proper PCI-E support on P1021 SoCs.

Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/sysdev/fsl_pci.c | 2 ++
 include/linux/pci_ids.h       | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 209384b6e039..4ae933225251 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -399,6 +399,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f6a3b2d36cad..10d33309e9a6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2300,6 +2300,8 @@
 #define PCI_DEVICE_ID_P2010		0x0079
 #define PCI_DEVICE_ID_P1020E		0x0100
 #define PCI_DEVICE_ID_P1020		0x0101
+#define PCI_DEVICE_ID_P1021E		0x0102
+#define PCI_DEVICE_ID_P1021		0x0103
 #define PCI_DEVICE_ID_P1011E		0x0108
 #define PCI_DEVICE_ID_P1011		0x0109
 #define PCI_DEVICE_ID_P1022E		0x0110
-- 
cgit v1.2.3-59-g8ed1b


From 4e4438b86527e8bf1f49503a30d487e401e64f9c Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <cbouatmailru@gmail.com>
Date: Wed, 1 Sep 2010 08:55:24 -0600
Subject: gpiolib: Add 'struct gpio_chip' forward declaration for !GPIOLIB case

With CONFIG_GPIOLIB=n, the 'struct gpio_chip' is not declared,
so the following pops up on PowerPC:

  cc1: warnings being treated as errors
  In file included from arch/powerpc/platforms/52xx/mpc52xx_common.c:19:
  include/linux/of_gpio.h:74: warning: 'struct gpio_chip' declared
                              inside parameter list
  include/linux/of_gpio.h:74: warning: its scope is only this definition
                              or declaration, which is probably not what
			      you want
  include/linux/of_gpio.h:75: warning: 'struct gpio_chip' declared
                              inside parameter list
  make[2]: *** [arch/powerpc/platforms/52xx/mpc52xx_common.o] Error 1

This patch fixes the issue by providing the proper forward declaration.

Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/gpio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 03f616b78cfa..e41f7dd1ae67 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -13,6 +13,7 @@
 #include <linux/errno.h>
 
 struct device;
+struct gpio_chip;
 
 /*
  * Some platforms don't support the GPIO programming interface.
-- 
cgit v1.2.3-59-g8ed1b


From ef5dc121d5a0bb1fa477c5395277259f07d318a3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 2 Sep 2010 15:48:16 -0700
Subject: mutex: Fix annotations to include it in kernel-locking docbook

Fix kernel-doc notation in linux/mutex.h and kernel/mutex.c,
then add these 2 files to the kernel-locking docbook as the
Mutex API reference chapter.

Add one API function to mutex-design.txt and correct a typo in
that file.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <20100902154816.6cc2f9ad.randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/DocBook/kernel-locking.tmpl |  6 ++++++
 Documentation/mutex-design.txt            |  3 ++-
 include/linux/mutex.h                     |  8 ++++++++
 kernel/mutex.c                            | 23 +++++++----------------
 4 files changed, 23 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 0b1a3f97f285..a0d479d1e1dd 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -1961,6 +1961,12 @@ machines due to caching.
    </sect1>
   </chapter>
 
+  <chapter id="apiref">
+   <title>Mutex API reference</title>
+!Iinclude/linux/mutex.h
+!Ekernel/mutex.c
+  </chapter>
+
   <chapter id="references">
    <title>Further reading</title>
 
diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
index c91ccc0720fa..38c10fd7f411 100644
--- a/Documentation/mutex-design.txt
+++ b/Documentation/mutex-design.txt
@@ -9,7 +9,7 @@ firstly, there's nothing wrong with semaphores. But if the simpler
 mutex semantics are sufficient for your code, then there are a couple
 of advantages of mutexes:
 
- - 'struct mutex' is smaller on most architectures: .e.g on x86,
+ - 'struct mutex' is smaller on most architectures: E.g. on x86,
    'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
    A smaller structure size means less RAM footprint, and better
    CPU-cache utilization.
@@ -136,3 +136,4 @@ the APIs of 'struct mutex' have been streamlined:
  void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
  int  mutex_lock_interruptible_nested(struct mutex *lock,
                                       unsigned int subclass);
+ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 878cab4f5fcc..f363bc8fdc74 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -78,6 +78,14 @@ struct mutex_waiter {
 # include <linux/mutex-debug.h>
 #else
 # define __DEBUG_MUTEX_INITIALIZER(lockname)
+/**
+ * mutex_init - initialize the mutex
+ * @mutex: the mutex to be initialized
+ *
+ * Initialize the mutex to unlocked state.
+ *
+ * It is not allowed to initialize an already locked mutex.
+ */
 # define mutex_init(mutex) \
 do {							\
 	static struct lock_class_key __key;		\
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 4c0b7b3e6d2e..200407c1502f 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -36,15 +36,6 @@
 # include <asm/mutex.h>
 #endif
 
-/***
- * mutex_init - initialize the mutex
- * @lock: the mutex to be initialized
- * @key: the lock_class_key for the class; used by mutex lock debugging
- *
- * Initialize the mutex to unlocked state.
- *
- * It is not allowed to initialize an already locked mutex.
- */
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
@@ -68,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
 static __used noinline void __sched
 __mutex_lock_slowpath(atomic_t *lock_count);
 
-/***
+/**
  * mutex_lock - acquire the mutex
  * @lock: the mutex to be acquired
  *
@@ -105,7 +96,7 @@ EXPORT_SYMBOL(mutex_lock);
 
 static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
 
-/***
+/**
  * mutex_unlock - release the mutex
  * @lock: the mutex to be released
  *
@@ -364,8 +355,8 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count);
 static noinline int __sched
 __mutex_lock_interruptible_slowpath(atomic_t *lock_count);
 
-/***
- * mutex_lock_interruptible - acquire the mutex, interruptable
+/**
+ * mutex_lock_interruptible - acquire the mutex, interruptible
  * @lock: the mutex to be acquired
  *
  * Lock the mutex like mutex_lock(), and return 0 if the mutex has
@@ -456,15 +447,15 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
 	return prev == 1;
 }
 
-/***
- * mutex_trylock - try acquire the mutex, without waiting
+/**
+ * mutex_trylock - try to acquire the mutex, without waiting
  * @lock: the mutex to be acquired
  *
  * Try to acquire the mutex atomically. Returns 1 if the mutex
  * has been acquired successfully, and 0 on contention.
  *
  * NOTE: this function follows the spin_trylock() convention, so
- * it is negated to the down_trylock() return values! Be careful
+ * it is negated from the down_trylock() return values! Be careful
  * about this when converting semaphore users to mutexes.
  *
  * This function must not be used in interrupt context. The
-- 
cgit v1.2.3-59-g8ed1b


From 3fb5a991916091a908d53608a5899240039fb51e Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 2 Sep 2010 15:42:43 +0000
Subject: cls_cgroup: Fix rcu lockdep warning

Dave reported an rcu lockdep warning on 2.6.35.4 kernel

task->cgroups and task->cgroups->subsys[i] are protected by RCU.
So we avoid accessing invalid pointers here. This might happen,
for example, when you are deref-ing those pointers while someone
move @task from one cgroup to another.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cls_cgroup.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 726cc3536409..ef6c24a529e1 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -27,11 +27,17 @@ struct cgroup_cls_state
 #ifdef CONFIG_NET_CLS_CGROUP
 static inline u32 task_cls_classid(struct task_struct *p)
 {
+	int classid;
+
 	if (in_interrupt())
 		return 0;
 
-	return container_of(task_subsys_state(p, net_cls_subsys_id),
-			    struct cgroup_cls_state, css)->classid;
+	rcu_read_lock();
+	classid = container_of(task_subsys_state(p, net_cls_subsys_id),
+			       struct cgroup_cls_state, css)->classid;
+	rcu_read_unlock();
+
+	return classid;
 }
 #else
 extern int net_cls_subsys_id;
-- 
cgit v1.2.3-59-g8ed1b


From 71cad0554956de87c3fc413b1eac9313887eb14f Mon Sep 17 00:00:00 2001
From: Philippe Langlais <philippe.langlais@stericsson.com>
Date: Tue, 31 Aug 2010 14:19:09 +0200
Subject: serial: fix port type conflict between NS16550A & U6_16550A

Bug seen by Dr. David Alan Gilbert with sparse

Signed-off-by: Philippe Langlais <philippe.langlais@stericsson.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/serial.h      | 3 +--
 include/linux/serial_core.h | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/serial.h b/include/linux/serial.h
index 1ebc694a6d52..ef914061511e 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -77,8 +77,7 @@ struct serial_struct {
 #define PORT_16654	11
 #define PORT_16850	12
 #define PORT_RSA	13	/* RSA-DV II/S card */
-#define PORT_U6_16550A	14
-#define PORT_MAX	14
+#define PORT_MAX	13
 
 #define SERIAL_IO_PORT	0
 #define SERIAL_IO_HUB6	1
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 64458a9a8938..563e23400913 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -44,7 +44,8 @@
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
 #define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
 #define PORT_AR7	18	/* Texas Instruments AR7 internal UART */
-#define PORT_MAX_8250	18	/* max port ID */
+#define PORT_U6_16550A	19	/* ST-Ericsson U6xxx internal UART */
+#define PORT_MAX_8250	19	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
-- 
cgit v1.2.3-59-g8ed1b


From 29bc17ecb856ffb2b47c7009a71971c6f9334205 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Sat, 4 Sep 2010 22:56:44 +0200
Subject: io-mapping: Fix the address space annotations

Fixes a bunch of sparse warnings in io-mapping.h because of the
inconsistent __iomem usage.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
LKML-Reference: <1283633804-11749-2-git-send-email-currojerez@riseup.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/io-mapping.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 0a6b3d5c490c..7fb592793738 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -79,7 +79,7 @@ io_mapping_free(struct io_mapping *mapping)
 }
 
 /* Atomic map/unmap */
-static inline void *
+static inline void __iomem *
 io_mapping_map_atomic_wc(struct io_mapping *mapping,
 			 unsigned long offset,
 			 int slot)
@@ -94,12 +94,12 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping,
 }
 
 static inline void
-io_mapping_unmap_atomic(void *vaddr, int slot)
+io_mapping_unmap_atomic(void __iomem *vaddr, int slot)
 {
 	iounmap_atomic(vaddr, slot);
 }
 
-static inline void *
+static inline void __iomem *
 io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset)
 {
 	resource_size_t phys_addr;
@@ -111,7 +111,7 @@ io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset)
 }
 
 static inline void
-io_mapping_unmap(void *vaddr)
+io_mapping_unmap(void __iomem *vaddr)
 {
 	iounmap(vaddr);
 }
@@ -125,38 +125,38 @@ struct io_mapping;
 static inline struct io_mapping *
 io_mapping_create_wc(resource_size_t base, unsigned long size)
 {
-	return (struct io_mapping *) ioremap_wc(base, size);
+	return (struct io_mapping __force *) ioremap_wc(base, size);
 }
 
 static inline void
 io_mapping_free(struct io_mapping *mapping)
 {
-	iounmap(mapping);
+	iounmap((void __force __iomem *) mapping);
 }
 
 /* Atomic map/unmap */
-static inline void *
+static inline void __iomem *
 io_mapping_map_atomic_wc(struct io_mapping *mapping,
 			 unsigned long offset,
 			 int slot)
 {
-	return ((char *) mapping) + offset;
+	return ((char __force __iomem *) mapping) + offset;
 }
 
 static inline void
-io_mapping_unmap_atomic(void *vaddr, int slot)
+io_mapping_unmap_atomic(void __iomem *vaddr, int slot)
 {
 }
 
 /* Non-atomic map/unmap */
-static inline void *
+static inline void __iomem *
 io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset)
 {
-	return ((char *) mapping) + offset;
+	return ((char __force __iomem *) mapping) + offset;
 }
 
 static inline void
-io_mapping_unmap(void *vaddr)
+io_mapping_unmap(void __iomem *vaddr)
 {
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 73457f0f836956747e0394320be2163c050e96ef Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 6 Aug 2010 01:59:14 +0300
Subject: cgroups: fix API thinko

cgroup_attach_task_current_cg API that have upstream is backwards: we
really need an API to attach to the cgroups from another process A to
the current one.

In our case (vhost), a priveledged user wants to attach it's task to cgroups
from a less priveledged one, the API makes us run it in the other
task's context, and this fails.

So let's make the API generic and just pass in 'from' and 'to' tasks.
Add an inline wrapper for cgroup_attach_task_current_cg to avoid
breaking bisect.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
---
 include/linux/cgroup.h | 11 ++++++++++-
 kernel/cgroup.c        |  9 +++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed3e92e41c6e..5a53d8f039a2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -578,7 +578,11 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
-int cgroup_attach_task_current_cg(struct task_struct *);
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
+static inline int cgroup_attach_task_current_cg(struct task_struct *tsk)
+{
+	return cgroup_attach_task_all(current, tsk);
+}
 
 /*
  * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
@@ -636,6 +640,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 }
 
 /* No cgroups - nothing to do */
+static inline int cgroup_attach_task_all(struct task_struct *from,
+					 struct task_struct *t)
+{
+	return 0;
+}
 static inline int cgroup_attach_task_current_cg(struct task_struct *t)
 {
 	return 0;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a8ce09954404..9d90c08f3bde 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1789,10 +1789,11 @@ out:
 }
 
 /**
- * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup
+ * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
+ * @from: attach to all cgroups of a given task
  * @tsk: the task to be attached
  */
-int cgroup_attach_task_current_cg(struct task_struct *tsk)
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 {
 	struct cgroupfs_root *root;
 	struct cgroup *cur_cg;
@@ -1800,7 +1801,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk)
 
 	cgroup_lock();
 	for_each_active_root(root) {
-		cur_cg = task_cgroup_from_root(current, root);
+		cur_cg = task_cgroup_from_root(from, root);
 		retval = cgroup_attach_task(cur_cg, tsk);
 		if (retval)
 			break;
@@ -1809,7 +1810,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk)
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg);
+EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
 /*
  * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
-- 
cgit v1.2.3-59-g8ed1b


From 831853c87fb7234a8650484d30993242ea9ad6d3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 6 Sep 2010 16:08:56 +0100
Subject: ALSA: Add more jack button slots

Some devices have more flexible microphone detection and can detect
a wider range of buttons.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/jack.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/jack.h b/include/sound/jack.h
index d90b9fa32707..c140fc7cbd3f 100644
--- a/include/sound/jack.h
+++ b/include/sound/jack.h
@@ -47,6 +47,9 @@ enum snd_jack_types {
 	SND_JACK_BTN_0		= 0x4000,
 	SND_JACK_BTN_1		= 0x2000,
 	SND_JACK_BTN_2		= 0x1000,
+	SND_JACK_BTN_3		= 0x0800,
+	SND_JACK_BTN_4		= 0x0400,
+	SND_JACK_BTN_5		= 0x0200,
 };
 
 struct snd_jack {
@@ -55,7 +58,7 @@ struct snd_jack {
 	int type;
 	const char *id;
 	char name[100];
-	unsigned int key[3];   /* Keep in sync with definitions above */
+	unsigned int key[6];   /* Keep in sync with definitions above */
 	void *private_data;
 	void (*private_free)(struct snd_jack *);
 };
-- 
cgit v1.2.3-59-g8ed1b


From f8f235e5bbf4e61f3e0886a44afb1dc4cfe8f337 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 27 Aug 2010 11:08:57 +0800
Subject: agp/intel: Fix cache control for Sandybridge

Sandybridge GTT has new cache control bits in PTE, which controls
graphics page cache in LLC or LLC/MLC, so we need to extend the mask
function to respect the new bits.

And set cache control to always LLC only by default on Gen6.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@kernel.org
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/intel-agp.c    |  1 +
 drivers/char/agp/intel-gtt.c    | 50 ++++++++++++++++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_gem.c |  1 +
 include/linux/intel-gtt.h       | 20 +++++++++++++++++
 4 files changed, 62 insertions(+), 10 deletions(-)
 create mode 100644 include/linux/intel-gtt.h

(limited to 'include')

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 710af89b176d..74461d177baf 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -12,6 +12,7 @@
 #include <asm/smp.h>
 #include "agp.h"
 #include "intel-agp.h"
+#include <linux/intel-gtt.h>
 
 #include "intel-gtt.c"
 
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 7f35854d33a3..64b10551a3f8 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -49,6 +49,26 @@ static struct gatt_mask intel_i810_masks[] =
 	 .type = INTEL_AGP_CACHED_MEMORY}
 };
 
+#define INTEL_AGP_UNCACHED_MEMORY              0
+#define INTEL_AGP_CACHED_MEMORY_LLC            1
+#define INTEL_AGP_CACHED_MEMORY_LLC_GFDT       2
+#define INTEL_AGP_CACHED_MEMORY_LLC_MLC        3
+#define INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT   4
+
+static struct gatt_mask intel_gen6_masks[] =
+{
+	{.mask = I810_PTE_VALID | GEN6_PTE_UNCACHED,
+	 .type = INTEL_AGP_UNCACHED_MEMORY },
+	{.mask = I810_PTE_VALID | GEN6_PTE_LLC,
+         .type = INTEL_AGP_CACHED_MEMORY_LLC },
+	{.mask = I810_PTE_VALID | GEN6_PTE_LLC | GEN6_PTE_GFDT,
+         .type = INTEL_AGP_CACHED_MEMORY_LLC_GFDT },
+	{.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC,
+         .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC },
+	{.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC | GEN6_PTE_GFDT,
+         .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT },
+};
+
 static struct _intel_private {
 	struct pci_dev *pcidev;	/* device one */
 	u8 __iomem *registers;
@@ -178,13 +198,6 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem,
 					off_t pg_start, int mask_type)
 {
 	int i, j;
-	u32 cache_bits = 0;
-
-	if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB ||
-	    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB)
-	{
-		cache_bits = GEN6_PTE_LLC_MLC;
-	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
@@ -317,6 +330,23 @@ static int intel_i830_type_to_mask_type(struct agp_bridge_data *bridge,
 		return 0;
 }
 
+static int intel_gen6_type_to_mask_type(struct agp_bridge_data *bridge,
+					int type)
+{
+	unsigned int type_mask = type & ~AGP_USER_CACHED_MEMORY_GFDT;
+	unsigned int gfdt = type & AGP_USER_CACHED_MEMORY_GFDT;
+
+	if (type_mask == AGP_USER_UNCACHED_MEMORY)
+		return INTEL_AGP_UNCACHED_MEMORY;
+	else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC)
+		return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT :
+			      INTEL_AGP_CACHED_MEMORY_LLC_MLC;
+	else /* set 'normal'/'cached' to LLC by default */
+		return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_GFDT :
+			      INTEL_AGP_CACHED_MEMORY_LLC;
+}
+
+
 static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start,
 				int type)
 {
@@ -1163,7 +1193,7 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start,
 
 	mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type);
 
-	if (mask_type != 0 && mask_type != AGP_PHYS_MEMORY &&
+	if (!IS_SNB && mask_type != 0 && mask_type != AGP_PHYS_MEMORY &&
 	    mask_type != INTEL_AGP_CACHED_MEMORY)
 		goto out_err;
 
@@ -1563,7 +1593,7 @@ static const struct agp_bridge_driver intel_gen6_driver = {
 	.fetch_size		= intel_i9xx_fetch_size,
 	.cleanup		= intel_i915_cleanup,
 	.mask_memory		= intel_gen6_mask_memory,
-	.masks			= intel_i810_masks,
+	.masks			= intel_gen6_masks,
 	.agp_enable		= intel_i810_agp_enable,
 	.cache_flush		= global_cache_flush,
 	.create_gatt_table	= intel_i965_create_gatt_table,
@@ -1576,7 +1606,7 @@ static const struct agp_bridge_driver intel_gen6_driver = {
 	.agp_alloc_pages        = agp_generic_alloc_pages,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_destroy_pages      = agp_generic_destroy_pages,
-	.agp_type_to_mask_type	= intel_i830_type_to_mask_type,
+	.agp_type_to_mask_type	= intel_gen6_type_to_mask_type,
 	.chipset_flush		= intel_i915_chipset_flush,
 #ifdef USE_PCI_DMA_API
 	.agp_map_page		= intel_agp_map_page,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 748c26340c35..16fca1d1799a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/pci.h>
+#include <linux/intel-gtt.h>
 
 static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h
new file mode 100644
index 000000000000..1d19ab2afa39
--- /dev/null
+++ b/include/linux/intel-gtt.h
@@ -0,0 +1,20 @@
+/*
+ * Common Intel AGPGART and GTT definitions.
+ */
+#ifndef _INTEL_GTT_H
+#define _INTEL_GTT_H
+
+#include <linux/agp_backend.h>
+
+/* This is for Intel only GTT controls.
+ *
+ * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only
+ */
+
+#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2)
+#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4)
+
+/* flag for GFDT type */
+#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From febc88c5948f81114f64c3412011d695aecae233 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 7 Sep 2010 14:46:37 +0200
Subject: semaphore: Add DEFINE_SEMAPHORE

The full cleanup of init_MUTEX[_LOCKED] and DECLARE_MUTEX has not been
done. Some of the users are real semaphores and we should name them as
such instead of confusing everyone with "MUTEX".

Provide the infrastructure to get finally rid of init_MUTEX[_LOCKED]
and DECLARE_MUTEX.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christoph Hellwig <hch@infradead.org>
LKML-Reference: <20100907125054.795929962@linutronix.de>
---
 include/linux/semaphore.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 7415839ac890..5310d27abd2a 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -26,6 +26,9 @@ struct semaphore {
 	.wait_list	= LIST_HEAD_INIT((name).wait_list),		\
 }
 
+#define DEFINE_SEMAPHORE(name)	\
+	struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
+
 #define DECLARE_MUTEX(name)	\
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
 
-- 
cgit v1.2.3-59-g8ed1b


From e3e55ff5854655d8723ad8b307f02515aecc3df5 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Tue, 7 Sep 2010 15:52:06 +0800
Subject: spi/dw_spi: clean the cs_control code

commit 052dc7c45i "spi/dw_spi: conditional transfer mode change"
introduced cs_control code, which has a bug by using bit offset
for spi mode to set transfer mode in control register. Also it
forces devices who don't need cs_control to re-configure the
control registers for each spi transfer. This patch will fix them

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/dw_spi.c       | 17 +++++------------
 include/linux/spi/dw_spi.h |  2 ++
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c
index 11fbbf6fb07b..56247853c298 100644
--- a/drivers/spi/dw_spi.c
+++ b/drivers/spi/dw_spi.c
@@ -181,10 +181,6 @@ static void flush(struct dw_spi *dws)
 	wait_till_not_busy(dws);
 }
 
-static void null_cs_control(u32 command)
-{
-}
-
 static int null_writer(struct dw_spi *dws)
 {
 	u8 n_bytes = dws->n_bytes;
@@ -322,7 +318,7 @@ static void giveback(struct dw_spi *dws)
 					struct spi_transfer,
 					transfer_list);
 
-	if (!last_transfer->cs_change)
+	if (!last_transfer->cs_change && dws->cs_control)
 		dws->cs_control(MRST_SPI_DEASSERT);
 
 	msg->state = NULL;
@@ -549,13 +545,13 @@ static void pump_transfers(unsigned long data)
 	 */
 	if (dws->cs_control) {
 		if (dws->rx && dws->tx)
-			chip->tmode = 0x00;
+			chip->tmode = SPI_TMOD_TR;
 		else if (dws->rx)
-			chip->tmode = 0x02;
+			chip->tmode = SPI_TMOD_RO;
 		else
-			chip->tmode = 0x01;
+			chip->tmode = SPI_TMOD_TO;
 
-		cr0 &= ~(0x3 << SPI_MODE_OFFSET);
+		cr0 &= ~SPI_TMOD_MASK;
 		cr0 |= (chip->tmode << SPI_TMOD_OFFSET);
 	}
 
@@ -704,9 +700,6 @@ static int dw_spi_setup(struct spi_device *spi)
 		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
 		if (!chip)
 			return -ENOMEM;
-
-		chip->cs_control = null_cs_control;
-		chip->enable_dma = 0;
 	}
 
 	/*
diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h
index cc813f95a2f2..c91302f3a257 100644
--- a/include/linux/spi/dw_spi.h
+++ b/include/linux/spi/dw_spi.h
@@ -14,7 +14,9 @@
 #define SPI_MODE_OFFSET			6
 #define SPI_SCPH_OFFSET			6
 #define SPI_SCOL_OFFSET			7
+
 #define SPI_TMOD_OFFSET			8
+#define SPI_TMOD_MASK			(0x3 << SPI_TMOD_OFFSET)
 #define	SPI_TMOD_TR			0x0		/* xmit & recv */
 #define SPI_TMOD_TO			0x1		/* xmit only */
 #define SPI_TMOD_RO			0x2		/* recv only */
-- 
cgit v1.2.3-59-g8ed1b


From 6523ce1525e88c598c75a1a6b8c4edddfa9defe8 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 5 Sep 2010 18:02:29 +0000
Subject: ipvs: fix active FTP

- Do not create expectation when forwarding the PORT
  command to avoid blocking the connection. The problem is that
  nf_conntrack_ftp.c:help() tries to create the same expectation later in
  POST_ROUTING and drops the packet with "dropping packet" message after
  failure in nf_ct_expect_related.

- Change ip_vs_update_conntrack to alter the conntrack
  for related connections from real server. If we do not alter the reply in
  this direction the next packet from client sent to vport 20 comes as NEW
  connection. We alter it but may be some collision happens for both
  conntracks and the second conntrack gets destroyed immediately. The
  connection stucks too.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_vs.h             |  3 +++
 net/netfilter/ipvs/ip_vs_core.c |  1 +
 net/netfilter/ipvs/ip_vs_ftp.c  |  6 ------
 net/netfilter/ipvs/ip_vs_xmit.c | 18 ++++++++++++------
 4 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a4747a0f7303..f976885f686f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -955,6 +955,9 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
 	return csum_partial(diff, sizeof(diff), oldsum);
 }
 
+extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
+				   int outin);
+
 #endif /* __KERNEL__ */
 
 #endif	/* _NET_IP_VS_H */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f8ddba48011..4c2f89df5cce 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 	ip_vs_out_stats(cp, skb);
 	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_update_conntrack(skb, cp, 0);
 	ip_vs_conn_put(cp);
 
 	skb->ipvs_property = 1;
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 33b329bfc2d2..7e9af5b76d9e 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -410,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
-	struct nf_conn *ct;
 
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -497,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		ip_vs_control_add(n_cp, cp);
 	}
 
-	ct = (struct nf_conn *)skb->nfct;
-	if (ct && ct != &nf_conntrack_untracked)
-		ip_vs_expect_related(skb, ct, n_cp,
-				     IPPROTO_TCP, &n_cp->dport, 1);
-
 	/*
 	 *	Move tunnel to listen state
 	 */
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 21e1a5e9b9d3..49df6bea6a2d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 }
 #endif
 
-static void
-ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
+void
+ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
 {
 	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
 	struct nf_conntrack_tuple new_tuple;
@@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
 	 * real-server we will see RIP->DIP.
 	 */
 	new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-	new_tuple.src.u3 = cp->daddr;
+	if (outin)
+		new_tuple.src.u3 = cp->daddr;
+	else
+		new_tuple.dst.u3 = cp->vaddr;
 	/*
 	 * This will also take care of UDP and other protocols.
 	 */
-	new_tuple.src.u.tcp.port = cp->dport;
+	if (outin)
+		new_tuple.src.u.tcp.port = cp->dport;
+	else
+		new_tuple.dst.u.tcp.port = cp->vport;
 	nf_conntrack_alter_reply(ct, &new_tuple);
 }
 
@@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
 
-	ip_vs_update_conntrack(skb, cp);
+	ip_vs_update_conntrack(skb, cp, 1);
 
 	/* FIXME: when application helper enlarges the packet and the length
 	   is larger than the MTU of outgoing device, there will be still
@@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
 
-	ip_vs_update_conntrack(skb, cp);
+	ip_vs_update_conntrack(skb, cp, 1);
 
 	/* FIXME: when application helper enlarges the packet and the length
 	   is larger than the MTU of outgoing device, there will be still
-- 
cgit v1.2.3-59-g8ed1b


From 719f835853a92f6090258114a72ffe41f09155cd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 8 Sep 2010 05:08:44 +0000
Subject: udp: add rehash on connect()

commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation)
added a secondary hash on UDP, hashed on (local addr, local port).

Problem is that following sequence :

fd = socket(...)
connect(fd, &remote, ...)

not only selects remote end point (address and port), but also sets
local address, while UDP stack stored in secondary hash table the socket
while its local address was INADDR_ANY (or ipv6 equivalent)

Sequence is :
 - autobind() : choose a random local port, insert socket in hash tables
              [while local address is INADDR_ANY]
 - connect() : set remote address and port, change local address to IP
              given by a route lookup.

When an incoming UDP frame comes, if more than 10 sockets are found in
primary hash table, we switch to secondary table, and fail to find
socket because its local address changed.

One solution to this problem is to rehash datagram socket if needed.

We add a new rehash(struct socket *) method in "struct proto", and
implement this method for UDP v4 & v6, using a common helper.

This rehashing only takes care of secondary hash table, since primary
hash (based on local port only) is not changed.

Reported-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h  |  1 +
 include/net/udp.h   |  1 +
 net/ipv4/datagram.c |  5 ++++-
 net/ipv4/udp.c      | 44 ++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/datagram.c |  7 ++++++-
 net/ipv6/udp.c      | 10 ++++++++++
 6 files changed, 66 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index ac53bfbdfe16..adab9dc58183 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -752,6 +752,7 @@ struct proto {
 	/* Keeping track of sk's, looking them up, and port selection methods. */
 	void			(*hash)(struct sock *sk);
 	void			(*unhash)(struct sock *sk);
+	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
 
 	/* Keeping track of sockets in use */
diff --git a/include/net/udp.h b/include/net/udp.h
index 7abdf305da50..a184d3496b13 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct sock *sk)
 }
 
 extern void udp_lib_unhash(struct sock *sk);
+extern void udp_lib_rehash(struct sock *sk, u16 new_hash);
 
 static inline void udp_lib_close(struct sock *sk, long timeout)
 {
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index f0550941df7b..721a8a37b45c 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 	if (!inet->inet_saddr)
 		inet->inet_saddr = rt->rt_src;	/* Update source address */
-	if (!inet->inet_rcv_saddr)
+	if (!inet->inet_rcv_saddr) {
 		inet->inet_rcv_saddr = rt->rt_src;
+		if (sk->sk_prot->rehash)
+			sk->sk_prot->rehash(sk);
+	}
 	inet->inet_daddr = rt->rt_dst;
 	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 32e0bef60d0a..fb23c2e63b52 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk)
 }
 EXPORT_SYMBOL(udp_lib_unhash);
 
+/*
+ * inet_rcv_saddr was changed, we must rehash secondary hash
+ */
+void udp_lib_rehash(struct sock *sk, u16 newhash)
+{
+	if (sk_hashed(sk)) {
+		struct udp_table *udptable = sk->sk_prot->h.udp_table;
+		struct udp_hslot *hslot, *hslot2, *nhslot2;
+
+		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+		nhslot2 = udp_hashslot2(udptable, newhash);
+		udp_sk(sk)->udp_portaddr_hash = newhash;
+		if (hslot2 != nhslot2) {
+			hslot = udp_hashslot(udptable, sock_net(sk),
+					     udp_sk(sk)->udp_port_hash);
+			/* we must lock primary chain too */
+			spin_lock_bh(&hslot->lock);
+
+			spin_lock(&hslot2->lock);
+			hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+			hslot2->count--;
+			spin_unlock(&hslot2->lock);
+
+			spin_lock(&nhslot2->lock);
+			hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+						 &nhslot2->head);
+			nhslot2->count++;
+			spin_unlock(&nhslot2->lock);
+
+			spin_unlock_bh(&hslot->lock);
+		}
+	}
+}
+EXPORT_SYMBOL(udp_lib_rehash);
+
+static void udp_v4_rehash(struct sock *sk)
+{
+	u16 new_hash = udp4_portaddr_hash(sock_net(sk),
+					  inet_sk(sk)->inet_rcv_saddr,
+					  inet_sk(sk)->inet_num);
+	udp_lib_rehash(sk, new_hash);
+}
+
 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
@@ -1843,6 +1886,7 @@ struct proto udp_prot = {
 	.backlog_rcv	   = __udp_queue_rcv_skb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
+	.rehash		   = udp_v4_rehash,
 	.get_port	   = udp_v4_get_port,
 	.memory_allocated  = &udp_memory_allocated,
 	.sysctl_mem	   = sysctl_udp_mem,
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 7d929a22cbc2..ef371aa01ac5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -105,9 +105,12 @@ ipv4_connected:
 		if (ipv6_addr_any(&np->saddr))
 			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
-		if (ipv6_addr_any(&np->rcv_saddr))
+		if (ipv6_addr_any(&np->rcv_saddr)) {
 			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
 					       &np->rcv_saddr);
+			if (sk->sk_prot->rehash)
+				sk->sk_prot->rehash(sk);
+		}
 
 		goto out;
 	}
@@ -181,6 +184,8 @@ ipv4_connected:
 	if (ipv6_addr_any(&np->rcv_saddr)) {
 		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
 		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
+		if (sk->sk_prot->rehash)
+			sk->sk_prot->rehash(sk);
 	}
 
 	ip6_dst_store(sk, dst,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1dd1affdead2..5acb3560ff15 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
 }
 
+static void udp_v6_rehash(struct sock *sk)
+{
+	u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+					  &inet6_sk(sk)->rcv_saddr,
+					  inet_sk(sk)->inet_num);
+
+	udp_lib_rehash(sk, new_hash);
+}
+
 static inline int compute_score(struct sock *sk, struct net *net,
 				unsigned short hnum,
 				struct in6_addr *saddr, __be16 sport,
@@ -1447,6 +1456,7 @@ struct proto udpv6_prot = {
 	.backlog_rcv	   = udpv6_queue_rcv_skb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
+	.rehash		   = udp_v6_rehash,
 	.get_port	   = udp_v6_get_port,
 	.memory_allocated  = &udp_memory_allocated,
 	.sysctl_mem	   = sysctl_udp_mem,
-- 
cgit v1.2.3-59-g8ed1b


From d530148ae8bffe1b33f50d1776d185a6e85dc774 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Fri, 20 Aug 2010 16:49:43 +0800
Subject: dquot: do full inode dirty in allocating space

Alex Shi found a regression when doing ffsb test. The test has several threads,
and each thread creates a small file, write to it and then delete it. ffsb
reports about 20% regression and Alex bisected it to 43d2932d88e4. The test
will call __mark_inode_dirty 3 times. without this commit, we only take
inode_lock one time, while with it, we take the lock 3 times with flags (
I_DIRTY_SYNC,I_DIRTY_PAGES,I_DIRTY). Perf shows the lock contention increased
too much. Below proposed patch fixes it.

fs is allocating blocks, which usually means file writes and the inode
will be dirtied soon. We fully dirty the inode to reduce some inode_lock
contention in several calls of __mark_inode_dirty.

Jan Kara: Added comment.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/quotaops.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index d50ba858cfe0..d1a9193960f1 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -274,8 +274,14 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
 	int ret;
 
 	ret = dquot_alloc_space_nodirty(inode, nr);
-	if (!ret)
-		mark_inode_dirty_sync(inode);
+	if (!ret) {
+		/*
+		 * Mark inode fully dirty. Since we are allocating blocks, inode
+		 * would become fully dirty soon anyway and it reportedly
+		 * reduces inode_lock contention.
+		 */
+		mark_inode_dirty(inode);
+	}
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001
From: Stefan Bader <stefan.bader@canonical.com>
Date: Tue, 31 Aug 2010 15:52:27 +0200
Subject: mm: Move vma_stack_continue into mm.h

So it can be used by all that need to check for that.

Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 3 ++-
 include/linux/mm.h | 6 ++++++
 mm/mlock.c         | 6 ------
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 439fc1f1c1c4..271afc48b9a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
 	if (vma->vm_flags & VM_GROWSDOWN)
-		start += PAGE_SIZE;
+		if (!vma_stack_continue(vma->vm_prev, vma->vm_start))
+			start += PAGE_SIZE;
 
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
 			start,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e6b1210772ce..74949fbef8c6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -864,6 +864,12 @@ int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
+/* Is the vma a continuation of the stack vma above it? */
+static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr)
+{
+	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
+}
+
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len);
diff --git a/mm/mlock.c b/mm/mlock.c
index cbae7c5b9568..b70919ce4f72 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,12 +135,6 @@ void munlock_vma_page(struct page *page)
 	}
 }
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	return (vma->vm_flags & VM_GROWSDOWN) &&
-- 
cgit v1.2.3-59-g8ed1b


From a73f8844e1fc54c3762555c1cf1f71774142ca91 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Wed, 8 Sep 2010 16:54:54 -0600
Subject: lglock: make lg_lock_global() actually lock globally

lg_lock_global() currently only acquires spinlocks for online CPUs, but
it's meant to lock all possible CPUs.  Lglock-protected resources may be
associated with removed CPUs - and, indeed, that could happen with the
per-superblock open files lists.

At Nick's suggestion, change for_each_online_cpu() to
for_each_possible_cpu() to protect accesses to those resources.

Cc: Al Viro <viro@ZenIV.linux.org.uk>
Acked-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lglock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index b288cb713b90..f549056fb20b 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -150,7 +150,7 @@
 	int i;								\
 	preempt_disable();						\
 	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
-	for_each_online_cpu(i) {					\
+	for_each_possible_cpu(i) {					\
 		arch_spinlock_t *lock;					\
 		lock = &per_cpu(name##_lock, i);			\
 		arch_spin_lock(lock);					\
@@ -161,7 +161,7 @@
  void name##_global_unlock(void) {					\
 	int i;								\
 	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
-	for_each_online_cpu(i) {					\
+	for_each_possible_cpu(i) {					\
 		arch_spinlock_t *lock;					\
 		lock = &per_cpu(name##_lock, i);			\
 		arch_spin_unlock(lock);					\
-- 
cgit v1.2.3-59-g8ed1b


From f3c65b2870f2481f3646bc410a58a12989ecc704 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Thu, 9 Sep 2010 16:37:24 -0700
Subject: mmc: avoid getting CID on SDIO-only cards

The introduction of support for SD combo cards breaks the initialization
of all CSR SDIO chips.  The GO_IDLE (CMD0) in mmc_sd_get_cid() causes CSR
chips to be reset (this is non-standard behavior).

When initializing an SDIO card check for a combo card by using the memory
present bit in the R4 response to IO_SEND_OP_COND (CMD5).  This avoids the
call to mmc_sd_get_cid() on an SDIO-only card.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
Acked-by: Michal Mirolaw <mirq-linux@rere.qmqm.pl>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/core/sdio.c  | 5 ++---
 include/linux/mmc/sdio.h | 2 ++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index bd2755e8d9a3..f332c52968b7 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -362,9 +362,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 		goto err;
 	}
 
-	err = mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid);
-
-	if (!err) {
+	if (ocr & R4_MEMORY_PRESENT
+	    && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) {
 		card->type = MMC_TYPE_SD_COMBO;
 
 		if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO ||
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index 329a8faa6e37..245cdacee544 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -38,6 +38,8 @@
  *      [8:0] Byte/block count
  */
 
+#define R4_MEMORY_PRESENT (1 << 27)
+
 /*
   SDIO status in R5
   Type
-- 
cgit v1.2.3-59-g8ed1b


From e0bf1024b36be90da241af3c2767311e055b612c Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 9 Sep 2010 16:37:26 -0700
Subject: kfifo: add parenthesis for macro parameter reference

Some macro parameter references inside typeof() operator are not enclosed
with parenthesis.  It should be safer to add them.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 58 +++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 4aa95f203f3e..62dbee554f60 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -214,7 +214,7 @@ __kfifo_must_check_helper(unsigned int val)
  */
 #define kfifo_reset(fifo) \
 (void)({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	__tmp->kfifo.in = __tmp->kfifo.out = 0; \
 })
 
@@ -228,7 +228,7 @@ __kfifo_must_check_helper(unsigned int val)
  */
 #define kfifo_reset_out(fifo)	\
 (void)({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	__tmp->kfifo.out = __tmp->kfifo.in; \
 })
 
@@ -238,7 +238,7 @@ __kfifo_must_check_helper(unsigned int val)
  */
 #define kfifo_len(fifo) \
 ({ \
-	typeof(fifo + 1) __tmpl = (fifo); \
+	typeof((fifo) + 1) __tmpl = (fifo); \
 	__tmpl->kfifo.in - __tmpl->kfifo.out; \
 })
 
@@ -248,7 +248,7 @@ __kfifo_must_check_helper(unsigned int val)
  */
 #define	kfifo_is_empty(fifo) \
 ({ \
-	typeof(fifo + 1) __tmpq = (fifo); \
+	typeof((fifo) + 1) __tmpq = (fifo); \
 	__tmpq->kfifo.in == __tmpq->kfifo.out; \
 })
 
@@ -258,7 +258,7 @@ __kfifo_must_check_helper(unsigned int val)
  */
 #define	kfifo_is_full(fifo) \
 ({ \
-	typeof(fifo + 1) __tmpq = (fifo); \
+	typeof((fifo) + 1) __tmpq = (fifo); \
 	kfifo_len(__tmpq) > __tmpq->kfifo.mask; \
 })
 
@@ -269,7 +269,7 @@ __kfifo_must_check_helper(unsigned int val)
 #define	kfifo_avail(fifo) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmpq = (fifo); \
+	typeof((fifo) + 1) __tmpq = (fifo); \
 	const size_t __recsize = sizeof(*__tmpq->rectype); \
 	unsigned int __avail = kfifo_size(__tmpq) - kfifo_len(__tmpq); \
 	(__recsize) ? ((__avail <= __recsize) ? 0 : \
@@ -284,7 +284,7 @@ __kfifo_must_check_helper( \
  */
 #define	kfifo_skip(fifo) \
 (void)({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
 	if (__recsize) \
@@ -302,7 +302,7 @@ __kfifo_must_check_helper( \
 #define kfifo_peek_len(fifo) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
 	(!__recsize) ? kfifo_len(__tmp) * sizeof(*__tmp->type) : \
@@ -325,7 +325,7 @@ __kfifo_must_check_helper( \
 #define kfifo_alloc(fifo, size, gfp_mask) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
 	__is_kfifo_ptr(__tmp) ? \
 	__kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \
@@ -339,7 +339,7 @@ __kfifo_must_check_helper( \
  */
 #define kfifo_free(fifo) \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
 	if (__is_kfifo_ptr(__tmp)) \
 		__kfifo_free(__kfifo); \
@@ -358,7 +358,7 @@ __kfifo_must_check_helper( \
  */
 #define kfifo_init(fifo, buffer, size) \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
 	__is_kfifo_ptr(__tmp) ? \
 	__kfifo_init(__kfifo, buffer, size, sizeof(*__tmp->type)) : \
@@ -379,8 +379,8 @@ __kfifo_must_check_helper( \
  */
 #define	kfifo_put(fifo, val) \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
-	typeof(val + 1) __val = (val); \
+	typeof((fifo) + 1) __tmp = (fifo); \
+	typeof((val) + 1) __val = (val); \
 	unsigned int __ret; \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
@@ -421,8 +421,8 @@ __kfifo_must_check_helper( \
 #define	kfifo_get(fifo, val) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
-	typeof(val + 1) __val = (val); \
+	typeof((fifo) + 1) __tmp = (fifo); \
+	typeof((val) + 1) __val = (val); \
 	unsigned int __ret; \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
@@ -462,8 +462,8 @@ __kfifo_must_check_helper( \
 #define	kfifo_peek(fifo, val) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
-	typeof(val + 1) __val = (val); \
+	typeof((fifo) + 1) __tmp = (fifo); \
+	typeof((val) + 1) __val = (val); \
 	unsigned int __ret; \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
@@ -501,8 +501,8 @@ __kfifo_must_check_helper( \
  */
 #define	kfifo_in(fifo, buf, n) \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
-	typeof(buf + 1) __buf = (buf); \
+	typeof((fifo) + 1) __tmp = (fifo); \
+	typeof((buf) + 1) __buf = (buf); \
 	unsigned long __n = (n); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
@@ -554,8 +554,8 @@ __kfifo_must_check_helper( \
 #define	kfifo_out(fifo, buf, n) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
-	typeof(buf + 1) __buf = (buf); \
+	typeof((fifo) + 1) __tmp = (fifo); \
+	typeof((buf) + 1) __buf = (buf); \
 	unsigned long __n = (n); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
@@ -611,7 +611,7 @@ __kfifo_must_check_helper( \
 #define	kfifo_from_user(fifo, from, len, copied) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	const void __user *__from = (from); \
 	unsigned int __len = (len); \
 	unsigned int *__copied = (copied); \
@@ -639,7 +639,7 @@ __kfifo_must_check_helper( \
 #define	kfifo_to_user(fifo, to, len, copied) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	void __user *__to = (to); \
 	unsigned int __len = (len); \
 	unsigned int *__copied = (copied); \
@@ -666,7 +666,7 @@ __kfifo_must_check_helper( \
  */
 #define	kfifo_dma_in_prepare(fifo, sgl, nents, len) \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	struct scatterlist *__sgl = (sgl); \
 	int __nents = (nents); \
 	unsigned int __len = (len); \
@@ -690,7 +690,7 @@ __kfifo_must_check_helper( \
  */
 #define kfifo_dma_in_finish(fifo, len) \
 (void)({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	unsigned int __len = (len); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
@@ -717,7 +717,7 @@ __kfifo_must_check_helper( \
  */
 #define	kfifo_dma_out_prepare(fifo, sgl, nents, len) \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo);  \
 	struct scatterlist *__sgl = (sgl); \
 	int __nents = (nents); \
 	unsigned int __len = (len); \
@@ -741,7 +741,7 @@ __kfifo_must_check_helper( \
  */
 #define kfifo_dma_out_finish(fifo, len) \
 (void)({ \
-	typeof(fifo + 1) __tmp = (fifo); \
+	typeof((fifo) + 1) __tmp = (fifo); \
 	unsigned int __len = (len); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
@@ -766,8 +766,8 @@ __kfifo_must_check_helper( \
 #define	kfifo_out_peek(fifo, buf, n) \
 __kfifo_must_check_helper( \
 ({ \
-	typeof(fifo + 1) __tmp = (fifo); \
-	typeof(buf + 1) __buf = (buf); \
+	typeof((fifo) + 1) __tmp = (fifo); \
+	typeof((buf) + 1) __buf = (buf); \
 	unsigned long __n = (n); \
 	const size_t __recsize = sizeof(*__tmp->rectype); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
-- 
cgit v1.2.3-59-g8ed1b


From 31583bb0cf6cc40f2a468a4d2f3b9cbefd24f891 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 9 Sep 2010 16:37:37 -0700
Subject: cgroups: fix API thinko

Add cgroup_attach_task_all()

The existing cgroup_attach_task_current_cg() API is called by a thread to
attach another thread to all of its cgroups; this is unsuitable for cases
where a privileged task wants to attach itself to the cgroups of a less
privileged one, since the call must be made from the context of the target
task.

This patch adds a more generic cgroup_attach_task_all() API that allows
both the source task and to-be-moved task to be specified.
cgroup_attach_task_current_cg() becomes a specialization of the more
generic new function.

[menage@google.com: rewrote changelog]
[akpm@linux-foundation.org: address reviewer comments]
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Ben Blum <bblum@google.com>
Cc: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 12 +++++++++++-
 kernel/cgroup.c        | 13 +++++++------
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed3e92e41c6e..0c991023ee47 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
-int cgroup_attach_task_current_cg(struct task_struct *);
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
+
+static inline int cgroup_attach_task_current_cg(struct task_struct *tsk)
+{
+	return cgroup_attach_task_all(current, tsk);
+}
 
 /*
  * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
@@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 }
 
 /* No cgroups - nothing to do */
+static inline int cgroup_attach_task_all(struct task_struct *from,
+					 struct task_struct *t)
+{
+	return 0;
+}
 static inline int cgroup_attach_task_current_cg(struct task_struct *t)
 {
 	return 0;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 192f88c5b0f9..c9483d8f6140 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1791,19 +1791,20 @@ out:
 }
 
 /**
- * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup
+ * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
+ * @from: attach to all cgroups of a given task
  * @tsk: the task to be attached
  */
-int cgroup_attach_task_current_cg(struct task_struct *tsk)
+int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 {
 	struct cgroupfs_root *root;
-	struct cgroup *cur_cg;
 	int retval = 0;
 
 	cgroup_lock();
 	for_each_active_root(root) {
-		cur_cg = task_cgroup_from_root(current, root);
-		retval = cgroup_attach_task(cur_cg, tsk);
+		struct cgroup *from_cg = task_cgroup_from_root(from, root);
+
+		retval = cgroup_attach_task(from_cg, tsk);
 		if (retval)
 			break;
 	}
@@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk)
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg);
+EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
 /*
  * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
-- 
cgit v1.2.3-59-g8ed1b


From 4969c1192d15afa3389e7ae3302096ff684ba655 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Sep 2010 16:37:52 -0700
Subject: mm: fix swapin race condition

The pte_same check is reliable only if the swap entry remains pinned (by
the page lock on swapcache).  We've also to ensure the swapcache isn't
removed before we take the lock as try_to_free_swap won't care about the
page pin.

One of the possible impacts of this patch is that a KSM-shared page can
point to the anon_vma of another process, which could exit before the page
is freed.

This can leave a page with a pointer to a recycled anon_vma object, or
worse, a pointer to something that is no longer an anon_vma.

[riel@redhat.com: changelog help]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h | 20 +++++++++-----------
 mm/ksm.c            |  3 ---
 mm/memory.c         | 39 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 43 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 74d691ee9121..3319a6967626 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -16,6 +16,9 @@
 struct stable_node;
 struct mem_cgroup;
 
+struct page *ksm_does_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
+
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
@@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page,
  * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
  * but what if the vma was unmerged while the page was swapped out?
  */
-struct page *ksm_does_need_to_copy(struct page *page,
-			struct vm_area_struct *vma, unsigned long address);
-static inline struct page *ksm_might_need_to_copy(struct page *page,
+static inline int ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
 	struct anon_vma *anon_vma = page_anon_vma(page);
 
-	if (!anon_vma ||
-	    (anon_vma->root == vma->anon_vma->root &&
-	     page->index == linear_page_index(vma, address)))
-		return page;
-
-	return ksm_does_need_to_copy(page, vma, address);
+	return anon_vma &&
+		(anon_vma->root != vma->anon_vma->root ||
+		 page->index != linear_page_index(vma, address));
 }
 
 int page_referenced_ksm(struct page *page,
@@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	return 0;
 }
 
-static inline struct page *ksm_might_need_to_copy(struct page *page,
+static inline int ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
-	return page;
+	return 0;
 }
 
 static inline int page_referenced_ksm(struct page *page,
diff --git a/mm/ksm.c b/mm/ksm.c
index e2ae00458320..b1873cf03ed9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 {
 	struct page *new_page;
 
-	unlock_page(page);	/* any racers will COW it, not modify it */
-
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
 		copy_user_highpage(new_page, page, address, vma);
@@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 			add_page_to_unevictable_list(new_page);
 	}
 
-	page_cache_release(page);
 	return new_page;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 6b2ab1051851..71b161b73bb5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned int flags, pte_t orig_pte)
 {
 	spinlock_t *ptl;
-	struct page *page;
+	struct page *page, *swapcache = NULL;
 	swp_entry_t entry;
 	pte_t pte;
 	struct mem_cgroup *ptr = NULL;
@@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
-	page = ksm_might_need_to_copy(page, vma, address);
-	if (!page) {
-		ret = VM_FAULT_OOM;
-		goto out;
+	/*
+	 * Make sure try_to_free_swap didn't release the swapcache
+	 * from under us. The page pin isn't enough to prevent that.
+	 */
+	if (unlikely(!PageSwapCache(page)))
+		goto out_page;
+
+	if (ksm_might_need_to_copy(page, vma, address)) {
+		swapcache = page;
+		page = ksm_does_need_to_copy(page, vma, address);
+
+		if (unlikely(!page)) {
+			ret = VM_FAULT_OOM;
+			page = swapcache;
+			swapcache = NULL;
+			goto out_page;
+		}
 	}
 
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
@@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		try_to_free_swap(page);
 	unlock_page(page);
+	if (swapcache) {
+		/*
+		 * Hold the lock to avoid the swap entry to be reused
+		 * until we take the PT lock for the pte_same() check
+		 * (to avoid false positives from pte_same). For
+		 * further safety release the lock after the swap_free
+		 * so that the swap count won't change under a
+		 * parallel locked swapcache.
+		 */
+		unlock_page(swapcache);
+		page_cache_release(swapcache);
+	}
 
 	if (flags & FAULT_FLAG_WRITE) {
 		ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
@@ -2756,6 +2781,10 @@ out_page:
 	unlock_page(page);
 out_release:
 	page_cache_release(page);
+	if (swapcache) {
+		unlock_page(swapcache);
+		page_cache_release(swapcache);
+	}
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 5affb607720d734ca572b8a77c5c7d62d3042b6f Mon Sep 17 00:00:00 2001
From: Gregory Bean <gbean@codeaurora.org>
Date: Thu, 9 Sep 2010 16:38:02 -0700
Subject: gpio: sx150x: correct and refine reset-on-probe behavior

Replace the arbitrary software-reset call from the device-probe
method, because:

- It is defective.  To work correctly, it should be two byte writes,
  not a single word write.  As it stands, it does nothing.

- Some devices with sx150x expanders installed have their NRESET pins
  ganged on the same line, so resetting one causes the others to reset -
  not a nice thing to do arbitrarily!

- The probe, usually taking place at boot, implies a recent hard-reset,
  so a software reset at this point is just a waste of energy anyway.

Therefore, make it optional, defaulting to off, as this will match the
common case of probing at powerup and also matches the current broken
no-op behavior.

Signed-off-by: Gregory Bean <gbean@codeaurora.org>
Reviewed-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/sx150x.c      | 26 +++++++++++++++++++++-----
 include/linux/i2c/sx150x.h |  4 ++++
 2 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/sx150x.c b/drivers/gpio/sx150x.c
index b42f42ca70c3..823559ab0e24 100644
--- a/drivers/gpio/sx150x.c
+++ b/drivers/gpio/sx150x.c
@@ -459,17 +459,33 @@ static int sx150x_init_io(struct sx150x_chip *chip, u8 base, u16 cfg)
 	return err;
 }
 
-static int sx150x_init_hw(struct sx150x_chip *chip,
-			struct sx150x_platform_data *pdata)
+static int sx150x_reset(struct sx150x_chip *chip)
 {
-	int err = 0;
+	int err;
 
-	err = i2c_smbus_write_word_data(chip->client,
+	err = i2c_smbus_write_byte_data(chip->client,
 					chip->dev_cfg->reg_reset,
-					0x3412);
+					0x12);
 	if (err < 0)
 		return err;
 
+	err = i2c_smbus_write_byte_data(chip->client,
+					chip->dev_cfg->reg_reset,
+					0x34);
+	return err;
+}
+
+static int sx150x_init_hw(struct sx150x_chip *chip,
+			struct sx150x_platform_data *pdata)
+{
+	int err = 0;
+
+	if (pdata->reset_during_probe) {
+		err = sx150x_reset(chip);
+		if (err < 0)
+			return err;
+	}
+
 	err = sx150x_i2c_write(chip->client,
 			chip->dev_cfg->reg_misc,
 			0x01);
diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h
index ee3049cb9ba5..52baa79d69a7 100644
--- a/include/linux/i2c/sx150x.h
+++ b/include/linux/i2c/sx150x.h
@@ -63,6 +63,9 @@
  *            IRQ lines will appear.  Similarly to gpio_base, the expander
  *            will create a block of irqs beginning at this number.
  *            This value is ignored if irq_summary is < 0.
+ * @reset_during_probe: If set to true, the driver will trigger a full
+ *                      reset of the chip at the beginning of the probe
+ *                      in order to place it in a known state.
  */
 struct sx150x_platform_data {
 	unsigned gpio_base;
@@ -73,6 +76,7 @@ struct sx150x_platform_data {
 	u16      io_polarity;
 	int      irq_summary;
 	unsigned irq_base;
+	bool     reset_during_probe;
 };
 
 #endif /* __LINUX_I2C_SX150X_H */
-- 
cgit v1.2.3-59-g8ed1b


From c956126c137d97acb6f4d56fa9572d0bcc84e4ed Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 9 Sep 2010 16:38:03 -0700
Subject: gpio: doc updates

There's been some recent confusion about error checking GPIO numbers.
briefly, it should be handled mostly during setup, when gpio_request() is
called, and NEVER by expectig gpio_is_valid to report more than
never-usable GPIO numbers.

[akpm@linux-foundation.org: terminate unterminated comment]
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Eric Miao" <eric.y.miao@gmail.com>
Cc: "Ryan Mallon" <ryan@bluewatersys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/gpio.txt     | 22 ++++++++++++++--------
 include/asm-generic/gpio.h | 14 +++++++++++++-
 2 files changed, 27 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index d96a6dba5748..9633da01ff46 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -109,17 +109,19 @@ use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders.
 
 If you want to initialize a structure with an invalid GPIO number, use
 some negative number (perhaps "-EINVAL"); that will never be valid.  To
-test if a number could reference a GPIO, you may use this predicate:
+test if such number from such a structure could reference a GPIO, you
+may use this predicate:
 
 	int gpio_is_valid(int number);
 
 A number that's not valid will be rejected by calls which may request
 or free GPIOs (see below).  Other numbers may also be rejected; for
-example, a number might be valid but unused on a given board.
-
-Whether a platform supports multiple GPIO controllers is currently a
-platform-specific implementation issue.
+example, a number might be valid but temporarily unused on a given board.
 
+Whether a platform supports multiple GPIO controllers is a platform-specific
+implementation issue, as are whether that support can leave "holes" in the space
+of GPIO numbers, and whether new controllers can be added at runtime.  Such issues
+can affect things including whether adjacent GPIO numbers are both valid.
 
 Using GPIOs
 -----------
@@ -480,12 +482,16 @@ To support this framework, a platform's Kconfig will "select" either
 ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
 and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
 three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
-They may also want to provide a custom value for ARCH_NR_GPIOS.
 
-ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled
+It may also provide a custom value for ARCH_NR_GPIOS, so that it better
+reflects the number of GPIOs in actual use on that platform, without
+wasting static table space.  (It should count both built-in/SoC GPIOs and
+also ones on GPIO expanders.
+
+ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled
 into the kernel on that architecture.
 
-ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user
 can enable it and build it into the kernel optionally.
 
 If neither of these options are selected, the platform does not support
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index c7376bf80b06..8ca18e26d7e3 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -16,15 +16,27 @@
  * While the GPIO programming interface defines valid GPIO numbers
  * to be in the range 0..MAX_INT, this library restricts them to the
  * smaller range 0..ARCH_NR_GPIOS-1.
+ *
+ * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of
+ * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is
+ * actually an estimate of a board-specific value.
  */
 
 #ifndef ARCH_NR_GPIOS
 #define ARCH_NR_GPIOS		256
 #endif
 
+/*
+ * "valid" GPIO numbers are nonnegative and may be passed to
+ * setup routines like gpio_request().  only some valid numbers
+ * can successfully be requested and used.
+ *
+ * Invalid GPIO numbers are useful for indicating no-such-GPIO in
+ * platform data and other tables.
+ */
+
 static inline int gpio_is_valid(int number)
 {
-	/* only some non-negative numbers are valid */
 	return ((unsigned)number) < ARCH_NR_GPIOS;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 910321ea817a202ff70fac666e37e2c8e2f88823 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 9 Sep 2010 16:38:07 -0700
Subject: swap: revert special hibernation allocation

Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf
"hibernation: freeze swap at hibernation".  It complicated matters by
adding a second swap allocation path, just for hibernation; without in any
way fixing the issue that it was intended to address - page reclaim after
fixing the hibernation image might free swap from a page already imaged as
swapcache, letting its swap be reallocated to store a different page of
the image: resulting in data corruption if the imaged page were freed as
clean then swapped back in.  Pages freed to si->swap_map were still in
danger of being reallocated by the alternative allocation path.

I guess it inadvertently fixed slow SSD swap allocation for hibernation,
as reported by Nigel Cunningham: by missing out the discards that occur on
the usual swap allocation path; but that was unintentional, and needs a
separate fix.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Ondrej Zary <linux@rainbow-software.org>
Cc: Andrea Gelmini <andrea.gelmini@gmail.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Nigel Cunningham <nigel@tuxonice.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h     |  8 +----
 kernel/power/hibernate.c |  1 -
 kernel/power/snapshot.c  |  1 -
 kernel/power/swap.c      |  6 ++--
 mm/swapfile.c            | 94 ++++++++++++------------------------------------
 5 files changed, 26 insertions(+), 84 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2fee51a11b73..bf4eb62506db 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -315,6 +315,7 @@ extern long nr_swap_pages;
 extern long total_swap_pages;
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page_of_type(int);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
@@ -331,13 +332,6 @@ extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
-#ifdef CONFIG_HIBERNATION
-void hibernation_freeze_swap(void);
-void hibernation_thaw_swap(void);
-swp_entry_t get_swap_for_hibernation(int type);
-void swap_free_for_hibernation(swp_entry_t val);
-#endif
-
 /* linux/mm/thrash.c */
 extern struct mm_struct *swap_token_mm;
 extern void grab_swap_token(struct mm_struct *);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c77963938bca..8dc31e02ae12 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mode)
 		goto Close;
 
 	suspend_console();
-	hibernation_freeze_swap();
 	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
 	error = dpm_suspend_start(PMSG_FREEZE);
 	if (error)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5e7edfb05e66..f6cd6faf84fd 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1086,7 +1086,6 @@ void swsusp_free(void)
 	buffer = NULL;
 	alloc_normal = 0;
 	alloc_highmem = 0;
-	hibernation_thaw_swap();
 }
 
 /* Helper functions used for the shrinking of memory. */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 5d0059eed3e4..e6a5bdf61a37 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap)
 {
 	unsigned long offset;
 
-	offset = swp_offset(get_swap_for_hibernation(swap));
+	offset = swp_offset(get_swap_page_of_type(swap));
 	if (offset) {
 		if (swsusp_extents_insert(offset))
-			swap_free_for_hibernation(swp_entry(swap, offset));
+			swap_free(swp_entry(swap, offset));
 		else
 			return swapdev_block(swap, offset);
 	}
@@ -163,7 +163,7 @@ void free_all_swap_pages(int swap)
 		ext = container_of(node, struct swsusp_extent, node);
 		rb_erase(node, &swsusp_extents);
 		for (offset = ext->start; offset <= ext->end; offset++)
-			swap_free_for_hibernation(swp_entry(swap, offset));
+			swap_free(swp_entry(swap, offset));
 
 		kfree(ext);
 	}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1f3f9c59a73a..f08d165871b3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -47,8 +47,6 @@ long nr_swap_pages;
 long total_swap_pages;
 static int least_priority;
 
-static bool swap_for_hibernation;
-
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
@@ -453,8 +451,6 @@ swp_entry_t get_swap_page(void)
 	spin_lock(&swap_lock);
 	if (nr_swap_pages <= 0)
 		goto noswap;
-	if (swap_for_hibernation)
-		goto noswap;
 	nr_swap_pages--;
 
 	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
@@ -487,6 +483,28 @@ noswap:
 	return (swp_entry_t) {0};
 }
 
+/* The only caller of this function is now susupend routine */
+swp_entry_t get_swap_page_of_type(int type)
+{
+	struct swap_info_struct *si;
+	pgoff_t offset;
+
+	spin_lock(&swap_lock);
+	si = swap_info[type];
+	if (si && (si->flags & SWP_WRITEOK)) {
+		nr_swap_pages--;
+		/* This is called for allocating swap entry, not cache */
+		offset = scan_swap_map(si, 1);
+		if (offset) {
+			spin_unlock(&swap_lock);
+			return swp_entry(type, offset);
+		}
+		nr_swap_pages++;
+	}
+	spin_unlock(&swap_lock);
+	return (swp_entry_t) {0};
+}
+
 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
@@ -746,74 +764,6 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
 #endif
 
 #ifdef CONFIG_HIBERNATION
-
-static pgoff_t hibernation_offset[MAX_SWAPFILES];
-/*
- * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise,
- * saved swap_map[] image to the disk will be an incomplete because it's
- * changing without synchronization with hibernation snap shot.
- * At resume, we just make swap_for_hibernation=false. We can forget
- * used maps easily.
- */
-void hibernation_freeze_swap(void)
-{
-	int i;
-
-	spin_lock(&swap_lock);
-
-	printk(KERN_INFO "PM: Freeze Swap\n");
-	swap_for_hibernation = true;
-	for (i = 0; i < MAX_SWAPFILES; i++)
-		hibernation_offset[i] = 1;
-	spin_unlock(&swap_lock);
-}
-
-void hibernation_thaw_swap(void)
-{
-	spin_lock(&swap_lock);
-	if (swap_for_hibernation) {
-		printk(KERN_INFO "PM: Thaw Swap\n");
-		swap_for_hibernation = false;
-	}
-	spin_unlock(&swap_lock);
-}
-
-/*
- * Because updateing swap_map[] can make not-saved-status-change,
- * we use our own easy allocator.
- * Please see kernel/power/swap.c, Used swaps are recorded into
- * RB-tree.
- */
-swp_entry_t get_swap_for_hibernation(int type)
-{
-	pgoff_t off;
-	swp_entry_t val = {0};
-	struct swap_info_struct *si;
-
-	spin_lock(&swap_lock);
-
-	si = swap_info[type];
-	if (!si || !(si->flags & SWP_WRITEOK))
-		goto done;
-
-	for (off = hibernation_offset[type]; off < si->max; ++off) {
-		if (!si->swap_map[off])
-			break;
-	}
-	if (off < si->max) {
-		val = swp_entry(type, off);
-		hibernation_offset[type] = off + 1;
-	}
-done:
-	spin_unlock(&swap_lock);
-	return val;
-}
-
-void swap_free_for_hibernation(swp_entry_t ent)
-{
-	/* Nothing to do */
-}
-
 /*
  * Find the swap type that corresponds to given device (if any).
  *
-- 
cgit v1.2.3-59-g8ed1b


From 3399446632739fcd05fd8b272b476a69c6e6d14a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 9 Sep 2010 16:38:11 -0700
Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD

Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs
show a shift since I last tested in December: in part because of firmware
updates, in part because of the necessary move from barriers to awaiting
completion at the block layer.  While discard at swapon still shows as
slightly beneficial on both, discarding 1MB swap cluster when allocating
is now disadvanteous: adds 25% overhead on Intel, adds 230% on OCZ (YMMV).

Surrender: discard as presently implemented is more hindrance than help
for swap; but might prove useful on other devices, or with improvements.
So continue to do the discard at swapon, but make discard while swapping
conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which has been using
only the lower 16 bits of int flags).

We can add a --discard or -d to swapon(8), and a "discard" to swap in
/etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2, nilfs2.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Nigel Cunningham <nigel@tuxonice.net>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <jaxboe@fusionio.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 3 ++-
 mm/swapfile.c        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bf4eb62506db..7cdd63366f88 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -19,6 +19,7 @@ struct bio;
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
+#define SWAP_FLAG_DISCARD	0x10000 /* discard swap cluster after use */
 
 static inline int current_is_kswapd(void)
 {
@@ -142,7 +143,7 @@ struct swap_extent {
 enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
-	SWP_DISCARDABLE = (1 << 2),	/* blkdev supports discard */
+	SWP_DISCARDABLE = (1 << 2),	/* swapon+blkdev support discard */
 	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
 	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
 	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1894dead0b58..7c703ff2f36f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2047,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 			p->flags |= SWP_SOLIDSTATE;
 			p->cluster_next = 1 + (random32() % p->highest_bit);
 		}
-		if (discard_swap(p) == 0)
+		if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD))
 			p->flags |= SWP_DISCARDABLE;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Thu, 9 Sep 2010 16:38:17 -0700
Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES when
 memory is low and kswapd is awake

Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is
cheaper than scanning a number of lists.  To avoid synchronization
overhead, counter deltas are maintained on a per-cpu basis and drained
both periodically and when the delta is above a threshold.  On large CPU
systems, the difference between the estimated and real value of
NR_FREE_PAGES can be very high.  If NR_FREE_PAGES is much higher than
number of real free page in buddy, the VM can allocate pages below min
watermark, at worst reducing the real number of pages to zero.  Even if
the OOM killer kills some victim for freeing memory, it may not free
memory if the exit path requires a new page resulting in livelock.

This patch introduces a zone_page_state_snapshot() function (courtesy of
Christoph) that takes a slightly more accurate view of an arbitrary vmstat
counter.  It is used to read NR_FREE_PAGES while kswapd is awake to avoid
the watermark being accidentally broken.  The estimate is not perfect and
may result in cache line bounces but is expected to be lighter than the
IPI calls necessary to continually drain the per-cpu counters while kswapd
is awake.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 13 +++++++++++++
 include/linux/vmstat.h | 22 ++++++++++++++++++++++
 mm/mmzone.c            | 21 +++++++++++++++++++++
 mm/page_alloc.c        |  4 ++--
 mm/vmstat.c            | 15 ++++++++++++++-
 5 files changed, 72 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6e6e62648a4d..3984c4eb41fd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -283,6 +283,13 @@ struct zone {
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long watermark[NR_WMARK];
 
+	/*
+	 * When free pages are below this point, additional steps are taken
+	 * when reading the number of free pages to avoid per-cpu counter
+	 * drift allowing watermarks to be breached
+	 */
+	unsigned long percpu_drift_mark;
+
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
@@ -441,6 +448,12 @@ static inline int zone_is_oom_locked(const struct zone *zone)
 	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
 }
 
+#ifdef CONFIG_SMP
+unsigned long zone_nr_free_pages(struct zone *zone);
+#else
+#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
+#endif /* CONFIG_SMP */
+
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 7f43ccdc1d38..eaaea37b3b75 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone,
 	return x;
 }
 
+/*
+ * More accurate version that also considers the currently pending
+ * deltas. For that we need to loop over all cpus to find the current
+ * deltas. There is no synchronization so the result cannot be
+ * exactly accurate either.
+ */
+static inline unsigned long zone_page_state_snapshot(struct zone *zone,
+					enum zone_stat_item item)
+{
+	long x = atomic_long_read(&zone->vm_stat[item]);
+
+#ifdef CONFIG_SMP
+	int cpu;
+	for_each_online_cpu(cpu)
+		x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item];
+
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 extern unsigned long global_reclaimable_pages(void);
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
 
diff --git a/mm/mmzone.c b/mm/mmzone.c
index f5b7d1760213..e35bfb82c855 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn,
 	return 1;
 }
 #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
+
+#ifdef CONFIG_SMP
+/* Called when a more accurate view of NR_FREE_PAGES is needed */
+unsigned long zone_nr_free_pages(struct zone *zone)
+{
+	unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES);
+
+	/*
+	 * While kswapd is awake, it is considered the zone is under some
+	 * memory pressure. Under pressure, there is a risk that
+	 * per-cpu-counter-drift will allow the min watermark to be breached
+	 * potentially causing a live-lock. While kswapd is awake and
+	 * free pages are low, get a better estimate for free pages
+	 */
+	if (nr_free_pages < zone->percpu_drift_mark &&
+			!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
+		return zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+	return nr_free_pages;
+}
+#endif /* CONFIG_SMP */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 452e2ba06c7c..b2d21e06d45d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1462,7 +1462,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
+	long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
 	int o;
 
 	if (alloc_flags & ALLOC_HIGH)
@@ -2424,7 +2424,7 @@ void show_free_areas(void)
 			" all_unreclaimable? %s"
 			"\n",
 			zone->name,
-			K(zone_page_state(zone, NR_FREE_PAGES)),
+			K(zone_nr_free_pages(zone)),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a8d6b59e609a..355a9e669aaa 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void)
 	int threshold;
 
 	for_each_populated_zone(zone) {
+		unsigned long max_drift, tolerate_drift;
+
 		threshold = calculate_threshold(zone);
 
 		for_each_online_cpu(cpu)
 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
 							= threshold;
+
+		/*
+		 * Only set percpu_drift_mark if there is a danger that
+		 * NR_FREE_PAGES reports the low watermark is ok when in fact
+		 * the min watermark could be breached by an allocation
+		 */
+		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
+		max_drift = num_online_cpus() * threshold;
+		if (max_drift > tolerate_drift)
+			zone->percpu_drift_mark = high_wmark_pages(zone) +
+					max_drift;
 	}
 }
 
@@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n        scanned  %lu"
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
-		   zone_page_state(zone, NR_FREE_PAGES),
+		   zone_nr_free_pages(zone),
 		   min_wmark_pages(zone),
 		   low_wmark_pages(zone),
 		   high_wmark_pages(zone),
-- 
cgit v1.2.3-59-g8ed1b


From e2f3d75fc0e4a0d03c61872bad39ffa2e74a04ff Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 7 Sep 2010 14:05:31 +0200
Subject: libata: skip EH autopsy and recovery during suspend

For some mysterious reason, certain hardware reacts badly to usual EH
actions while the system is going for suspend.  As the devices won't
be needed until the system is resumed, ask EH to skip usual autopsy
and recovery and proceed directly to suspend.

Signed-off-by: Tejun Heo <tj@kernel.org>
Tested-by: Stephan Diestelhorst <stephan.diestelhorst@amd.com>
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 14 +++++++++++++-
 drivers/ata/libata-eh.c   |  4 ++++
 include/linux/libata.h    |  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c035b3d041ee..932eaee50245 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5418,6 +5418,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg,
  */
 int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
 {
+	unsigned int ehi_flags = ATA_EHI_QUIET;
 	int rc;
 
 	/*
@@ -5426,7 +5427,18 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
 	 */
 	ata_lpm_enable(host);
 
-	rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1);
+	/*
+	 * On some hardware, device fails to respond after spun down
+	 * for suspend.  As the device won't be used before being
+	 * resumed, we don't need to touch the device.  Ask EH to skip
+	 * the usual stuff and proceed directly to suspend.
+	 *
+	 * http://thread.gmane.org/gmane.linux.ide/46764
+	 */
+	if (mesg.event == PM_EVENT_SUSPEND)
+		ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY;
+
+	rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1);
 	if (rc == 0)
 		host->dev->power.power_state = mesg;
 	return rc;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index c9ae299b8342..e48302eae55f 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -3235,6 +3235,10 @@ static int ata_eh_skip_recovery(struct ata_link *link)
 	if (link->flags & ATA_LFLAG_DISABLED)
 		return 1;
 
+	/* skip if explicitly requested */
+	if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
+		return 1;
+
 	/* thaw frozen port and recover failed devices */
 	if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
 		return 0;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index f010f18a0f86..7de282d8bedf 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -335,6 +335,7 @@ enum {
 	ATA_EHI_HOTPLUGGED	= (1 << 0),  /* could have been hotplugged */
 	ATA_EHI_NO_AUTOPSY	= (1 << 2),  /* no autopsy */
 	ATA_EHI_QUIET		= (1 << 3),  /* be quiet */
+	ATA_EHI_NO_RECOVERY	= (1 << 4),  /* no recovery */
 
 	ATA_EHI_DID_SOFTRESET	= (1 << 16), /* already soft-reset this port */
 	ATA_EHI_DID_HARDRESET	= (1 << 17), /* already soft-reset this port */
-- 
cgit v1.2.3-59-g8ed1b


From ea3c64506ea7965f86f030155e6fdef381de10e2 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@google.com>
Date: Tue, 31 Aug 2010 16:20:36 -0700
Subject: libata-sff: Reenable Port Multiplier after libata-sff remodeling.

Keep track of the link on the which the current request is in progress.
It allows support of links behind port multiplier.

Not all libata-sff is PMP compliant. Code for native BMDMA controller
does not take in accound PMP.

Tested on Marvell 7042 and Sil7526.

Signed-off-by: Gwendal Grignou <gwendal@google.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c | 38 ++++++++++++++++++++++++++++----------
 drivers/ata/sata_mv.c    |  2 +-
 include/linux/libata.h   |  3 ++-
 3 files changed, 31 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index dee3c2c52562..e30c537cce32 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1045,7 +1045,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
 int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
 		     u8 status, int in_wq)
 {
-	struct ata_eh_info *ehi = &ap->link.eh_info;
+	struct ata_link *link = qc->dev->link;
+	struct ata_eh_info *ehi = &link->eh_info;
 	unsigned long flags = 0;
 	int poll_next;
 
@@ -1301,8 +1302,14 @@ fsm_start:
 }
 EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
 
-void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay)
+void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay)
 {
+	struct ata_port *ap = link->ap;
+
+	WARN_ON((ap->sff_pio_task_link != NULL) &&
+		(ap->sff_pio_task_link != link));
+	ap->sff_pio_task_link = link;
+
 	/* may fail if ata_sff_flush_pio_task() in progress */
 	queue_delayed_work(ata_sff_wq, &ap->sff_pio_task,
 			   msecs_to_jiffies(delay));
@@ -1324,14 +1331,18 @@ static void ata_sff_pio_task(struct work_struct *work)
 {
 	struct ata_port *ap =
 		container_of(work, struct ata_port, sff_pio_task.work);
+	struct ata_link *link = ap->sff_pio_task_link;
 	struct ata_queued_cmd *qc;
 	u8 status;
 	int poll_next;
 
+	BUG_ON(ap->sff_pio_task_link == NULL); 
 	/* qc can be NULL if timeout occurred */
-	qc = ata_qc_from_tag(ap, ap->link.active_tag);
-	if (!qc)
+	qc = ata_qc_from_tag(ap, link->active_tag);
+	if (!qc) {
+		ap->sff_pio_task_link = NULL;
 		return;
+	}
 
 fsm_start:
 	WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE);
@@ -1348,11 +1359,16 @@ fsm_start:
 		msleep(2);
 		status = ata_sff_busy_wait(ap, ATA_BUSY, 10);
 		if (status & ATA_BUSY) {
-			ata_sff_queue_pio_task(ap, ATA_SHORT_PAUSE);
+			ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE);
 			return;
 		}
 	}
 
+	/*
+	 * hsm_move() may trigger another command to be processed.
+	 * clean the link beforehand.
+	 */
+	ap->sff_pio_task_link = NULL;
 	/* move the HSM */
 	poll_next = ata_sff_hsm_move(ap, qc, status, 1);
 
@@ -1379,6 +1395,7 @@ fsm_start:
 unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
+	struct ata_link *link = qc->dev->link;
 
 	/* Use polling pio if the LLD doesn't handle
 	 * interrupt driven pio and atapi CDB interrupt.
@@ -1399,7 +1416,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 		ap->hsm_task_state = HSM_ST_LAST;
 
 		if (qc->tf.flags & ATA_TFLAG_POLLING)
-			ata_sff_queue_pio_task(ap, 0);
+			ata_sff_queue_pio_task(link, 0);
 
 		break;
 
@@ -1412,7 +1429,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 		if (qc->tf.flags & ATA_TFLAG_WRITE) {
 			/* PIO data out protocol */
 			ap->hsm_task_state = HSM_ST_FIRST;
-			ata_sff_queue_pio_task(ap, 0);
+			ata_sff_queue_pio_task(link, 0);
 
 			/* always send first data block using the
 			 * ata_sff_pio_task() codepath.
@@ -1422,7 +1439,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 			ap->hsm_task_state = HSM_ST;
 
 			if (qc->tf.flags & ATA_TFLAG_POLLING)
-				ata_sff_queue_pio_task(ap, 0);
+				ata_sff_queue_pio_task(link, 0);
 
 			/* if polling, ata_sff_pio_task() handles the
 			 * rest.  otherwise, interrupt handler takes
@@ -1444,7 +1461,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 		/* send cdb by polling if no cdb interrupt */
 		if ((!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) ||
 		    (qc->tf.flags & ATA_TFLAG_POLLING))
-			ata_sff_queue_pio_task(ap, 0);
+			ata_sff_queue_pio_task(link, 0);
 		break;
 
 	default:
@@ -2737,6 +2754,7 @@ EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep);
 unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
+	struct ata_link *link = qc->dev->link;
 
 	/* defer PIO handling to sff_qc_issue */
 	if (!ata_is_dma(qc->tf.protocol))
@@ -2765,7 +2783,7 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc)
 
 		/* send cdb by polling if no cdb interrupt */
 		if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR))
-			ata_sff_queue_pio_task(ap, 0);
+			ata_sff_queue_pio_task(link, 0);
 		break;
 
 	default:
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 81982594a014..a9fd9709c262 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -2284,7 +2284,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc)
 	}
 
 	if (qc->tf.flags & ATA_TFLAG_POLLING)
-		ata_sff_queue_pio_task(ap, 0);
+		ata_sff_queue_pio_task(link, 0);
 	return 0;
 }
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7de282d8bedf..45fb2967b66d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -724,6 +724,7 @@ struct ata_port {
 	struct ata_ioports	ioaddr;	/* ATA cmd/ctl/dma register blocks */
 	u8			ctl;	/* cache of ATA control register */
 	u8			last_ctl;	/* Cache last written value */
+	struct ata_link*	sff_pio_task_link; /* link currently used */
 	struct delayed_work	sff_pio_task;
 #ifdef CONFIG_ATA_BMDMA
 	struct ata_bmdma_prd	*bmdma_prd;	/* BMDMA SG list */
@@ -1595,7 +1596,7 @@ extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
 extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
 			    u8 status, int in_wq);
-extern void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay);
+extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay);
 extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc);
 extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc);
 extern unsigned int ata_sff_port_intr(struct ata_port *ap,
-- 
cgit v1.2.3-59-g8ed1b


From 006abe887c5e637d059c44310de6c92f36aded3b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 12 Sep 2010 19:55:25 -0400
Subject: SUNRPC: Fix a race in rpc_info_open

There is a race between rpc_info_open and rpc_release_client()
in that nothing stops a process from opening the file after
the clnt->cl_kref goes to zero.

Fix this by using atomic_inc_unless_zero()...

Reported-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 include/linux/sunrpc/clnt.h |  2 +-
 net/sunrpc/clnt.c           | 26 ++++++++++++--------------
 net/sunrpc/rpc_pipe.c       | 14 ++++++++------
 3 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 569dc722a600..85f38a63f098 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -30,7 +30,7 @@ struct rpc_inode;
  * The high-level client handle
  */
 struct rpc_clnt {
-	struct kref		cl_kref;	/* Number of references */
+	atomic_t		cl_count;	/* Number of references */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 657aac630fc9..3a8f53e7ba07 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -226,7 +226,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 			goto out_no_principal;
 	}
 
-	kref_init(&clnt->cl_kref);
+	atomic_set(&clnt->cl_count, 1);
 
 	err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
 	if (err < 0)
@@ -390,14 +390,14 @@ rpc_clone_client(struct rpc_clnt *clnt)
 		if (new->cl_principal == NULL)
 			goto out_no_principal;
 	}
-	kref_init(&new->cl_kref);
+	atomic_set(&new->cl_count, 1);
 	err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
 	if (err != 0)
 		goto out_no_path;
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
 	xprt_get(clnt->cl_xprt);
-	kref_get(&clnt->cl_kref);
+	atomic_inc(&clnt->cl_count);
 	rpc_register_client(new);
 	rpciod_up();
 	return new;
@@ -465,10 +465,8 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client);
  * Free an RPC client
  */
 static void
-rpc_free_client(struct kref *kref)
+rpc_free_client(struct rpc_clnt *clnt)
 {
-	struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
-
 	dprintk("RPC:       destroying %s client for %s\n",
 			clnt->cl_protname, clnt->cl_server);
 	if (!IS_ERR(clnt->cl_path.dentry)) {
@@ -495,12 +493,10 @@ out_free:
  * Free an RPC client
  */
 static void
-rpc_free_auth(struct kref *kref)
+rpc_free_auth(struct rpc_clnt *clnt)
 {
-	struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
-
 	if (clnt->cl_auth == NULL) {
-		rpc_free_client(kref);
+		rpc_free_client(clnt);
 		return;
 	}
 
@@ -509,10 +505,11 @@ rpc_free_auth(struct kref *kref)
 	 *       release remaining GSS contexts. This mechanism ensures
 	 *       that it can do so safely.
 	 */
-	kref_init(kref);
+	atomic_inc(&clnt->cl_count);
 	rpcauth_release(clnt->cl_auth);
 	clnt->cl_auth = NULL;
-	kref_put(kref, rpc_free_client);
+	if (atomic_dec_and_test(&clnt->cl_count))
+		rpc_free_client(clnt);
 }
 
 /*
@@ -525,7 +522,8 @@ rpc_release_client(struct rpc_clnt *clnt)
 
 	if (list_empty(&clnt->cl_tasks))
 		wake_up(&destroy_wait);
-	kref_put(&clnt->cl_kref, rpc_free_auth);
+	if (atomic_dec_and_test(&clnt->cl_count))
+		rpc_free_auth(clnt);
 }
 
 /**
@@ -588,7 +586,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
 	if (clnt != NULL) {
 		rpc_task_release_client(task);
 		task->tk_client = clnt;
-		kref_get(&clnt->cl_kref);
+		atomic_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
 			task->tk_flags |= RPC_TASK_SOFT;
 		/* Add to the client's list of all tasks */
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 41a762f82630..8c8eef2b8f26 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -371,21 +371,23 @@ rpc_show_info(struct seq_file *m, void *v)
 static int
 rpc_info_open(struct inode *inode, struct file *file)
 {
-	struct rpc_clnt *clnt;
+	struct rpc_clnt *clnt = NULL;
 	int ret = single_open(file, rpc_show_info, NULL);
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		mutex_lock(&inode->i_mutex);
-		clnt = RPC_I(inode)->private;
-		if (clnt) {
-			kref_get(&clnt->cl_kref);
+
+		spin_lock(&file->f_path.dentry->d_lock);
+		if (!d_unhashed(file->f_path.dentry))
+			clnt = RPC_I(inode)->private;
+		if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) {
+			spin_unlock(&file->f_path.dentry->d_lock);
 			m->private = clnt;
 		} else {
+			spin_unlock(&file->f_path.dentry->d_lock);
 			single_release(inode, file);
 			ret = -EINVAL;
 		}
-		mutex_unlock(&inode->i_mutex);
 	}
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From c54fce6eff197d9c57c97afbf6c9722ce434fc8f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 10 Sep 2010 16:51:36 +0200
Subject: workqueue: add documentation

Update copyright notice and add Documentation/workqueue.txt.

Randy Dunlap, Dave Chinner: misc fixes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-By: Florian Mickler <florian@mickler.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
---
 Documentation/workqueue.txt | 380 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/workqueue.h   |   4 +
 kernel/workqueue.c          |  27 ++--
 3 files changed, 401 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/workqueue.txt

(limited to 'include')

diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt
new file mode 100644
index 000000000000..e4498a2872c3
--- /dev/null
+++ b/Documentation/workqueue.txt
@@ -0,0 +1,380 @@
+
+Concurrency Managed Workqueue (cmwq)
+
+September, 2010		Tejun Heo <tj@kernel.org>
+			Florian Mickler <florian@mickler.org>
+
+CONTENTS
+
+1. Introduction
+2. Why cmwq?
+3. The Design
+4. Application Programming Interface (API)
+5. Example Execution Scenarios
+6. Guidelines
+
+
+1. Introduction
+
+There are many cases where an asynchronous process execution context
+is needed and the workqueue (wq) API is the most commonly used
+mechanism for such cases.
+
+When such an asynchronous execution context is needed, a work item
+describing which function to execute is put on a queue.  An
+independent thread serves as the asynchronous execution context.  The
+queue is called workqueue and the thread is called worker.
+
+While there are work items on the workqueue the worker executes the
+functions associated with the work items one after the other.  When
+there is no work item left on the workqueue the worker becomes idle.
+When a new work item gets queued, the worker begins executing again.
+
+
+2. Why cmwq?
+
+In the original wq implementation, a multi threaded (MT) wq had one
+worker thread per CPU and a single threaded (ST) wq had one worker
+thread system-wide.  A single MT wq needed to keep around the same
+number of workers as the number of CPUs.  The kernel grew a lot of MT
+wq users over the years and with the number of CPU cores continuously
+rising, some systems saturated the default 32k PID space just booting
+up.
+
+Although MT wq wasted a lot of resource, the level of concurrency
+provided was unsatisfactory.  The limitation was common to both ST and
+MT wq albeit less severe on MT.  Each wq maintained its own separate
+worker pool.  A MT wq could provide only one execution context per CPU
+while a ST wq one for the whole system.  Work items had to compete for
+those very limited execution contexts leading to various problems
+including proneness to deadlocks around the single execution context.
+
+The tension between the provided level of concurrency and resource
+usage also forced its users to make unnecessary tradeoffs like libata
+choosing to use ST wq for polling PIOs and accepting an unnecessary
+limitation that no two polling PIOs can progress at the same time.  As
+MT wq don't provide much better concurrency, users which require
+higher level of concurrency, like async or fscache, had to implement
+their own thread pool.
+
+Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with
+focus on the following goals.
+
+* Maintain compatibility with the original workqueue API.
+
+* Use per-CPU unified worker pools shared by all wq to provide
+  flexible level of concurrency on demand without wasting a lot of
+  resource.
+
+* Automatically regulate worker pool and level of concurrency so that
+  the API users don't need to worry about such details.
+
+
+3. The Design
+
+In order to ease the asynchronous execution of functions a new
+abstraction, the work item, is introduced.
+
+A work item is a simple struct that holds a pointer to the function
+that is to be executed asynchronously.  Whenever a driver or subsystem
+wants a function to be executed asynchronously it has to set up a work
+item pointing to that function and queue that work item on a
+workqueue.
+
+Special purpose threads, called worker threads, execute the functions
+off of the queue, one after the other.  If no work is queued, the
+worker threads become idle.  These worker threads are managed in so
+called thread-pools.
+
+The cmwq design differentiates between the user-facing workqueues that
+subsystems and drivers queue work items on and the backend mechanism
+which manages thread-pool and processes the queued work items.
+
+The backend is called gcwq.  There is one gcwq for each possible CPU
+and one gcwq to serve work items queued on unbound workqueues.
+
+Subsystems and drivers can create and queue work items through special
+workqueue API functions as they see fit. They can influence some
+aspects of the way the work items are executed by setting flags on the
+workqueue they are putting the work item on. These flags include
+things like CPU locality, reentrancy, concurrency limits and more. To
+get a detailed overview refer to the API description of
+alloc_workqueue() below.
+
+When a work item is queued to a workqueue, the target gcwq is
+determined according to the queue parameters and workqueue attributes
+and appended on the shared worklist of the gcwq.  For example, unless
+specifically overridden, a work item of a bound workqueue will be
+queued on the worklist of exactly that gcwq that is associated to the
+CPU the issuer is running on.
+
+For any worker pool implementation, managing the concurrency level
+(how many execution contexts are active) is an important issue.  cmwq
+tries to keep the concurrency at a minimal but sufficient level.
+Minimal to save resources and sufficient in that the system is used at
+its full capacity.
+
+Each gcwq bound to an actual CPU implements concurrency management by
+hooking into the scheduler.  The gcwq is notified whenever an active
+worker wakes up or sleeps and keeps track of the number of the
+currently runnable workers.  Generally, work items are not expected to
+hog a CPU and consume many cycles.  That means maintaining just enough
+concurrency to prevent work processing from stalling should be
+optimal.  As long as there are one or more runnable workers on the
+CPU, the gcwq doesn't start execution of a new work, but, when the
+last running worker goes to sleep, it immediately schedules a new
+worker so that the CPU doesn't sit idle while there are pending work
+items.  This allows using a minimal number of workers without losing
+execution bandwidth.
+
+Keeping idle workers around doesn't cost other than the memory space
+for kthreads, so cmwq holds onto idle ones for a while before killing
+them.
+
+For an unbound wq, the above concurrency management doesn't apply and
+the gcwq for the pseudo unbound CPU tries to start executing all work
+items as soon as possible.  The responsibility of regulating
+concurrency level is on the users.  There is also a flag to mark a
+bound wq to ignore the concurrency management.  Please refer to the
+API section for details.
+
+Forward progress guarantee relies on that workers can be created when
+more execution contexts are necessary, which in turn is guaranteed
+through the use of rescue workers.  All work items which might be used
+on code paths that handle memory reclaim are required to be queued on
+wq's that have a rescue-worker reserved for execution under memory
+pressure.  Else it is possible that the thread-pool deadlocks waiting
+for execution contexts to free up.
+
+
+4. Application Programming Interface (API)
+
+alloc_workqueue() allocates a wq.  The original create_*workqueue()
+functions are deprecated and scheduled for removal.  alloc_workqueue()
+takes three arguments - @name, @flags and @max_active.  @name is the
+name of the wq and also used as the name of the rescuer thread if
+there is one.
+
+A wq no longer manages execution resources but serves as a domain for
+forward progress guarantee, flush and work item attributes.  @flags
+and @max_active control how work items are assigned execution
+resources, scheduled and executed.
+
+@flags:
+
+  WQ_NON_REENTRANT
+
+	By default, a wq guarantees non-reentrance only on the same
+	CPU.  A work item may not be executed concurrently on the same
+	CPU by multiple workers but is allowed to be executed
+	concurrently on multiple CPUs.  This flag makes sure
+	non-reentrance is enforced across all CPUs.  Work items queued
+	to a non-reentrant wq are guaranteed to be executed by at most
+	one worker system-wide at any given time.
+
+  WQ_UNBOUND
+
+	Work items queued to an unbound wq are served by a special
+	gcwq which hosts workers which are not bound to any specific
+	CPU.  This makes the wq behave as a simple execution context
+	provider without concurrency management.  The unbound gcwq
+	tries to start execution of work items as soon as possible.
+	Unbound wq sacrifices locality but is useful for the following
+	cases.
+
+	* Wide fluctuation in the concurrency level requirement is
+	  expected and using bound wq may end up creating large number
+	  of mostly unused workers across different CPUs as the issuer
+	  hops through different CPUs.
+
+	* Long running CPU intensive workloads which can be better
+	  managed by the system scheduler.
+
+  WQ_FREEZEABLE
+
+	A freezeable wq participates in the freeze phase of the system
+	suspend operations.  Work items on the wq are drained and no
+	new work item starts execution until thawed.
+
+  WQ_RESCUER
+
+	All wq which might be used in the memory reclaim paths _MUST_
+	have this flag set.  This reserves one worker exclusively for
+	the execution of this wq under memory pressure.
+
+  WQ_HIGHPRI
+
+	Work items of a highpri wq are queued at the head of the
+	worklist of the target gcwq and start execution regardless of
+	the current concurrency level.  In other words, highpri work
+	items will always start execution as soon as execution
+	resource is available.
+
+	Ordering among highpri work items is preserved - a highpri
+	work item queued after another highpri work item will start
+	execution after the earlier highpri work item starts.
+
+	Although highpri work items are not held back by other
+	runnable work items, they still contribute to the concurrency
+	level.  Highpri work items in runnable state will prevent
+	non-highpri work items from starting execution.
+
+	This flag is meaningless for unbound wq.
+
+  WQ_CPU_INTENSIVE
+
+	Work items of a CPU intensive wq do not contribute to the
+	concurrency level.  In other words, runnable CPU intensive
+	work items will not prevent other work items from starting
+	execution.  This is useful for bound work items which are
+	expected to hog CPU cycles so that their execution is
+	regulated by the system scheduler.
+
+	Although CPU intensive work items don't contribute to the
+	concurrency level, start of their executions is still
+	regulated by the concurrency management and runnable
+	non-CPU-intensive work items can delay execution of CPU
+	intensive work items.
+
+	This flag is meaningless for unbound wq.
+
+  WQ_HIGHPRI | WQ_CPU_INTENSIVE
+
+	This combination makes the wq avoid interaction with
+	concurrency management completely and behave as a simple
+	per-CPU execution context provider.  Work items queued on a
+	highpri CPU-intensive wq start execution as soon as resources
+	are available and don't affect execution of other work items.
+
+@max_active:
+
+@max_active determines the maximum number of execution contexts per
+CPU which can be assigned to the work items of a wq.  For example,
+with @max_active of 16, at most 16 work items of the wq can be
+executing at the same time per CPU.
+
+Currently, for a bound wq, the maximum limit for @max_active is 512
+and the default value used when 0 is specified is 256.  For an unbound
+wq, the limit is higher of 512 and 4 * num_possible_cpus().  These
+values are chosen sufficiently high such that they are not the
+limiting factor while providing protection in runaway cases.
+
+The number of active work items of a wq is usually regulated by the
+users of the wq, more specifically, by how many work items the users
+may queue at the same time.  Unless there is a specific need for
+throttling the number of active work items, specifying '0' is
+recommended.
+
+Some users depend on the strict execution ordering of ST wq.  The
+combination of @max_active of 1 and WQ_UNBOUND is used to achieve this
+behavior.  Work items on such wq are always queued to the unbound gcwq
+and only one work item can be active at any given time thus achieving
+the same ordering property as ST wq.
+
+
+5. Example Execution Scenarios
+
+The following example execution scenarios try to illustrate how cmwq
+behave under different configurations.
+
+ Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU.
+ w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms
+ again before finishing.  w1 and w2 burn CPU for 5ms then sleep for
+ 10ms.
+
+Ignoring all other tasks, works and processing overhead, and assuming
+simple FIFO scheduling, the following is one highly simplified version
+of possible sequences of events with the original wq.
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 starts and burns CPU
+ 25		w1 sleeps
+ 35		w1 wakes up and finishes
+ 35		w2 starts and burns CPU
+ 40		w2 sleeps
+ 50		w2 wakes up and finishes
+
+And with cmwq with @max_active >= 3,
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 starts and burns CPU
+ 10		w1 sleeps
+ 10		w2 starts and burns CPU
+ 15		w2 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 25		w2 wakes up and finishes
+
+If @max_active == 2,
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 starts and burns CPU
+ 10		w1 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 20		w2 starts and burns CPU
+ 25		w2 sleeps
+ 35		w2 wakes up and finishes
+
+Now, let's assume w1 and w2 are queued to a different wq q1 which has
+WQ_HIGHPRI set,
+
+ TIME IN MSECS	EVENT
+ 0		w1 and w2 start and burn CPU
+ 5		w1 sleeps
+ 10		w2 sleeps
+ 10		w0 starts and burns CPU
+ 15		w0 sleeps
+ 15		w1 wakes up and finishes
+ 20		w2 wakes up and finishes
+ 25		w0 wakes up and burns CPU
+ 30		w0 finishes
+
+If q1 has WQ_CPU_INTENSIVE set,
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 and w2 start and burn CPU
+ 10		w1 sleeps
+ 15		w2 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 25		w2 wakes up and finishes
+
+
+6. Guidelines
+
+* Do not forget to use WQ_RESCUER if a wq may process work items which
+  are used during memory reclaim.  Each wq with WQ_RESCUER set has one
+  rescuer thread reserved for it.  If there is dependency among
+  multiple work items used during memory reclaim, they should be
+  queued to separate wq each with WQ_RESCUER.
+
+* Unless strict ordering is required, there is no need to use ST wq.
+
+* Unless there is a specific need, using 0 for @max_active is
+  recommended.  In most use cases, concurrency level usually stays
+  well under the default limit.
+
+* A wq serves as a domain for forward progress guarantee (WQ_RESCUER),
+  flush and work item attributes.  Work items which are not involved
+  in memory reclaim and don't need to be flushed as a part of a group
+  of work items, and don't require any special attribute, can use one
+  of the system wq.  There is no difference in execution
+  characteristics between using a dedicated wq and a system wq.
+
+* Unless work items are expected to consume a huge amount of CPU
+  cycles, using a bound wq is usually beneficial due to the increased
+  level of locality in wq operations and work item execution.
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f11100f96482..25e02c941bac 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -235,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define work_clear_pending(work) \
 	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
 
+/*
+ * Workqueue flags and constants.  For details, please refer to
+ * Documentation/workqueue.txt.
+ */
 enum {
 	WQ_NON_REENTRANT	= 1 << 0, /* guarantee non-reentrance */
 	WQ_UNBOUND		= 1 << 1, /* not bound to any cpu */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 727f24e563ae..f77afd939229 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1,19 +1,26 @@
 /*
- * linux/kernel/workqueue.c
+ * kernel/workqueue.c - generic async execution with shared worker pool
  *
- * Generic mechanism for defining kernel helper threads for running
- * arbitrary tasks in process context.
+ * Copyright (C) 2002		Ingo Molnar
  *
- * Started by Ingo Molnar, Copyright (C) 2002
+ *   Derived from the taskqueue/keventd code by:
+ *     David Woodhouse <dwmw2@infradead.org>
+ *     Andrew Morton
+ *     Kai Petzke <wpp@marie.physik.tu-berlin.de>
+ *     Theodore Ts'o <tytso@mit.edu>
  *
- * Derived from the taskqueue/keventd code by:
+ * Made to use alloc_percpu by Christoph Lameter.
  *
- *   David Woodhouse <dwmw2@infradead.org>
- *   Andrew Morton
- *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
- *   Theodore Ts'o <tytso@mit.edu>
+ * Copyright (C) 2010		SUSE Linux Products GmbH
+ * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
  *
- * Made to use alloc_percpu by Christoph Lameter.
+ * This is the generic async execution mechanism.  Work items as are
+ * executed in process context.  The worker pool is shared and
+ * automatically managed.  There is one worker pool for each CPU and
+ * one extra for works which are better served by workers which are
+ * not bound to any specific CPU.
+ *
+ * Please read Documentation/workqueue.txt for details.
  */
 
 #include <linux/module.h>
-- 
cgit v1.2.3-59-g8ed1b


From 7b334fcb45b757ffb093696ca3de1b0c8b4a33f1 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 9 Sep 2010 23:51:02 +0100
Subject: drm: Use a nondestructive mode for output detect when polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Destructive load-detection is very expensive and due to failings
elsewhere can trigger system wide stalls of up to 600ms. A simple
first step to correcting this is not to invoke such an expensive
and destructive load-detection operation automatically.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29536
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16265
Reported-by: Bruno Prémont <bonbons@linux-vserver.org>
Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc_helper.c           |  4 ++--
 drivers/gpu/drm/drm_sysfs.c                 |  2 +-
 drivers/gpu/drm/i915/intel_crt.c            |  7 ++++++-
 drivers/gpu/drm/i915/intel_dp.c             |  3 ++-
 drivers/gpu/drm/i915/intel_dvo.c            |  4 +++-
 drivers/gpu/drm/i915/intel_hdmi.c           |  3 ++-
 drivers/gpu/drm/i915/intel_lvds.c           |  8 ++++++--
 drivers/gpu/drm/i915/intel_sdvo.c           |  6 ++++--
 drivers/gpu/drm/i915/intel_tv.c             | 12 ++++++------
 drivers/gpu/drm/nouveau/nouveau_connector.c |  8 +++++---
 drivers/gpu/drm/radeon/radeon_connectors.c  | 20 +++++++++++++++-----
 drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c         |  3 ++-
 include/drm/drm_crtc.h                      |  3 ++-
 13 files changed, 56 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index de152a58967d..fb6b70fc6572 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 		if (connector->funcs->force)
 			connector->funcs->force(connector);
 	} else {
-		connector->status = connector->funcs->detect(connector);
+		connector->status = connector->funcs->detect(connector, false);
 		drm_kms_helper_poll_enable(dev);
 	}
 
@@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work)
 		    !(connector->polled & DRM_CONNECTOR_POLL_HPD))
 			continue;
 
-		status = connector->funcs->detect(connector);
+		status = connector->funcs->detect(connector, true);
 		if (old_status != status)
 			changed = true;
 	}
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 86118a742231..85da4c40694c 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -159,7 +159,7 @@ static ssize_t status_show(struct device *device,
 	struct drm_connector *connector = to_drm_connector(device);
 	enum drm_connector_status status;
 
-	status = connector->funcs->detect(connector);
+	status = connector->funcs->detect(connector, true);
 	return snprintf(buf, PAGE_SIZE, "%s\n",
 			drm_get_connector_status_name(status));
 }
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 4b7735196cd5..0350e5d711f8 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -400,7 +400,9 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder
 	return status;
 }
 
-static enum drm_connector_status intel_crt_detect(struct drm_connector *connector)
+static enum drm_connector_status
+intel_crt_detect(struct drm_connector *connector,
+		 bool nondestructive)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
@@ -419,6 +421,9 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto
 	if (intel_crt_detect_ddc(encoder))
 		return connector_status_connected;
 
+	if (nondestructive)
+		return connector->status;
+
 	/* for pre-945g platforms use load detect */
 	if (encoder->crtc && encoder->crtc->enabled) {
 		status = intel_crt_load_detect(encoder->crtc, intel_encoder);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 51d142939a26..e1a2a05fb838 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1386,7 +1386,8 @@ ironlake_dp_detect(struct drm_connector *connector)
  * \return false if DP port is disconnected.
  */
 static enum drm_connector_status
-intel_dp_detect(struct drm_connector *connector)
+intel_dp_detect(struct drm_connector *connector,
+		bool nondestructive)
 {
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index a399f4b2c1c5..f0de1addf8a4 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -221,7 +221,9 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder,
  *
  * Unimplemented.
  */
-static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector)
+static enum drm_connector_status
+intel_dvo_detect(struct drm_connector *connector,
+		 bool nondestructive)
 {
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index ccd4c97e6524..2ea123d8d22b 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -139,7 +139,8 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder,
 }
 
 static enum drm_connector_status
-intel_hdmi_detect(struct drm_connector *connector)
+intel_hdmi_detect(struct drm_connector *connector,
+		  bool nondestructive)
 {
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 4fbb0165b26f..fb1bed8f4071 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -445,7 +445,9 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
  * connected and closed means disconnected.  We also send hotplug events as
  * needed, using lid status notification from the input layer.
  */
-static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
+static enum drm_connector_status
+intel_lvds_detect(struct drm_connector *connector,
+		  bool nondestructive)
 {
 	struct drm_device *dev = connector->dev;
 	enum drm_connector_status status = connector_status_connected;
@@ -540,7 +542,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
 	 * the LID nofication event.
 	 */
 	if (connector)
-		connector->status = connector->funcs->detect(connector);
+		connector->status = connector->funcs->detect(connector,
+							     true);
+
 	/* Don't force modeset on machines where it causes a GPU lockup */
 	if (dmi_check_system(intel_no_modeset_on_lid))
 		return NOTIFY_OK;
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index e3b7a7ee39cb..db6b6d4b8fae 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev)
 	if (!analog_connector)
 		return false;
 
-	if (analog_connector->funcs->detect(analog_connector) ==
+	if (analog_connector->funcs->detect(analog_connector, true) ==
 			connector_status_disconnected)
 		return false;
 
@@ -1486,7 +1486,9 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector)
 	return status;
 }
 
-static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector)
+static enum drm_connector_status
+intel_sdvo_detect(struct drm_connector *connector,
+		  bool nondestructive)
 {
 	uint16_t response;
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index c671f60ce80b..d20b550c0f55 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1341,7 +1341,8 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
  * we have a pipe programmed in order to probe the TV.
  */
 static enum drm_connector_status
-intel_tv_detect(struct drm_connector *connector)
+intel_tv_detect(struct drm_connector *connector,
+		bool nondestructive)
 {
 	struct drm_display_mode mode;
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
@@ -1353,7 +1354,7 @@ intel_tv_detect(struct drm_connector *connector)
 
 	if (encoder->crtc && encoder->crtc->enabled) {
 		type = intel_tv_detect_type(intel_tv);
-	} else {
+	} else if (nondestructive) {
 		struct drm_crtc *crtc;
 		int dpms_mode;
 
@@ -1364,10 +1365,9 @@ intel_tv_detect(struct drm_connector *connector)
 			intel_release_load_detect_pipe(&intel_tv->base, connector,
 						       dpms_mode);
 		} else
-			type = -1;
-	}
-
-	intel_tv->type = type;
+			return connector_status_unknown;
+	} else
+		return connector->status;
 
 	if (type < 0)
 		return connector_status_disconnected;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index a1473fff06ac..67d515cb67e0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -168,7 +168,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 }
 
 static enum drm_connector_status
-nouveau_connector_detect(struct drm_connector *connector)
+nouveau_connector_detect(struct drm_connector *connector,
+			 bool nondestructive)
 {
 	struct drm_device *dev = connector->dev;
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
@@ -246,7 +247,8 @@ detect_analog:
 }
 
 static enum drm_connector_status
-nouveau_connector_detect_lvds(struct drm_connector *connector)
+nouveau_connector_detect_lvds(struct drm_connector *connector,
+			      bool nondestructive)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -267,7 +269,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector)
 
 	/* Try retrieving EDID via DDC */
 	if (!dev_priv->vbios.fp_no_ddc) {
-		status = nouveau_connector_detect(connector);
+		status = nouveau_connector_detect(connector, nondestructive);
 		if (status == connector_status_connected)
 			goto out;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index a9dd7847d96e..31d309a8e75b 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -481,7 +481,9 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector,
 	return MODE_OK;
 }
 
-static enum drm_connector_status radeon_lvds_detect(struct drm_connector *connector)
+static enum drm_connector_status
+radeon_lvds_detect(struct drm_connector *connector,
+		   bool nondestructive)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder = radeon_best_single_encoder(connector);
@@ -594,7 +596,9 @@ static int radeon_vga_mode_valid(struct drm_connector *connector,
 	return MODE_OK;
 }
 
-static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector)
+static enum drm_connector_status
+radeon_vga_detect(struct drm_connector *connector,
+		  bool nondestructive)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder;
@@ -691,7 +695,9 @@ static int radeon_tv_mode_valid(struct drm_connector *connector,
 	return MODE_OK;
 }
 
-static enum drm_connector_status radeon_tv_detect(struct drm_connector *connector)
+static enum drm_connector_status
+radeon_tv_detect(struct drm_connector *connector,
+		 bool nondestructive)
 {
 	struct drm_encoder *encoder;
 	struct drm_encoder_helper_funcs *encoder_funcs;
@@ -748,7 +754,9 @@ static int radeon_dvi_get_modes(struct drm_connector *connector)
  * we have to check if this analog encoder is shared with anyone else (TV)
  * if its shared we have to set the other connector to disconnected.
  */
-static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector)
+static enum drm_connector_status
+radeon_dvi_detect(struct drm_connector *connector,
+		  bool nondestructive)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder = NULL;
@@ -972,7 +980,9 @@ static int radeon_dp_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static enum drm_connector_status radeon_dp_detect(struct drm_connector *connector)
+static enum drm_connector_status
+radeon_dp_detect(struct drm_connector *connector,
+		 bool nondestructive)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	enum drm_connector_status ret = connector_status_disconnected;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 2ff5cf78235f..a527c91c0ba6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -335,7 +335,8 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector)
 }
 
 static enum drm_connector_status
-	vmw_ldu_connector_detect(struct drm_connector *connector)
+	vmw_ldu_connector_detect(struct drm_connector *connector,
+				 bool nondestructive)
 {
 	if (vmw_connector_to_ldu(connector)->pref_active)
 		return connector_status_connected;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index c9f3cc5949a8..5536223fbac8 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -386,7 +386,8 @@ struct drm_connector_funcs {
 	void (*dpms)(struct drm_connector *connector, int mode);
 	void (*save)(struct drm_connector *connector);
 	void (*restore)(struct drm_connector *connector);
-	enum drm_connector_status (*detect)(struct drm_connector *connector);
+	enum drm_connector_status (*detect)(struct drm_connector *connector,
+					    bool nondestructive);
 	int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
 	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
 			     uint64_t val);
-- 
cgit v1.2.3-59-g8ed1b


From 930a9e283516a3a3595c0c515113f1b78d07f695 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 14 Sep 2010 11:07:23 +0100
Subject: drm: Use a nondestructive mode for output detect when polling (v2)

v2: Julien Cristau pointed out that @nondestructive results in
double-negatives and confusion when trying to interpret the parameter,
so use @force instead. Much easier to type as well. ;-)

And fix the miscompilation of vmgfx reported by Sedat Dilek.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc_helper.c           |  4 ++--
 drivers/gpu/drm/i915/intel_crt.c            |  5 ++---
 drivers/gpu/drm/i915/intel_dp.c             |  3 +--
 drivers/gpu/drm/i915/intel_dvo.c            |  3 +--
 drivers/gpu/drm/i915/intel_hdmi.c           |  3 +--
 drivers/gpu/drm/i915/intel_lvds.c           |  5 ++---
 drivers/gpu/drm/i915/intel_sdvo.c           |  5 ++---
 drivers/gpu/drm/i915/intel_tv.c             |  5 ++---
 drivers/gpu/drm/nouveau/nouveau_connector.c |  8 +++-----
 drivers/gpu/drm/radeon/radeon_connectors.c  | 15 +++++----------
 drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c         |  6 +++---
 include/drm/drm_crtc.h                      |  9 ++++++++-
 12 files changed, 32 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index fb6b70fc6572..dcbeb98f195a 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 		if (connector->funcs->force)
 			connector->funcs->force(connector);
 	} else {
-		connector->status = connector->funcs->detect(connector, false);
+		connector->status = connector->funcs->detect(connector, true);
 		drm_kms_helper_poll_enable(dev);
 	}
 
@@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work)
 		    !(connector->polled & DRM_CONNECTOR_POLL_HPD))
 			continue;
 
-		status = connector->funcs->detect(connector, true);
+		status = connector->funcs->detect(connector, false);
 		if (old_status != status)
 			changed = true;
 	}
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 0350e5d711f8..a02a8df73727 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -401,8 +401,7 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder
 }
 
 static enum drm_connector_status
-intel_crt_detect(struct drm_connector *connector,
-		 bool nondestructive)
+intel_crt_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
@@ -421,7 +420,7 @@ intel_crt_detect(struct drm_connector *connector,
 	if (intel_crt_detect_ddc(encoder))
 		return connector_status_connected;
 
-	if (nondestructive)
+	if (!force)
 		return connector->status;
 
 	/* for pre-945g platforms use load detect */
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index e1a2a05fb838..1a51ee07de3e 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1386,8 +1386,7 @@ ironlake_dp_detect(struct drm_connector *connector)
  * \return false if DP port is disconnected.
  */
 static enum drm_connector_status
-intel_dp_detect(struct drm_connector *connector,
-		bool nondestructive)
+intel_dp_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index f0de1addf8a4..7c9ec1472d46 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -222,8 +222,7 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder,
  * Unimplemented.
  */
 static enum drm_connector_status
-intel_dvo_detect(struct drm_connector *connector,
-		 bool nondestructive)
+intel_dvo_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 2ea123d8d22b..926934a482ec 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -139,8 +139,7 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder,
 }
 
 static enum drm_connector_status
-intel_hdmi_detect(struct drm_connector *connector,
-		  bool nondestructive)
+intel_hdmi_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index fb1bed8f4071..6ec39a86ed06 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -446,8 +446,7 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
  * needed, using lid status notification from the input layer.
  */
 static enum drm_connector_status
-intel_lvds_detect(struct drm_connector *connector,
-		  bool nondestructive)
+intel_lvds_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_device *dev = connector->dev;
 	enum drm_connector_status status = connector_status_connected;
@@ -543,7 +542,7 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
 	 */
 	if (connector)
 		connector->status = connector->funcs->detect(connector,
-							     true);
+							     false);
 
 	/* Don't force modeset on machines where it causes a GPU lockup */
 	if (dmi_check_system(intel_no_modeset_on_lid))
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index db6b6d4b8fae..e8e902d614ed 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev)
 	if (!analog_connector)
 		return false;
 
-	if (analog_connector->funcs->detect(analog_connector, true) ==
+	if (analog_connector->funcs->detect(analog_connector, false) ==
 			connector_status_disconnected)
 		return false;
 
@@ -1487,8 +1487,7 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector)
 }
 
 static enum drm_connector_status
-intel_sdvo_detect(struct drm_connector *connector,
-		  bool nondestructive)
+intel_sdvo_detect(struct drm_connector *connector, bool force)
 {
 	uint16_t response;
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index d20b550c0f55..4a117e318a73 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1341,8 +1341,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
  * we have a pipe programmed in order to probe the TV.
  */
 static enum drm_connector_status
-intel_tv_detect(struct drm_connector *connector,
-		bool nondestructive)
+intel_tv_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_display_mode mode;
 	struct drm_encoder *encoder = intel_attached_encoder(connector);
@@ -1354,7 +1353,7 @@ intel_tv_detect(struct drm_connector *connector,
 
 	if (encoder->crtc && encoder->crtc->enabled) {
 		type = intel_tv_detect_type(intel_tv);
-	} else if (nondestructive) {
+	} else if (force) {
 		struct drm_crtc *crtc;
 		int dpms_mode;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 67d515cb67e0..87186a4bbf03 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -168,8 +168,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 }
 
 static enum drm_connector_status
-nouveau_connector_detect(struct drm_connector *connector,
-			 bool nondestructive)
+nouveau_connector_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_device *dev = connector->dev;
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
@@ -247,8 +246,7 @@ detect_analog:
 }
 
 static enum drm_connector_status
-nouveau_connector_detect_lvds(struct drm_connector *connector,
-			      bool nondestructive)
+nouveau_connector_detect_lvds(struct drm_connector *connector, bool force)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -269,7 +267,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector,
 
 	/* Try retrieving EDID via DDC */
 	if (!dev_priv->vbios.fp_no_ddc) {
-		status = nouveau_connector_detect(connector, nondestructive);
+		status = nouveau_connector_detect(connector, force);
 		if (status == connector_status_connected)
 			goto out;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 31d309a8e75b..ecc1a8fafbfd 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -482,8 +482,7 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector,
 }
 
 static enum drm_connector_status
-radeon_lvds_detect(struct drm_connector *connector,
-		   bool nondestructive)
+radeon_lvds_detect(struct drm_connector *connector, bool force)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder = radeon_best_single_encoder(connector);
@@ -597,8 +596,7 @@ static int radeon_vga_mode_valid(struct drm_connector *connector,
 }
 
 static enum drm_connector_status
-radeon_vga_detect(struct drm_connector *connector,
-		  bool nondestructive)
+radeon_vga_detect(struct drm_connector *connector, bool force)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder;
@@ -696,8 +694,7 @@ static int radeon_tv_mode_valid(struct drm_connector *connector,
 }
 
 static enum drm_connector_status
-radeon_tv_detect(struct drm_connector *connector,
-		 bool nondestructive)
+radeon_tv_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_encoder *encoder;
 	struct drm_encoder_helper_funcs *encoder_funcs;
@@ -755,8 +752,7 @@ static int radeon_dvi_get_modes(struct drm_connector *connector)
  * if its shared we have to set the other connector to disconnected.
  */
 static enum drm_connector_status
-radeon_dvi_detect(struct drm_connector *connector,
-		  bool nondestructive)
+radeon_dvi_detect(struct drm_connector *connector, bool force)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder = NULL;
@@ -981,8 +977,7 @@ static int radeon_dp_get_modes(struct drm_connector *connector)
 }
 
 static enum drm_connector_status
-radeon_dp_detect(struct drm_connector *connector,
-		 bool nondestructive)
+radeon_dp_detect(struct drm_connector *connector, bool force)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	enum drm_connector_status ret = connector_status_disconnected;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index a527c91c0ba6..7083b1a24df3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -336,7 +336,7 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector)
 
 static enum drm_connector_status
 	vmw_ldu_connector_detect(struct drm_connector *connector,
-				 bool nondestructive)
+				 bool force)
 {
 	if (vmw_connector_to_ldu(connector)->pref_active)
 		return connector_status_connected;
@@ -517,7 +517,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 
 	drm_connector_init(dev, connector, &vmw_legacy_connector_funcs,
 			   DRM_MODE_CONNECTOR_LVDS);
-	connector->status = vmw_ldu_connector_detect(connector);
+	connector->status = vmw_ldu_connector_detect(connector, true);
 
 	drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs,
 			 DRM_MODE_ENCODER_LVDS);
@@ -611,7 +611,7 @@ int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num,
 			ldu->pref_height = 600;
 			ldu->pref_active = false;
 		}
-		con->status = vmw_ldu_connector_detect(con);
+		con->status = vmw_ldu_connector_detect(con, true);
 	}
 
 	mutex_unlock(&dev->mode_config.mutex);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 5536223fbac8..3e5a51af757c 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -386,8 +386,15 @@ struct drm_connector_funcs {
 	void (*dpms)(struct drm_connector *connector, int mode);
 	void (*save)(struct drm_connector *connector);
 	void (*restore)(struct drm_connector *connector);
+
+	/* Check to see if anything is attached to the connector.
+	 * @force is set to false whilst polling, true when checking the
+	 * connector due to user request. @force can be used by the driver
+	 * to avoid expensive, destructive operations during automated
+	 * probing.
+	 */
 	enum drm_connector_status (*detect)(struct drm_connector *connector,
-					    bool nondestructive);
+					    bool force);
 	int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
 	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
 			     uint64_t val);
-- 
cgit v1.2.3-59-g8ed1b


From c41d68a513c71e35a14f66d71782d27a79a81ea6 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Tue, 7 Sep 2010 16:16:18 -0700
Subject: compat: Make compat_alloc_user_space() incorporate the access_ok()

compat_alloc_user_space() expects the caller to independently call
access_ok() to verify the returned area.  A missing call could
introduce problems on some architectures.

This patch incorporates the access_ok() check into
compat_alloc_user_space() and also adds a sanity check on the length.
The existing compat_alloc_user_space() implementations are renamed
arch_compat_alloc_user_space() and are used as part of the
implementation of the new global function.

This patch assumes NULL will cause __get_user()/__put_user() to either
fail or access userspace on all architectures.  This should be
followed by checking the return value of compat_access_user_space()
for NULL in the callers, at which time the access_ok() in the callers
can also be removed.

Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: James Bottomley <jejb@parisc-linux.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: <stable@kernel.org>
---
 arch/ia64/include/asm/compat.h    |  2 +-
 arch/mips/include/asm/compat.h    |  2 +-
 arch/parisc/include/asm/compat.h  |  2 +-
 arch/powerpc/include/asm/compat.h |  2 +-
 arch/s390/include/asm/compat.h    |  2 +-
 arch/sparc/include/asm/compat.h   |  2 +-
 arch/tile/include/asm/compat.h    |  2 +-
 arch/x86/include/asm/compat.h     |  2 +-
 include/linux/compat.h            |  3 +++
 kernel/compat.c                   | 21 +++++++++++++++++++++
 10 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h
index f90edc85b509..9301a2821615 100644
--- a/arch/ia64/include/asm/compat.h
+++ b/arch/ia64/include/asm/compat.h
@@ -199,7 +199,7 @@ ptr_to_compat(void __user *uptr)
 }
 
 static __inline__ void __user *
-compat_alloc_user_space (long len)
+arch_compat_alloc_user_space (long len)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len);
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 613f6912dfc1..dbc51065df5b 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -145,7 +145,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = (struct pt_regs *)
 		((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1;
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 02b77baa5da6..efa0b60c63fe 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -147,7 +147,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-static __inline__ void __user *compat_alloc_user_space(long len)
+static __inline__ void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = &current->thread.regs;
 	return (void __user *)regs->gr[30];
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 396d21a80058..a11d4eac4f97 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -134,7 +134,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = current->thread.regs;
 	unsigned long usp = regs->gpr[1];
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 104f2007f097..a875c2f542e1 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -181,7 +181,7 @@ static inline int is_compat_task(void)
 
 #endif
 
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	unsigned long stack;
 
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 5016f76ea98a..6f57325bb883 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -167,7 +167,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = current_thread_info()->kregs;
 	unsigned long usp = regs->u_regs[UREG_I6];
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 5a34da6cdd79..345d81ce44bb 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -195,7 +195,7 @@ static inline unsigned long ptr_to_compat_reg(void __user *uptr)
 	return (long)(int)(long __force)uptr;
 }
 
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	return (void __user *)regs->sp - len;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 306160e58b48..1d9cd27c2920 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	return (void __user *)regs->sp - len;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 9ddc8780e8db..5778b559d59c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvector(int type,
 		const struct compat_iovec __user *uvector, unsigned long nr_segs,
 		unsigned long fast_segs, struct iovec *fast_pointer,
 		struct iovec **ret_pointer);
+
+extern void __user *compat_alloc_user_space(unsigned long len);
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff --git a/kernel/compat.c b/kernel/compat.c
index e167efce8423..c9e2ec0b34a8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1126,3 +1126,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
 
 	return 0;
 }
+
+/*
+ * Allocate user-space memory for the duration of a single system call,
+ * in order to marshall parameters inside a compat thunk.
+ */
+void __user *compat_alloc_user_space(unsigned long len)
+{
+	void __user *ptr;
+
+	/* If len would occupy more than half of the entire compat space... */
+	if (unlikely(len > (((compat_uptr_t)~0) >> 1)))
+		return NULL;
+
+	ptr = arch_compat_alloc_user_space(len);
+
+	if (unlikely(!access_ok(VERIFY_WRITE, ptr, len)))
+		return NULL;
+
+	return ptr;
+}
+EXPORT_SYMBOL_GPL(compat_alloc_user_space);
-- 
cgit v1.2.3-59-g8ed1b


From 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Wed, 15 Sep 2010 10:27:52 -0700
Subject: tcp: Prevent overzealous packetization by SWS logic.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised
window, the SWS logic will packetize to half the MSS unnecessarily.

This causes problems with some embedded devices.

However for large MSS devices we do want to half-MSS packetize
otherwise we never get enough packets into the pipe for things
like fast retransmit and recovery to work.

Be careful also to handle the case where MSS > window, otherwise
we'll never send until the probe timer.

Reported-by: ツ Leandro Melo de Sales <leandroal@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index eaa9582779d0..3e4b33e36602 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -475,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk);
 /* Bound MSS / TSO packet size with the half of the window */
 static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 {
-	if (tp->max_window && pktsize > (tp->max_window >> 1))
-		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	int cutoff;
+
+	/* When peer uses tiny windows, there is no use in packetizing
+	 * to sub-MSS pieces for the sake of SWS or making sure there
+	 * are enough packets in the pipe for fast recovery.
+	 *
+	 * On the other hand, for extremely large MSS devices, handling
+	 * smaller than MSS windows in this way does make sense.
+	 */
+	if (tp->max_window >= 512)
+		cutoff = (tp->max_window >> 1);
+	else
+		cutoff = tp->max_window;
+
+	if (cutoff && pktsize > cutoff)
+		return max_t(int, cutoff, 68U - tp->tcp_header_len);
 	else
 		return pktsize;
 }
-- 
cgit v1.2.3-59-g8ed1b


From f0f9deae9e7c421fa0c1c627beb8e174325e1ba7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 17 Sep 2010 16:55:03 -0700
Subject: netpoll: Disable IRQ around RCU dereference in netpoll_rx

We cannot use rcu_dereference_bh safely in netpoll_rx as we may
be called with IRQs disabled.  We could however simply disable
IRQs as that too causes BH to be disabled and is safe in either
case.

Thanks to John Linville for discovering this bug and providing
a patch.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 791d5109f34c..50d8009be86c 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,20 +63,20 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 	unsigned long flags;
 	bool ret = false;
 
-	rcu_read_lock_bh();
+	local_irq_save(flags);
 	npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
 	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
 		goto out;
 
-	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	spin_lock(&npinfo->rx_lock);
 	/* check rx_flags again with the lock held */
 	if (npinfo->rx_flags && __netpoll_rx(skb))
 		ret = true;
-	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	spin_unlock(&npinfo->rx_lock);
 
 out:
-	rcu_read_unlock_bh();
+	local_irq_restore(flags);
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 8444cf712c5f71845cba9dc30d8f530ff0d5ff83 Mon Sep 17 00:00:00 2001
From: Thomas Egerer <thomas.egerer@secunet.com>
Date: Mon, 20 Sep 2010 11:11:38 -0700
Subject: xfrm: Allow different selector family in temporary state

The family parameter xfrm_state_find is used to find a state matching a
certain policy. This value is set to the template's family
(encap_family) right before xfrm_state_find is called.
The family parameter is however also used to construct a temporary state
in xfrm_state_find itself which is wrong for inter-family scenarios
because it produces a selector for the wrong family. Since this selector
is included in the xfrm_user_acquire structure, user space programs
misinterpret IPv6 addresses as IPv4 and vice versa.
This patch splits up the original init_tempsel function into a part that
initializes the selector respectively the props and id of the temporary
state, to allow for differing ip address families whithin the state.

Signed-off-by: Thomas Egerer <thomas.egerer@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  4 ++--
 net/ipv4/xfrm4_state.c | 33 +++++++++++++++++++--------------
 net/ipv6/xfrm6_state.c | 33 +++++++++++++++++++--------------
 net/xfrm/xfrm_policy.c |  5 ++---
 net/xfrm/xfrm_state.c  | 45 +++++++++++++++++++++++++++------------------
 5 files changed, 69 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index fc8f36dd0f5c..4f53532d4c2f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -298,8 +298,8 @@ struct xfrm_state_afinfo {
 	const struct xfrm_type	*type_map[IPPROTO_MAX];
 	struct xfrm_mode	*mode_map[XFRM_MODE_MAX];
 	int			(*init_flags)(struct xfrm_state *x);
-	void			(*init_tempsel)(struct xfrm_state *x, struct flowi *fl,
-						struct xfrm_tmpl *tmpl,
+	void			(*init_tempsel)(struct xfrm_selector *sel, struct flowi *fl);
+	void			(*init_temprop)(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
 						xfrm_address_t *daddr, xfrm_address_t *saddr);
 	int			(*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
 	int			(*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 1ef1366a0a03..47947624eccc 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -21,21 +21,25 @@ static int xfrm4_init_flags(struct xfrm_state *x)
 }
 
 static void
-__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		     struct xfrm_tmpl *tmpl,
-		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
+{
+	sel->daddr.a4 = fl->fl4_dst;
+	sel->saddr.a4 = fl->fl4_src;
+	sel->dport = xfrm_flowi_dport(fl);
+	sel->dport_mask = htons(0xffff);
+	sel->sport = xfrm_flowi_sport(fl);
+	sel->sport_mask = htons(0xffff);
+	sel->family = AF_INET;
+	sel->prefixlen_d = 32;
+	sel->prefixlen_s = 32;
+	sel->proto = fl->proto;
+	sel->ifindex = fl->oif;
+}
+
+static void
+xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
+		   xfrm_address_t *daddr, xfrm_address_t *saddr)
 {
-	x->sel.daddr.a4 = fl->fl4_dst;
-	x->sel.saddr.a4 = fl->fl4_src;
-	x->sel.dport = xfrm_flowi_dport(fl);
-	x->sel.dport_mask = htons(0xffff);
-	x->sel.sport = xfrm_flowi_sport(fl);
-	x->sel.sport_mask = htons(0xffff);
-	x->sel.family = AF_INET;
-	x->sel.prefixlen_d = 32;
-	x->sel.prefixlen_s = 32;
-	x->sel.proto = fl->proto;
-	x->sel.ifindex = fl->oif;
 	x->id = tmpl->id;
 	if (x->id.daddr.a4 == 0)
 		x->id.daddr.a4 = daddr->a4;
@@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
+	.init_temprop		= xfrm4_init_temprop,
 	.output			= xfrm4_output,
 	.extract_input		= xfrm4_extract_input,
 	.extract_output		= xfrm4_extract_output,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index f417b77fa0e1..a67575d472a3 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -20,23 +20,27 @@
 #include <net/addrconf.h>
 
 static void
-__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		     struct xfrm_tmpl *tmpl,
-		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
 {
 	/* Initialize temporary selector matching only
 	 * to current session. */
-	ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
-	ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
-	x->sel.dport = xfrm_flowi_dport(fl);
-	x->sel.dport_mask = htons(0xffff);
-	x->sel.sport = xfrm_flowi_sport(fl);
-	x->sel.sport_mask = htons(0xffff);
-	x->sel.family = AF_INET6;
-	x->sel.prefixlen_d = 128;
-	x->sel.prefixlen_s = 128;
-	x->sel.proto = fl->proto;
-	x->sel.ifindex = fl->oif;
+	ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst);
+	ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src);
+	sel->dport = xfrm_flowi_dport(fl);
+	sel->dport_mask = htons(0xffff);
+	sel->sport = xfrm_flowi_sport(fl);
+	sel->sport_mask = htons(0xffff);
+	sel->family = AF_INET6;
+	sel->prefixlen_d = 128;
+	sel->prefixlen_s = 128;
+	sel->proto = fl->proto;
+	sel->ifindex = fl->oif;
+}
+
+static void
+xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
+		   xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
 	x->id = tmpl->id;
 	if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
 		memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
@@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.eth_proto		= htons(ETH_P_IPV6),
 	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
+	.init_temprop		= xfrm6_init_temprop,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
 	.output			= xfrm6_output,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2b3ed7ad4933..cbab6e1a8c9c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1175,9 +1175,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
 		    tmpl->mode == XFRM_MODE_BEET) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
-			family = tmpl->encap_family;
-			if (xfrm_addr_any(local, family)) {
-				error = xfrm_get_saddr(net, &tmp, remote, family);
+			if (xfrm_addr_any(local, tmpl->encap_family)) {
+				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
 				if (error)
 					goto fail;
 				local = &tmp;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5208b12fbfb4..eb96ce52f178 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -656,15 +656,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 EXPORT_SYMBOL(xfrm_sad_getinfo);
 
 static int
-xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		  struct xfrm_tmpl *tmpl,
-		  xfrm_address_t *daddr, xfrm_address_t *saddr,
-		  unsigned short family)
+xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,
+		    struct xfrm_tmpl *tmpl,
+		    xfrm_address_t *daddr, xfrm_address_t *saddr,
+		    unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 	if (!afinfo)
 		return -1;
-	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
+	afinfo->init_tempsel(&x->sel, fl);
+
+	if (family != tmpl->encap_family) {
+		xfrm_state_put_afinfo(afinfo);
+		afinfo = xfrm_state_get_afinfo(tmpl->encap_family);
+		if (!afinfo)
+			return -1;
+	}
+	afinfo->init_temprop(x, tmpl, daddr, saddr);
 	xfrm_state_put_afinfo(afinfo);
 	return 0;
 }
@@ -790,37 +798,38 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	int error = 0;
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
+	unsigned short encap_family = tmpl->encap_family;
 
 	to_put = NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family);
+	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
-		if (x->props.family == family &&
+		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
-		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
 					   &best, &acquire_in_progress, &error);
 	}
 	if (best)
 		goto found;
 
-	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family);
+	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
 	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) {
-		if (x->props.family == family &&
+		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
-		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
 					   &best, &acquire_in_progress, &error);
 	}
 
@@ -829,7 +838,7 @@ found:
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
 		    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
-					      tmpl->id.proto, family)) != NULL) {
+					      tmpl->id.proto, encap_family)) != NULL) {
 			to_put = x0;
 			error = -EEXIST;
 			goto out;
@@ -839,9 +848,9 @@ found:
 			error = -ENOMEM;
 			goto out;
 		}
-		/* Initialize temporary selector matching only
+		/* Initialize temporary state matching only
 		 * to current session. */
-		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
+		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
 
 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
@@ -856,10 +865,10 @@ found:
 			x->km.state = XFRM_STATE_ACQ;
 			list_add(&x->km.all, &net->xfrm.state_all);
 			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
-			h = xfrm_src_hash(net, daddr, saddr, family);
+			h = xfrm_src_hash(net, daddr, saddr, encap_family);
 			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
 			if (x->id.spi) {
-				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family);
+				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
 				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
 			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
-- 
cgit v1.2.3-59-g8ed1b


From 8b15575cae7a93a784c3005c42b069edd9ba64dd Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 21 Sep 2010 14:35:37 -0700
Subject: fs: {lock,unlock}_flocks() stubs to prepare for BKL removal

The lock structs are currently protected by the BKL, but are accessed by
code in fs/locks.c and misc file system and DLM code.  These stubs will
allow all users to switch to the new interface before the implementation
is changed to a spinlock.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Sage Weil <sage@newdream.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 76041b614758..63d069bd80b7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1093,6 +1093,10 @@ struct file_lock {
 
 #include <linux/fcntl.h>
 
+/* temporary stubs for BKL removal */
+#define lock_flocks() lock_kernel()
+#define unlock_flocks() unlock_kernel()
+
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
 #ifdef CONFIG_FILE_LOCKING
-- 
cgit v1.2.3-59-g8ed1b


From 56b49f4b8f6728b91d10c556c116175051b77b60 Mon Sep 17 00:00:00 2001
From: Ollie Wild <aaw@google.com>
Date: Wed, 22 Sep 2010 05:54:54 +0000
Subject: net: Move "struct net" declaration inside the __KERNEL__ macro guard

This patch reduces namespace pollution by moving the "struct net" declaration
out of the userspace-facing portion of linux/netlink.h.  It has no impact on
the kernel.

(This came up because we have several C++ applications which use "net" as a
namespace name.)

Signed-off-by: Ollie Wild <aaw@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 59d066936ab9..123566912d73 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -27,8 +27,6 @@
 
 #define MAX_LINKS 32		
 
-struct net;
-
 struct sockaddr_nl {
 	sa_family_t	nl_family;	/* AF_NETLINK	*/
 	unsigned short	nl_pad;		/* zero		*/
@@ -151,6 +149,8 @@ struct nlattr {
 #include <linux/capability.h>
 #include <linux/skbuff.h>
 
+struct net;
+
 static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
 {
 	return (struct nlmsghdr *)skb->data;
-- 
cgit v1.2.3-59-g8ed1b


From d3f3cf859db17cc5f8156c5bfcd032413e44483b Mon Sep 17 00:00:00 2001
From: Mathieu Lacage <mathieu.lacage@sophia.inria.fr>
Date: Sat, 14 Aug 2010 15:02:44 +0200
Subject: missing inline keyword for static function in linux/dmaengine.h

Add a missing inline keyword for static function in linux/dmaengine.h to
avoid duplicate symbol definitions.

Signed-off-by: Mathieu Lacage <mathieu.lacage@sophia.inria.fr>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c61d4ca27bcc..e2106495cc11 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
 	return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
 }
 
-static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
 {
 	return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 710224fa2750cf449c02dd115548acebfdd2c86a Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 22 Sep 2010 13:04:55 -0700
Subject: arm: fix "arm: fix pci_set_consistent_dma_mask for dmabounce devices"

This fixes the regression caused by the commit 6fee48cd330c68
("dma-mapping: arm: use generic pci_set_dma_mask and
pci_set_consistent_dma_mask").

ARM needs to clip the dma coherent mask for dmabounce devices. This
restores the old trick.

Note that strictly speaking, the DMA API doesn't allow architectures to do
such but I'm not sure it's worth adding the new API to set the dma mask
that allows architectures to clip it.

Reported-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/common/it8152.c                     | 8 ++++++++
 arch/arm/mach-ixp4xx/common-pci.c            | 8 ++++++++
 arch/arm/mach-ixp4xx/include/mach/hardware.h | 2 ++
 arch/arm/mach-pxa/include/mach/hardware.h    | 2 +-
 arch/arm/mach-pxa/include/mach/io.h          | 2 ++
 include/linux/dma-mapping.h                  | 4 ++++
 6 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c
index 7974baacafce..1bec96e85196 100644
--- a/arch/arm/common/it8152.c
+++ b/arch/arm/common/it8152.c
@@ -271,6 +271,14 @@ int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
 		((dma_addr + size - PHYS_OFFSET) >= SZ_64M);
 }
 
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+	if (mask >= PHYS_OFFSET + SZ_64M - 1)
+		return 0;
+
+	return -EIO;
+}
+
 int __init it8152_pci_setup(int nr, struct pci_sys_data *sys)
 {
 	it8152_io.start = IT8152_IO_BASE + 0x12000;
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index 61cd4d64b985..24498a932ba6 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -503,6 +503,14 @@ struct pci_bus * __devinit ixp4xx_scan_bus(int nr, struct pci_sys_data *sys)
 	return pci_scan_bus(sys->busnr, &ixp4xx_ops, sys);
 }
 
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+	if (mask >= SZ_64M - 1)
+		return 0;
+
+	return -EIO;
+}
+
 EXPORT_SYMBOL(ixp4xx_pci_read);
 EXPORT_SYMBOL(ixp4xx_pci_write);
 
diff --git a/arch/arm/mach-ixp4xx/include/mach/hardware.h b/arch/arm/mach-ixp4xx/include/mach/hardware.h
index f91ca6d4fbe8..8138371c406e 100644
--- a/arch/arm/mach-ixp4xx/include/mach/hardware.h
+++ b/arch/arm/mach-ixp4xx/include/mach/hardware.h
@@ -26,6 +26,8 @@
 #define PCIBIOS_MAX_MEM		0x4BFFFFFF
 #endif
 
+#define ARCH_HAS_DMA_SET_COHERENT_MASK
+
 #define pcibios_assign_all_busses()	1
 
 /* Register locations and bits */
diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h
index 7f64d24cd564..428cc7bda9a4 100644
--- a/arch/arm/mach-pxa/include/mach/hardware.h
+++ b/arch/arm/mach-pxa/include/mach/hardware.h
@@ -309,7 +309,7 @@ extern unsigned long get_clock_tick_rate(void);
 #define PCIBIOS_MIN_IO		0
 #define PCIBIOS_MIN_MEM		0
 #define pcibios_assign_all_busses()	1
+#define ARCH_HAS_DMA_SET_COHERENT_MASK
 #endif
 
-
 #endif  /* _ASM_ARCH_HARDWARE_H */
diff --git a/arch/arm/mach-pxa/include/mach/io.h b/arch/arm/mach-pxa/include/mach/io.h
index 262691fb97d8..fdca3be47d9b 100644
--- a/arch/arm/mach-pxa/include/mach/io.h
+++ b/arch/arm/mach-pxa/include/mach/io.h
@@ -6,6 +6,8 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
+#include <mach/hardware.h>
+
 #define IO_SPACE_LIMIT 0xffffffff
 
 /*
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ce29b8151198..ba8319ae5fcc 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -102,6 +102,9 @@ static inline u64 dma_get_mask(struct device *dev)
 	return DMA_BIT_MASK(32);
 }
 
+#ifdef ARCH_HAS_DMA_SET_COHERENT_MASK
+int dma_set_coherent_mask(struct device *dev, u64 mask);
+#else
 static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
 	if (!dma_supported(dev, mask))
@@ -109,6 +112,7 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
 	dev->coherent_dma_mask = mask;
 	return 0;
 }
+#endif
 
 extern u64 dma_get_required_mask(struct device *dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 4c894f47bb49284008073d351c0ddaac8860864e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 23 Sep 2010 15:15:19 +0200
Subject: x86/amd-iommu: Work around S3 BIOS bug

This patch adds a workaround for an IOMMU BIOS problem to
the AMD IOMMU driver. The result of the bug is that the
IOMMU does not execute commands anymore when the system
comes out of the S3 state resulting in system failure. The
bug in the BIOS is that is does not restore certain hardware
specific registers correctly. This workaround reads out the
contents of these registers at boot time and restores them
on resume from S3. The workaround is limited to the specific
IOMMU chipset where this problem occurs.

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_proto.h |  6 ++++++
 arch/x86/include/asm/amd_iommu_types.h |  9 +++++++++
 arch/x86/kernel/amd_iommu_init.c       | 18 ++++++++++++++++++
 include/linux/pci_ids.h                |  3 +++
 4 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index d2544f1d705d..cb030374b90a 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { }
 
 #endif /* !CONFIG_AMD_IOMMU_STATS */
 
+static inline bool is_rd890_iommu(struct pci_dev *pdev)
+{
+	return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
+	       (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
+}
+
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ef2d5cd7d7e7..08616180deaf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -414,6 +414,15 @@ struct amd_iommu {
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
+
+	/*
+	 * This array is required to work around a potential BIOS bug.
+	 * The BIOS may miss to restore parts of the PCI configuration
+	 * space when the system resumes from S3. The result is that the
+	 * IOMMU does not execute commands anymore which leads to system
+	 * failure.
+	 */
+	u32 cache_cfg[4];
 };
 
 /*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 85e9817ead43..5a170cbbbed8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 	iommu->last_device = calc_devid(MMIO_GET_BUS(range),
 					MMIO_GET_LD(range));
 	iommu->evt_msi_num = MMIO_MSI_NUM(misc);
+
+	if (is_rd890_iommu(iommu->dev)) {
+		pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]);
+		pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]);
+		pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]);
+		pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]);
+	}
 }
 
 /*
@@ -1120,6 +1127,16 @@ static void iommu_init_flags(struct amd_iommu *iommu)
 	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
 }
 
+static void iommu_apply_quirks(struct amd_iommu *iommu)
+{
+	if (is_rd890_iommu(iommu->dev)) {
+		pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]);
+		pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]);
+		pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]);
+		pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]);
+	}
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -1130,6 +1147,7 @@ static void enable_iommus(void)
 
 	for_each_iommu(iommu) {
 		iommu_disable(iommu);
+		iommu_apply_quirks(iommu);
 		iommu_init_flags(iommu);
 		iommu_set_device_table(iommu);
 		iommu_enable_command_buffer(iommu);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 10d33309e9a6..570fddeb0388 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -393,6 +393,9 @@
 #define PCI_DEVICE_ID_VLSI_82C147	0x0105
 #define PCI_DEVICE_ID_VLSI_VAS96011	0x0702
 
+/* AMD RD890 Chipset */
+#define PCI_DEVICE_ID_RD890_IOMMU	0x5a23
+
 #define PCI_VENDOR_ID_ADL		0x1005
 #define PCI_DEVICE_ID_ADL_2301		0x2301
 
-- 
cgit v1.2.3-59-g8ed1b


From b3a084b9b684622b149e8dcf03855bf0d5fb588b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 22 Sep 2010 08:38:44 +0200
Subject: rcu: rcu_read_lock_bh_held(): disabling irqs also disables bh

rcu_dereference_bh() doesnt know yet about hard irq being disabled, so
lockdep can trigger in netpoll_rx() after commit f0f9deae9e7c4 (netpoll:
Disable IRQ around RCU dereference in netpoll_rx)

Reported-by: Miles Lane <miles.lane@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Miles Lane <miles.lane@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9fbc54a2585d..83af1f8d8b74 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -454,7 +454,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  * Makes rcu_dereference_check() do the dirty work.
  */
 #define rcu_dereference_bh(p) \
-		rcu_dereference_check(p, rcu_read_lock_bh_held())
+		rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled())
 
 /**
  * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched
-- 
cgit v1.2.3-59-g8ed1b


From f459ffbdfd04edb4a8ce6eea33170eb057a5e695 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sat, 25 Sep 2010 17:45:50 +1000
Subject: drm/radeon: fix PCI ID 5657 to be an RV410

fixes https://bugzilla.kernel.org/show_bug.cgi?id=19012

cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 3a9940ef728b..883c1d439899 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -85,7 +85,6 @@
 	{0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
-	{0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \
@@ -103,6 +102,7 @@
 	{0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \
 	{0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \
-- 
cgit v1.2.3-59-g8ed1b


From 693019e90ca45d881109d32c0c6d29adf03f6447 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Thu, 23 Sep 2010 11:19:54 +0000
Subject: net: reset skb queue mapping when rx'ing over tunnel

Reset queue mapping when an skb is reentering the stack via a tunnel.
On second pass, the queue mapping from the original device is no
longer valid.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 81d1413a8701..02386505033d 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -242,6 +242,7 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
 	dev->stats.rx_packets++;
 	dev->stats.rx_bytes += skb->len;
 	skb->rxhash = 0;
+	skb_set_queue_mapping(skb, 0);
 	skb_dst_drop(skb);
 	nf_reset(skb);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 2cc6d2bf3d6195fabcf0febc192c01f99519a8f3 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 24 Sep 2010 09:55:52 +0000
Subject: ipv6: add a missing unregister_pernet_subsys call

Clean up a missing exit path in the ipv6 module init routines.  In
addrconf_init we call ipv6_addr_label_init which calls register_pernet_subsys
for the ipv6_addr_label_ops structure.  But if module loading fails, or if the
ipv6 module is removed, there is no corresponding unregister_pernet_subsys call,
which leaves a now-bogus address on the pernet_list, leading to oopses in
subsequent registrations.  This patch cleans up both the failed load path and
the unload path.  Tested by myself with good results.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

 include/net/addrconf.h |    1 +
 net/ipv6/addrconf.c    |   11 ++++++++---
 net/ipv6/addrlabel.c   |    5 +++++
 3 files changed, 14 insertions(+), 3 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h |  1 +
 net/ipv6/addrconf.c    | 11 ++++++++---
 net/ipv6/addrlabel.c   |  5 +++++
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 45375b41a2a0..4d40c4d0230b 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -121,6 +121,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout)
  *	IPv6 Address Label subsystem (addrlabel.c)
  */
 extern int			ipv6_addr_label_init(void);
+extern void			ipv6_addr_label_cleanup(void);
 extern void			ipv6_addr_label_rtnl_register(void);
 extern u32			ipv6_addr_label(struct net *net,
 						const struct in6_addr *addr,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab70a3fbcafa..324fac3b6c16 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4637,10 +4637,12 @@ int __init addrconf_init(void)
 	if (err < 0) {
 		printk(KERN_CRIT "IPv6 Addrconf:"
 		       " cannot initialize default policy table: %d.\n", err);
-		return err;
+		goto out;
 	}
 
-	register_pernet_subsys(&addrconf_ops);
+	err = register_pernet_subsys(&addrconf_ops);
+	if (err < 0)
+		goto out_addrlabel;
 
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
@@ -4692,7 +4694,9 @@ errout:
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
 	unregister_pernet_subsys(&addrconf_ops);
-
+out_addrlabel:
+	ipv6_addr_label_cleanup();
+out:
 	return err;
 }
 
@@ -4703,6 +4707,7 @@ void addrconf_cleanup(void)
 
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 	unregister_pernet_subsys(&addrconf_ops);
+	ipv6_addr_label_cleanup();
 
 	rtnl_lock();
 
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f0e774cea386..8175f802651b 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -393,6 +393,11 @@ int __init ipv6_addr_label_init(void)
 	return register_pernet_subsys(&ipv6_addr_label_ops);
 }
 
+void ipv6_addr_label_cleanup(void)
+{
+	unregister_pernet_subsys(&ipv6_addr_label_ops);
+}
+
 static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
 	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
 	[IFAL_LABEL]		= { .len = sizeof(u32), },
-- 
cgit v1.2.3-59-g8ed1b


From fb0c5f0bc8b69b40549449ee7fc65f3706f12062 Mon Sep 17 00:00:00 2001
From: Ulrich Weber <uweber@astaro.com>
Date: Mon, 27 Sep 2010 03:31:00 +0000
Subject: tproxy: check for transparent flag in ip_route_newports

as done in ip_route_connect()

Signed-off-by: Ulrich Weber <uweber@astaro.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index bd732d62e1c3..7e5e73bfa4de 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -199,6 +199,8 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol,
 		fl.fl_ip_sport = sport;
 		fl.fl_ip_dport = dport;
 		fl.proto = protocol;
+		if (inet_sk(sk)->transparent)
+			fl.flags |= FLOWI_FLAG_ANYSRC;
 		ip_rt_put(*rp);
 		*rp = NULL;
 		security_sk_classify_flow(sk, &fl);
-- 
cgit v1.2.3-59-g8ed1b


From 31dfbc93923c0aaa0440b809f80ff2830c6a531a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 27 Sep 2010 21:28:30 +0100
Subject: drm: Prune GEM vma entries

Hook the GEM vm open/close ops into the generic drm vm open/close so
that the private vma entries are created and destroy appropriately.
Fixes the leak of the drm_vma_entries during the lifetime of the filp.

Reported-by: Matt Mackall <mpm@selenic.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_gem.c |  9 ++++++++-
 drivers/gpu/drm/drm_vm.c  | 28 ++++++++++++++++++----------
 include/drm/drmP.h        |  1 +
 3 files changed, 27 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index bf92d07510df..6fe2cd298c12 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -528,6 +528,10 @@ void drm_gem_vm_open(struct vm_area_struct *vma)
 	struct drm_gem_object *obj = vma->vm_private_data;
 
 	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	drm_vm_open_locked(vma);
+	mutex_unlock(&obj->dev->struct_mutex);
 }
 EXPORT_SYMBOL(drm_gem_vm_open);
 
@@ -535,7 +539,10 @@ void drm_gem_vm_close(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
 
-	drm_gem_object_unreference_unlocked(obj);
+	mutex_lock(&obj->dev->struct_mutex);
+	drm_vm_close_locked(vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&obj->dev->struct_mutex);
 }
 EXPORT_SYMBOL(drm_gem_vm_close);
 
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index fda67468e603..5df450683aab 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -433,15 +433,7 @@ static void drm_vm_open(struct vm_area_struct *vma)
 	mutex_unlock(&dev->struct_mutex);
 }
 
-/**
- * \c close method for all virtual memory types.
- *
- * \param vma virtual memory area.
- *
- * Search the \p vma private data entry in drm_device::vmalist, unlink it, and
- * free it.
- */
-static void drm_vm_close(struct vm_area_struct *vma)
+void drm_vm_close_locked(struct vm_area_struct *vma)
 {
 	struct drm_file *priv = vma->vm_file->private_data;
 	struct drm_device *dev = priv->minor->dev;
@@ -451,7 +443,6 @@ static void drm_vm_close(struct vm_area_struct *vma)
 		  vma->vm_start, vma->vm_end - vma->vm_start);
 	atomic_dec(&dev->vma_count);
 
-	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
 		if (pt->vma == vma) {
 			list_del(&pt->head);
@@ -459,6 +450,23 @@ static void drm_vm_close(struct vm_area_struct *vma)
 			break;
 		}
 	}
+}
+
+/**
+ * \c close method for all virtual memory types.
+ *
+ * \param vma virtual memory area.
+ *
+ * Search the \p vma private data entry in drm_device::vmalist, unlink it, and
+ * free it.
+ */
+static void drm_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_file *priv = vma->vm_file->private_data;
+	struct drm_device *dev = priv->minor->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	drm_vm_close_locked(vma);
 	mutex_unlock(&dev->struct_mutex);
 }
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 7809d230adee..774e1d49509b 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1175,6 +1175,7 @@ extern int drm_release(struct inode *inode, struct file *filp);
 extern int drm_mmap(struct file *filp, struct vm_area_struct *vma);
 extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma);
 extern void drm_vm_open_locked(struct vm_area_struct *vma);
+extern void drm_vm_close_locked(struct vm_area_struct *vma);
 extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map);
 extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev);
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
-- 
cgit v1.2.3-59-g8ed1b


From 2fc11536cf5c0b8eb4eb7e01a2a672a189e9280f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@tandberg.com>
Date: Tue, 7 Sep 2010 06:10:45 -0300
Subject: V4L/DVB: videobuf-dma-sg: set correct size in last sg element

This fixes a nasty memory corruption bug when using userptr I/O.
The function videobuf_pages_to_sg() sets up the scatter-gather list for the
DMA transfer to the userspace pages. The first transfer is setup correctly
(the size is set to PAGE_SIZE - offset), but all other transfers have size
PAGE_SIZE. This is wrong for the last transfer which may be less than PAGE_SIZE.

Most, if not all, drivers will program the boards DMA engine correctly, i.e.
even though the size in the last sg element is wrong, they will do their
own size calculations and make sure the right amount is DMA-ed, and so seemingly
prevent memory corruption.

However, behind the scenes the dynamic DMA mapping support (in lib/swiotlb.c)
may create bounce buffers if the memory pages are not in DMA-able memory.
This happens for example on a 64-bit linux with a board that only supports
32-bit DMA.

These bounce buffers DO use the information in the sg list to determine the
size. So while the DMA engine transfers the correct amount of data, when the
data is 'bounced' back too much is copied, causing buffer overwrites.

The fix is simple: calculate and set the correct size for the last sg list
element.

Signed-off-by: Hans Verkuil <hans.verkuil@tandberg.com>
Cc: stable@kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/videobuf-dma-sg.c | 11 +++++++----
 include/media/videobuf-dma-sg.h       |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 06f9a9c2a39a..2ad0bc252b0e 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -94,7 +94,7 @@ err:
  * must free the memory.
  */
 static struct scatterlist *videobuf_pages_to_sg(struct page **pages,
-						int nr_pages, int offset)
+					int nr_pages, int offset, size_t size)
 {
 	struct scatterlist *sglist;
 	int i;
@@ -110,12 +110,14 @@ static struct scatterlist *videobuf_pages_to_sg(struct page **pages,
 		/* DMA to highmem pages might not work */
 		goto highmem;
 	sg_set_page(&sglist[0], pages[0], PAGE_SIZE - offset, offset);
+	size -= PAGE_SIZE - offset;
 	for (i = 1; i < nr_pages; i++) {
 		if (NULL == pages[i])
 			goto nopage;
 		if (PageHighMem(pages[i]))
 			goto highmem;
-		sg_set_page(&sglist[i], pages[i], PAGE_SIZE, 0);
+		sg_set_page(&sglist[i], pages[i], min(PAGE_SIZE, size), 0);
+		size -= min(PAGE_SIZE, size);
 	}
 	return sglist;
 
@@ -170,7 +172,8 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 
 	first = (data          & PAGE_MASK) >> PAGE_SHIFT;
 	last  = ((data+size-1) & PAGE_MASK) >> PAGE_SHIFT;
-	dma->offset   = data & ~PAGE_MASK;
+	dma->offset = data & ~PAGE_MASK;
+	dma->size = size;
 	dma->nr_pages = last-first+1;
 	dma->pages = kmalloc(dma->nr_pages * sizeof(struct page *), GFP_KERNEL);
 	if (NULL == dma->pages)
@@ -252,7 +255,7 @@ int videobuf_dma_map(struct device *dev, struct videobuf_dmabuf *dma)
 
 	if (dma->pages) {
 		dma->sglist = videobuf_pages_to_sg(dma->pages, dma->nr_pages,
-						   dma->offset);
+						   dma->offset, dma->size);
 	}
 	if (dma->vaddr) {
 		dma->sglist = videobuf_vmalloc_to_sg(dma->vaddr,
diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h
index 97e07f46a0fa..aa4ebb42a565 100644
--- a/include/media/videobuf-dma-sg.h
+++ b/include/media/videobuf-dma-sg.h
@@ -48,6 +48,7 @@ struct videobuf_dmabuf {
 
 	/* for userland buffer */
 	int                 offset;
+	size_t		    size;
 	struct page         **pages;
 
 	/* for kernel buffers */
-- 
cgit v1.2.3-59-g8ed1b


From 01db403cf99f739f86903314a489fb420e0e254f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 27 Sep 2010 20:24:54 -0700
Subject: tcp: Fix >4GB writes on 64-bit.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes kernel bugzilla #16603

tcp_sendmsg() truncates iov_len to an 'int' which a 4GB write to write
zero bytes, for example.

There is also the problem higher up of how verify_iovec() works.  It
wants to prevent the total length from looking like an error return
value.

However it does this using 'int', but syscalls return 'long' (and
thus signed 64-bit on 64-bit machines).  So it could trigger
false-positives on 64-bit as written.  So fix it to use 'long'.

Reported-by: Olaf Bonorden <bono@onlinehome.de>
Reported-by: Daniel Büse <dbuese@gmx.de>
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 2 +-
 net/core/iovec.c       | 5 +++--
 net/ipv4/tcp.c         | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index a2fada9becb6..a8f56e1ec760 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
 					  int offset, 
 					  unsigned int len, __wsum *csump);
 
-extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
+extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
 extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
 extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
 			     int offset, int len);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1cd98df412df..e6b133b77ccb 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,9 +35,10 @@
  *	in any case.
  */
 
-int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
+long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
 {
-	int size, err, ct;
+	int size, ct;
+	long err;
 
 	if (m->msg_namelen) {
 		if (mode == VERIFY_READ) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 95d75d443927..f115ea68a4ef 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -943,7 +943,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	sg = sk->sk_route_caps & NETIF_F_SG;
 
 	while (--iovlen >= 0) {
-		int seglen = iov->iov_len;
+		size_t seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
 
 		iov++;
-- 
cgit v1.2.3-59-g8ed1b


From 58f87ed0d45141a90167f34c0959d607160a26df Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.de.marchi@gmail.com>
Date: Tue, 7 Sep 2010 12:49:45 -0400
Subject: ACPI: Fix typos

Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/exutils.c    | 2 +-
 drivers/acpi/acpica/rsutils.c    | 2 +-
 drivers/acpi/apei/Kconfig        | 2 +-
 drivers/acpi/apei/erst-dbg.c     | 2 +-
 drivers/acpi/apei/erst.c         | 2 +-
 drivers/acpi/bus.c               | 4 ++--
 drivers/acpi/processor_perflib.c | 4 ++--
 include/acpi/acpixf.h            | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index 74c24d517f81..4093522eed45 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -109,7 +109,7 @@ void acpi_ex_enter_interpreter(void)
  *
  * DESCRIPTION: Reacquire the interpreter execution region from within the
  *              interpreter code. Failure to enter the interpreter region is a
- *              fatal system error. Used in  conjuction with
+ *              fatal system error. Used in  conjunction with
  *              relinquish_interpreter
  *
  ******************************************************************************/
diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c
index 22cfcfbd9fff..491191e6cf69 100644
--- a/drivers/acpi/acpica/rsutils.c
+++ b/drivers/acpi/acpica/rsutils.c
@@ -149,7 +149,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type)
 
 			/*
 			 * 16-, 32-, and 64-bit cases must use the move macros that perform
-			 * endian conversion and/or accomodate hardware that cannot perform
+			 * endian conversion and/or accommodate hardware that cannot perform
 			 * misaligned memory transfers
 			 */
 		case ACPI_RSC_MOVE16:
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 907e350f1c7d..fca34ccfd294 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -34,6 +34,6 @@ config ACPI_APEI_ERST_DEBUG
 	depends on ACPI_APEI
 	help
 	  ERST is a way provided by APEI to save and retrieve hardware
-	  error infomation to and from a persistent store. Enable this
+	  error information to and from a persistent store. Enable this
 	  if you want to debugging and testing the ERST kernel support
 	  and firmware implementation.
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index 5281ddda2777..98ffa2991ebc 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -2,7 +2,7 @@
  * APEI Error Record Serialization Table debug support
  *
  * ERST is a way provided by APEI to save and retrieve hardware error
- * infomation to and from a persistent store. This file provide the
+ * information to and from a persistent store. This file provide the
  * debugging/testing support for ERST kernel support and firmware
  * implementation.
  *
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 18645f4e83cd..a4904f1680cf 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -2,7 +2,7 @@
  * APEI Error Record Serialization Table support
  *
  * ERST is a way provided by APEI to save and retrieve hardware error
- * infomation to and from a persistent store.
+ * information to and from a persistent store.
  *
  * For more information about ERST, please refer to ACPI Specification
  * version 4.0, section 17.4.
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 5c221ab535d5..cc17b352d1c5 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -55,7 +55,7 @@ EXPORT_SYMBOL(acpi_root_dir);
 static int set_power_nocheck(const struct dmi_system_id *id)
 {
 	printk(KERN_NOTICE PREFIX "%s detected - "
-		"disable power check in power transistion\n", id->ident);
+		"disable power check in power transition\n", id->ident);
 	acpi_power_nocheck = 1;
 	return 0;
 }
@@ -1027,7 +1027,7 @@ static int __init acpi_init(void)
 
 	/*
 	 * If the laptop falls into the DMI check table, the power state check
-	 * will be disabled in the course of device power transistion.
+	 * will be disabled in the course of device power transition.
 	 */
 	dmi_check_system(power_nocheck_dmi_table);
 
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index ba1bd263d903..3a73a93596e8 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -447,8 +447,8 @@ int acpi_processor_notify_smm(struct module *calling_module)
 	if (!try_module_get(calling_module))
 		return -EINVAL;
 
-	/* is_done is set to negative if an error occured,
-	 * and to postitive if _no_ error occured, but SMM
+	/* is_done is set to negative if an error occurred,
+	 * and to postitive if _no_ error occurred, but SMM
 	 * was already notified. This avoids double notification
 	 * which might lead to unexpected results...
 	 */
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index c0786d446a00..984cdc62e30b 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -55,7 +55,7 @@
 extern u8 acpi_gbl_permanent_mmap;
 
 /*
- * Globals that are publically available, allowing for
+ * Globals that are publicly available, allowing for
  * run time configuration
  */
 extern u32 acpi_dbg_level;
-- 
cgit v1.2.3-59-g8ed1b


From e454c844644683571617896ab2a4ce0109c1943e Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 21 Sep 2010 16:31:11 -0300
Subject: Bluetooth: Fix deadlock in the ERTM logic

The Enhanced Retransmission Mode(ERTM) is a realiable mode of operation
of the Bluetooth L2CAP layer. Think on it like a simplified version of
TCP.
The problem we were facing here was a deadlock. ERTM uses a backlog
queue to queue incomimg packets while the user is helding the lock. At
some moment the sk_sndbuf can be exceeded and we can't alloc new skbs
then the code sleep with the lock to wait for memory, that stalls the
ERTM connection once we can't read the acknowledgements packets in the
backlog queue to free memory and make the allocation of outcoming skb
successful.

This patch actually affect all users of bt_skb_send_alloc(), i.e., all
L2CAP modes and SCO.

We are safe against socket states changes or channels deletion while the
we are sleeping wait memory. Checking for the sk->sk_err and
sk->sk_shutdown make the code safe, since any action that can leave the
socket or the channel in a not usable state set one of the struct
members at least. Then we can check both of them when getting the lock
again and return with the proper error if something unexpected happens.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Ulisses Furquim <ulisses@profusion.mobi>
---
 include/net/bluetooth/bluetooth.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 27a902d9b3a9..30fce0128dd7 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -161,12 +161,30 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l
 {
 	struct sk_buff *skb;
 
+	release_sock(sk);
 	if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) {
 		skb_reserve(skb, BT_SKB_RESERVE);
 		bt_cb(skb)->incoming  = 0;
 	}
+	lock_sock(sk);
+
+	if (!skb && *err)
+		return NULL;
+
+	*err = sock_error(sk);
+	if (*err)
+		goto out;
+
+	if (sk->sk_shutdown) {
+		*err = -ECONNRESET;
+		goto out;
+	}
 
 	return skb;
+
+out:
+	kfree_skb(skb);
+	return NULL;
 }
 
 int bt_err(__u16 code);
-- 
cgit v1.2.3-59-g8ed1b


From 29d08b3efddca628b0360411ab2b85f7b1723f48 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 27 Sep 2010 16:17:17 +1000
Subject: drm/gem: handlecount isn't really a kref so don't make it one.

There were lots of places being inconsistent since handle count
looked like a kref but it really wasn't.

Fix this my just making handle count an atomic on the object,
and have it increase the normal object kref.

Now i915/radeon/nouveau drivers can drop the normal reference on
userspace object creation, and have the handle hold it.

This patch fixes a memory leak or corruption on unload, because
the driver had no way of knowing if a handle had been actually
added for this object, and the fbcon object needed to know this
to clean itself up properly.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_gem.c                  |  8 ++------
 drivers/gpu/drm/drm_info.c                 |  2 +-
 drivers/gpu/drm/i915/i915_gem.c            |  6 ++----
 drivers/gpu/drm/i915/intel_fb.c            |  4 +++-
 drivers/gpu/drm/nouveau/nouveau_fbcon.c    |  1 +
 drivers/gpu/drm/nouveau/nouveau_gem.c      |  6 ++----
 drivers/gpu/drm/nouveau/nouveau_notifier.c |  1 +
 drivers/gpu/drm/radeon/radeon_display.c    |  3 ++-
 drivers/gpu/drm/radeon/radeon_fb.c         | 14 ++++----------
 drivers/gpu/drm/radeon/radeon_gem.c        |  4 ++--
 include/drm/drmP.h                         | 18 +++++++++++++-----
 11 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 6fe2cd298c12..f7e61be8430a 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -148,7 +148,7 @@ int drm_gem_object_init(struct drm_device *dev,
 		return -ENOMEM;
 
 	kref_init(&obj->refcount);
-	kref_init(&obj->handlecount);
+	atomic_set(&obj->handle_count, 0);
 	obj->size = size;
 
 	atomic_inc(&dev->object_count);
@@ -496,12 +496,8 @@ static void drm_gem_object_ref_bug(struct kref *list_kref)
  * called before drm_gem_object_free or we'll be touching
  * freed memory
  */
-void
-drm_gem_object_handle_free(struct kref *kref)
+void drm_gem_object_handle_free(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = container_of(kref,
-						  struct drm_gem_object,
-						  handlecount);
 	struct drm_device *dev = obj->dev;
 
 	/* Remove any name for this object */
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index 2ef2c7827243..974e970ce3f8 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -255,7 +255,7 @@ int drm_gem_one_name_info(int id, void *ptr, void *data)
 
 	seq_printf(m, "%6d %8zd %7d %8d\n",
 		   obj->name, obj->size,
-		   atomic_read(&obj->handlecount.refcount),
+		   atomic_read(&obj->handle_count),
 		   atomic_read(&obj->refcount.refcount));
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cf4ffbee1c00..4cdf74264ee8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -136,14 +136,12 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 
 	ret = drm_gem_handle_create(file_priv, obj, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(obj);
 	if (ret) {
-		drm_gem_object_unreference_unlocked(obj);
 		return ret;
 	}
 
-	/* Sink the floating reference from kref_init(handlecount) */
-	drm_gem_object_handle_unreference_unlocked(obj);
-
 	args->handle = handle;
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 7bdc96256bf5..56ad9df2ccb5 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -237,8 +237,10 @@ int intel_fbdev_destroy(struct drm_device *dev,
 	drm_fb_helper_fini(&ifbdev->helper);
 
 	drm_framebuffer_cleanup(&ifb->base);
-	if (ifb->obj)
+	if (ifb->obj) {
+		drm_gem_object_handle_unreference(ifb->obj);
 		drm_gem_object_unreference(ifb->obj);
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index dbd30b2e43fd..d2047713dc59 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -352,6 +352,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *nfbdev)
 
 	if (nouveau_fb->nvbo) {
 		nouveau_bo_unmap(nouveau_fb->nvbo);
+		drm_gem_object_handle_unreference_unlocked(nouveau_fb->nvbo->gem);
 		drm_gem_object_unreference_unlocked(nouveau_fb->nvbo->gem);
 		nouveau_fb->nvbo = NULL;
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index ead7b8fc53fc..19620a6709f5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -167,11 +167,9 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 		goto out;
 
 	ret = drm_gem_handle_create(file_priv, nvbo->gem, &req->info.handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(nvbo->gem);
 out:
-	drm_gem_object_handle_unreference_unlocked(nvbo->gem);
-
-	if (ret)
-		drm_gem_object_unreference_unlocked(nvbo->gem);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index 3ec181ff50ce..3c9964a8fbad 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -79,6 +79,7 @@ nouveau_notifier_takedown_channel(struct nouveau_channel *chan)
 	mutex_lock(&dev->struct_mutex);
 	nouveau_bo_unpin(chan->notifier_bo);
 	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_handle_unreference_unlocked(chan->notifier_bo->gem);
 	drm_gem_object_unreference_unlocked(chan->notifier_bo->gem);
 	drm_mm_takedown(&chan->notifier_heap);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 7422f274615a..b92d2f2fcbed 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -843,8 +843,9 @@ static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
 
-	if (radeon_fb->obj)
+	if (radeon_fb->obj) {
 		drm_gem_object_unreference_unlocked(radeon_fb->obj);
+	}
 	drm_framebuffer_cleanup(fb);
 	kfree(radeon_fb);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index c74a8b20d941..9cdf6a35bc2c 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -94,8 +94,10 @@ static void radeonfb_destroy_pinned_object(struct drm_gem_object *gobj)
 	ret = radeon_bo_reserve(rbo, false);
 	if (likely(ret == 0)) {
 		radeon_bo_kunmap(rbo);
+		radeon_bo_unpin(rbo);
 		radeon_bo_unreserve(rbo);
 	}
+	drm_gem_object_handle_unreference(gobj);
 	drm_gem_object_unreference_unlocked(gobj);
 }
 
@@ -325,8 +327,6 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb
 {
 	struct fb_info *info;
 	struct radeon_framebuffer *rfb = &rfbdev->rfb;
-	struct radeon_bo *rbo;
-	int r;
 
 	if (rfbdev->helper.fbdev) {
 		info = rfbdev->helper.fbdev;
@@ -338,14 +338,8 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb
 	}
 
 	if (rfb->obj) {
-		rbo = rfb->obj->driver_private;
-		r = radeon_bo_reserve(rbo, false);
-		if (likely(r == 0)) {
-			radeon_bo_kunmap(rbo);
-			radeon_bo_unpin(rbo);
-			radeon_bo_unreserve(rbo);
-		}
-		drm_gem_object_unreference_unlocked(rfb->obj);
+		radeonfb_destroy_pinned_object(rfb->obj);
+		rfb->obj = NULL;
 	}
 	drm_fb_helper_fini(&rfbdev->helper);
 	drm_framebuffer_cleanup(&rfb->base);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index c578f265b24c..d1e595d91723 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -201,11 +201,11 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 		return r;
 	}
 	r = drm_gem_handle_create(filp, gobj, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(gobj);
 	if (r) {
-		drm_gem_object_unreference_unlocked(gobj);
 		return r;
 	}
-	drm_gem_object_handle_unreference_unlocked(gobj);
 	args->handle = handle;
 	return 0;
 }
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 774e1d49509b..07e4726a4ee0 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -612,7 +612,7 @@ struct drm_gem_object {
 	struct kref refcount;
 
 	/** Handle count of this object. Each handle also holds a reference */
-	struct kref handlecount;
+	atomic_t handle_count; /* number of handles on this object */
 
 	/** Related drm device */
 	struct drm_device *dev;
@@ -1461,7 +1461,7 @@ struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
 					    size_t size);
 int drm_gem_object_init(struct drm_device *dev,
 			struct drm_gem_object *obj, size_t size);
-void drm_gem_object_handle_free(struct kref *kref);
+void drm_gem_object_handle_free(struct drm_gem_object *obj);
 void drm_gem_vm_open(struct vm_area_struct *vma);
 void drm_gem_vm_close(struct vm_area_struct *vma);
 int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
@@ -1496,7 +1496,7 @@ static inline void
 drm_gem_object_handle_reference(struct drm_gem_object *obj)
 {
 	drm_gem_object_reference(obj);
-	kref_get(&obj->handlecount);
+	atomic_inc(&obj->handle_count);
 }
 
 static inline void
@@ -1505,12 +1505,15 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj)
 	if (obj == NULL)
 		return;
 
+	if (atomic_read(&obj->handle_count) == 0)
+		return;
 	/*
 	 * Must bump handle count first as this may be the last
 	 * ref, in which case the object would disappear before we
 	 * checked for a name
 	 */
-	kref_put(&obj->handlecount, drm_gem_object_handle_free);
+	if (atomic_dec_and_test(&obj->handle_count))
+		drm_gem_object_handle_free(obj);
 	drm_gem_object_unreference(obj);
 }
 
@@ -1520,12 +1523,17 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj)
 	if (obj == NULL)
 		return;
 
+	if (atomic_read(&obj->handle_count) == 0)
+		return;
+
 	/*
 	* Must bump handle count first as this may be the last
 	* ref, in which case the object would disappear before we
 	* checked for a name
 	*/
-	kref_put(&obj->handlecount, drm_gem_object_handle_free);
+
+	if (atomic_dec_and_test(&obj->handle_count))
+		drm_gem_object_handle_free(obj);
 	drm_gem_object_unreference_unlocked(obj);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 6110a1f43c27b516e16d5ce8860fca50748c2a87 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 30 Sep 2010 21:19:07 -0400
Subject: intel_idle: Voluntary leave_mm before entering deeper

Avoid TLB flush IPIs for the cores in deeper c-states by voluntary leave_mm()
before entering into that state. CPUs tend to flush TLB in those c-states
anyways.

acpi_idle does this with C3-type states, but it was not caried over
when intel_idle was introduced.  intel_idle can apply it
to C-states in addition to those that ACPI might export as C3...

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/idle/intel_idle.c | 18 ++++++++++++++----
 include/linux/cpuidle.h   |  1 +
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 96bf38097996..0906fc5b69b9 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -108,7 +108,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
 		.name = "NHM-C3",
 		.desc = "MWAIT 0x10",
 		.driver_data = (void *) 0x10,
-		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 20,
 		.power_usage = 500,
 		.target_residency = 80,
@@ -117,7 +117,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
 		.name = "NHM-C6",
 		.desc = "MWAIT 0x20",
 		.driver_data = (void *) 0x20,
-		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 200,
 		.power_usage = 350,
 		.target_residency = 800,
@@ -149,7 +149,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
 		.name = "ATM-C4",
 		.desc = "MWAIT 0x30",
 		.driver_data = (void *) 0x30,
-		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 100,
 		.power_usage = 250,
 		.target_residency = 400,
@@ -159,7 +159,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
 		.name = "ATM-C6",
 		.desc = "MWAIT 0x40",
 		.driver_data = (void *) 0x40,
-		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 200,
 		.power_usage = 150,
 		.target_residency = 800,
@@ -185,6 +185,16 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
 
 	local_irq_disable();
 
+	/*
+	 * If the state flag indicates that the TLB will be flushed or if this
+	 * is the deepest c-state supported, do a voluntary leave mm to avoid
+	 * costly and mostly unnecessary wakeups for flushing the user TLB's
+	 * associated with the active mm.
+	 */
+	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED ||
+	    (&dev->states[dev->state_count - 1] == state))
+		leave_mm(cpu);
+
 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 36ca9721a0c2..1be416bbbb82 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -53,6 +53,7 @@ struct cpuidle_state {
 #define CPUIDLE_FLAG_BALANCED	(0x40) /* medium latency, moderate savings */
 #define CPUIDLE_FLAG_DEEP	(0x80) /* high latency, large savings */
 #define CPUIDLE_FLAG_IGNORE	(0x100) /* ignore during this idle period */
+#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
 
-- 
cgit v1.2.3-59-g8ed1b


From 39b4d07aa3583ceefe73622841303a0a3e942ca1 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 30 Sep 2010 09:10:26 +0100
Subject: drm: Hold the mutex when dropping the last GEM reference (v2)

In order to be fully threadsafe we need to check that the drm_gem_object
refcount is still 0 after acquiring the mutex in order to call the free
function. Otherwise, we may encounter scenarios like:

Thread A:                                        Thread B:
drm_gem_close
unreference_unlocked
kref_put                                         mutex_lock
...                                              i915_gem_evict
...                                              kref_get -> BUG
...                                              i915_gem_unbind
...                                              kref_put
...                                              i915_gem_object_free
...                                              mutex_unlock
mutex_lock
i915_gem_object_free -> BUG
i915_gem_object_unbind
kfree
mutex_unlock

Note that no driver is currently using the free_unlocked vfunc and it is
scheduled for removal, hasten that process.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=30454
Reported-and-Tested-by: Magnus Kessler <Magnus.Kessler@gmx.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_gem.c | 22 ----------------------
 include/drm/drmP.h        | 10 ++++++----
 2 files changed, 6 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index f7e61be8430a..5663d2719063 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -462,28 +462,6 @@ drm_gem_object_free(struct kref *kref)
 }
 EXPORT_SYMBOL(drm_gem_object_free);
 
-/**
- * Called after the last reference to the object has been lost.
- * Must be called without holding struct_mutex
- *
- * Frees the object
- */
-void
-drm_gem_object_free_unlocked(struct kref *kref)
-{
-	struct drm_gem_object *obj = (struct drm_gem_object *) kref;
-	struct drm_device *dev = obj->dev;
-
-	if (dev->driver->gem_free_object_unlocked != NULL)
-		dev->driver->gem_free_object_unlocked(obj);
-	else if (dev->driver->gem_free_object != NULL) {
-		mutex_lock(&dev->struct_mutex);
-		dev->driver->gem_free_object(obj);
-		mutex_unlock(&dev->struct_mutex);
-	}
-}
-EXPORT_SYMBOL(drm_gem_object_free_unlocked);
-
 static void drm_gem_object_ref_bug(struct kref *list_kref)
 {
 	BUG();
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 07e4726a4ee0..4c9461a4f9e6 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -808,7 +808,6 @@ struct drm_driver {
 	 */
 	int (*gem_init_object) (struct drm_gem_object *obj);
 	void (*gem_free_object) (struct drm_gem_object *obj);
-	void (*gem_free_object_unlocked) (struct drm_gem_object *obj);
 
 	/* vga arb irq handler */
 	void (*vgaarb_irq)(struct drm_device *dev, bool state);
@@ -1456,7 +1455,6 @@ int drm_gem_init(struct drm_device *dev);
 void drm_gem_destroy(struct drm_device *dev);
 void drm_gem_object_release(struct drm_gem_object *obj);
 void drm_gem_object_free(struct kref *kref);
-void drm_gem_object_free_unlocked(struct kref *kref);
 struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
 					    size_t size);
 int drm_gem_object_init(struct drm_device *dev,
@@ -1484,8 +1482,12 @@ drm_gem_object_unreference(struct drm_gem_object *obj)
 static inline void
 drm_gem_object_unreference_unlocked(struct drm_gem_object *obj)
 {
-	if (obj != NULL)
-		kref_put(&obj->refcount, drm_gem_object_free_unlocked);
+	if (obj != NULL) {
+		struct drm_device *dev = obj->dev;
+		mutex_lock(&dev->struct_mutex);
+		kref_put(&obj->refcount, drm_gem_object_free);
+		mutex_unlock(&dev->struct_mutex);
+	}
 }
 
 int drm_gem_handle_create(struct drm_file *file_priv,
-- 
cgit v1.2.3-59-g8ed1b


From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 5 Oct 2010 11:29:27 -0700
Subject: modules: Fix module_bug_list list corruption race

With all the recent module loading cleanups, we've minimized the code
that sits under module_mutex, fixing various deadlocks and making it
possible to do most of the module loading in parallel.

However, that whole conversion totally missed the rather obscure code
that adds a new module to the list for BUG() handling.  That code was
doubly obscure because (a) the code itself lives in lib/bugs.c (for
dubious reasons) and (b) it gets called from the architecture-specific
"module_finalize()" rather than from generic code.

Calling it from arch-specific code makes no sense what-so-ever to begin
with, and is now actively wrong since that code isn't protected by the
module loading lock any more.

So this commit moves the "module_bug_{finalize,cleanup}()" calls away
from the arch-specific code, and into the generic code - and in the
process protects it with the module_mutex so that the list operations
are now safe.

Future fixups:
 - move the module list handling code into kernel/module.c where it
   belongs.
 - get rid of 'module_bug_list' and just use the regular list of modules
   (called 'modules' - imagine that) that we already create and maintain
   for other reasons.

Reported-and-tested-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Adrian Bunk <bunk@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/avr32/kernel/module.c   | 3 +--
 arch/h8300/kernel/module.c   | 3 +--
 arch/mn10300/kernel/module.c | 3 +--
 arch/parisc/kernel/module.c  | 3 +--
 arch/powerpc/kernel/module.c | 5 -----
 arch/s390/kernel/module.c    | 3 +--
 arch/sh/kernel/module.c      | 2 --
 arch/x86/kernel/module.c     | 3 +--
 include/linux/module.h       | 5 ++---
 kernel/module.c              | 4 ++++
 lib/bug.c                    | 6 ++----
 11 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c
index 98f94d041d9c..a727f54d64d6 100644
--- a/arch/avr32/kernel/module.c
+++ b/arch/avr32/kernel/module.c
@@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 	vfree(module->arch.syminfo);
 	module->arch.syminfo = NULL;
 
-	return module_bug_finalize(hdr, sechdrs, module);
+	return 0;
 }
 
 void module_arch_cleanup(struct module *module)
 {
-	module_bug_cleanup(module);
 }
diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c
index 0865e291c20d..db4953dc4e1b 100644
--- a/arch/h8300/kernel/module.c
+++ b/arch/h8300/kernel/module.c
@@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	return module_bug_finalize(hdr, sechdrs, me);
+	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
-	module_bug_cleanup(mod);
 }
diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c
index 6aea7fd76993..196a111e2e29 100644
--- a/arch/mn10300/kernel/module.c
+++ b/arch/mn10300/kernel/module.c
@@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	return module_bug_finalize(hdr, sechdrs, me);
+	return 0;
 }
 
 /*
@@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr,
  */
 void module_arch_cleanup(struct module *mod)
 {
-	module_bug_cleanup(mod);
 }
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 159a2b81e90c..6e81bb596e5b 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr,
 	nsyms = newptr - (Elf_Sym *)symhdr->sh_addr;
 	DEBUGP("NEW num_symtab %lu\n", nsyms);
 	symhdr->sh_size = nsyms * sizeof(Elf_Sym);
-	return module_bug_finalize(hdr, sechdrs, me);
+	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
 	deregister_unwind_table(mod);
-	module_bug_cleanup(mod);
 }
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 477c663e0140..4ef93ae2235f 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -65,10 +65,6 @@ int module_finalize(const Elf_Ehdr *hdr,
 	const Elf_Shdr *sect;
 	int err;
 
-	err = module_bug_finalize(hdr, sechdrs, me);
-	if (err)
-		return err;
-
 	/* Apply feature fixups */
 	sect = find_section(hdr, sechdrs, "__ftr_fixup");
 	if (sect != NULL)
@@ -101,5 +97,4 @@ int module_finalize(const Elf_Ehdr *hdr,
 
 void module_arch_cleanup(struct module *mod)
 {
-	module_bug_cleanup(mod);
 }
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 22cfd634c355..f7167ee4604c 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr,
 {
 	vfree(me->arch.syminfo);
 	me->arch.syminfo = NULL;
-	return module_bug_finalize(hdr, sechdrs, me);
+	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
-	module_bug_cleanup(mod);
 }
diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c
index 43adddfe4c04..ae0be697a89e 100644
--- a/arch/sh/kernel/module.c
+++ b/arch/sh/kernel/module.c
@@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr,
 	int ret = 0;
 
 	ret |= module_dwarf_finalize(hdr, sechdrs, me);
-	ret |= module_bug_finalize(hdr, sechdrs, me);
 
 	return ret;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
-	module_bug_cleanup(mod);
 	module_dwarf_cleanup(mod);
 }
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index e0bc186d7501..1c355c550960 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr,
 		apply_paravirt(pseg, pseg + para->sh_size);
 	}
 
-	return module_bug_finalize(hdr, sechdrs, me);
+	return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
 	alternatives_smp_module_del(mod);
-	module_bug_cleanup(mod);
 }
diff --git a/include/linux/module.h b/include/linux/module.h
index 8a6b9fdc7ffa..aace066bad8f 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -686,17 +686,16 @@ extern int module_sysfs_initialized;
 
 
 #ifdef CONFIG_GENERIC_BUG
-int  module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
+void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
 			 struct module *);
 void module_bug_cleanup(struct module *);
 
 #else	/* !CONFIG_GENERIC_BUG */
 
-static inline int  module_bug_finalize(const Elf_Ehdr *hdr,
+static inline void module_bug_finalize(const Elf_Ehdr *hdr,
 					const Elf_Shdr *sechdrs,
 					struct module *mod)
 {
-	return 0;
 }
 static inline void module_bug_cleanup(struct module *mod) {}
 #endif	/* CONFIG_GENERIC_BUG */
diff --git a/kernel/module.c b/kernel/module.c
index d0b5f8db11b4..ccd641991842 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod)
 {
 	struct module *mod = _mod;
 	list_del(&mod->list);
+	module_bug_cleanup(mod);
 	return 0;
 }
 
@@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto ddebug;
 
+	module_bug_finalize(info.hdr, info.sechdrs, mod);
 	list_add_rcu(&mod->list, &modules);
 	mutex_unlock(&module_mutex);
 
@@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod,
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	module_bug_cleanup(mod);
+
  ddebug:
 	if (!mod->taints)
 		dynamic_debug_remove(info.debug);
diff --git a/lib/bug.c b/lib/bug.c
index 7cdfad88128f..19552096d16b 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
 	return NULL;
 }
 
-int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
-			struct module *mod)
+void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+			 struct module *mod)
 {
 	char *secstrings;
 	unsigned int i;
@@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 	 * could potentially lead to deadlock and thus be counter-productive.
 	 */
 	list_add(&mod->bug_list, &module_bug_list);
-
-	return 0;
 }
 
 void module_bug_cleanup(struct module *mod)
-- 
cgit v1.2.3-59-g8ed1b


From 231d0aefd88e94129cb8fb84794f9bb788c6366e Mon Sep 17 00:00:00 2001
From: Evgeny Kuznetsov <ext-eugeny.kuznetsov@nokia.com>
Date: Tue, 5 Oct 2010 12:47:57 +0400
Subject: wait: using uninitialized member of wait queue

The "flags" member of "struct wait_queue_t" is used in several places in
the kernel code without beeing initialized by init_wait().  "flags" is
used in bitwise operations.

If "flags" not initialized then unexpected behaviour may take place.
Incorrect flags might used later in code.

Added initialization of "wait_queue_t.flags" with zero value into
"init_wait".

Signed-off-by: Evgeny Kuznetsov <EXT-Eugeny.Kuznetsov@nokia.com>
[ The bit we care about does end up being initialized by both
   prepare_to_wait() and add_to_wait_queue(), so this doesn't seem to
   cause actual bugs, but is definitely the right thing to do -Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/wait.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 0836ccc57121..3efc9f3f43a0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -614,6 +614,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 		(wait)->private = current;				\
 		(wait)->func = autoremove_wake_function;		\
 		INIT_LIST_HEAD(&(wait)->task_list);			\
+		(wait)->flags = 0;					\
 	} while (0)
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 1df6a2ebd75067aefbdf07482bf8e3d0584e04ee Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Thu, 30 Sep 2010 12:36:45 +0200
Subject: drm/ttm: Fix two race conditions + fix busy codepaths

This fixes a race pointed out by Dave Airlie where we don't take a buffer
object about to be destroyed off the LRU lists properly. It also fixes a rare
case where a buffer object could be destroyed in the middle of an
accelerated eviction.

The patch also adds a utility function that can be used to prematurely
release GPU memory space usage of an object waiting to be destroyed.
For example during eviction or swapout.

The above mentioned commit didn't queue the buffer on the delayed destroy
list under some rare circumstances. It also didn't completely honor the
remove_all parameter.

Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=615505
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=591061

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 83 +++++++++++++++++++++++++++++++++++++-------
 include/drm/ttm/ttm_bo_api.h |  4 ++-
 2 files changed, 74 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index cb4cf7ef4d1e..db809e034cc4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -441,6 +441,43 @@ out_err:
 	return ret;
 }
 
+/**
+ * Call bo::reserved and with the lru lock held.
+ * Will release GPU memory type usage on destruction.
+ * This is the place to put in driver specific hooks.
+ * Will release the bo::reserved lock and the
+ * lru lock on exit.
+ */
+
+static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_global *glob = bo->glob;
+
+	if (bo->ttm) {
+
+		/**
+		 * Release the lru_lock, since we don't want to have
+		 * an atomic requirement on ttm_tt[unbind|destroy].
+		 */
+
+		spin_unlock(&glob->lru_lock);
+		ttm_tt_unbind(bo->ttm);
+		ttm_tt_destroy(bo->ttm);
+		bo->ttm = NULL;
+		spin_lock(&glob->lru_lock);
+	}
+
+	if (bo->mem.mm_node) {
+		drm_mm_put_block(bo->mem.mm_node);
+		bo->mem.mm_node = NULL;
+	}
+
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
+	spin_unlock(&glob->lru_lock);
+}
+
+
 /**
  * If bo idle, remove from delayed- and lru lists, and unref.
  * If not idle, and already on delayed list, do nothing.
@@ -456,6 +493,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all)
 	int ret;
 
 	spin_lock(&bo->lock);
+retry:
 	(void) ttm_bo_wait(bo, false, false, !remove_all);
 
 	if (!bo->sync_obj) {
@@ -464,31 +502,52 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all)
 		spin_unlock(&bo->lock);
 
 		spin_lock(&glob->lru_lock);
-		put_count = ttm_bo_del_from_lru(bo);
+		ret = ttm_bo_reserve_locked(bo, false, !remove_all, false, 0);
+
+		/**
+		 * Someone else has the object reserved. Bail and retry.
+		 */
 
-		ret = ttm_bo_reserve_locked(bo, false, false, false, 0);
-		BUG_ON(ret);
-		if (bo->ttm)
-			ttm_tt_unbind(bo->ttm);
+		if (unlikely(ret == -EBUSY)) {
+			spin_unlock(&glob->lru_lock);
+			spin_lock(&bo->lock);
+			goto requeue;
+		}
+
+		/**
+		 * We can re-check for sync object without taking
+		 * the bo::lock since setting the sync object requires
+		 * also bo::reserved. A busy object at this point may
+		 * be caused by another thread starting an accelerated
+		 * eviction.
+		 */
+
+		if (unlikely(bo->sync_obj)) {
+			atomic_set(&bo->reserved, 0);
+			wake_up_all(&bo->event_queue);
+			spin_unlock(&glob->lru_lock);
+			spin_lock(&bo->lock);
+			if (remove_all)
+				goto retry;
+			else
+				goto requeue;
+		}
+
+		put_count = ttm_bo_del_from_lru(bo);
 
 		if (!list_empty(&bo->ddestroy)) {
 			list_del_init(&bo->ddestroy);
 			++put_count;
 		}
-		if (bo->mem.mm_node) {
-			drm_mm_put_block(bo->mem.mm_node);
-			bo->mem.mm_node = NULL;
-		}
-		spin_unlock(&glob->lru_lock);
 
-		atomic_set(&bo->reserved, 0);
+		ttm_bo_cleanup_memtype_use(bo);
 
 		while (put_count--)
 			kref_put(&bo->list_kref, ttm_bo_ref_bug);
 
 		return 0;
 	}
-
+requeue:
 	spin_lock(&glob->lru_lock);
 	if (list_empty(&bo->ddestroy)) {
 		void *sync_obj = bo->sync_obj;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 267a86c74e2e..2040e6c4f172 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -246,9 +246,11 @@ struct ttm_buffer_object {
 
 	atomic_t reserved;
 
-
 	/**
 	 * Members protected by the bo::lock
+	 * In addition, setting sync_obj to anything else
+	 * than NULL requires bo::reserved to be held. This allows for
+	 * checking NULL while reserved but not holding bo::lock.
 	 */
 
 	void *sync_obj_arg;
-- 
cgit v1.2.3-59-g8ed1b


From 430c62fb2948d964cf8dc7f3e2f69623c04ef62f Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Thu, 7 Oct 2010 09:35:16 +0200
Subject: elevator: fix oops on early call to elevator_change()

2.6.36 introduces an API for drivers to switch the IO scheduler
instead of manually calling the elevator exit and init functions.
This API was added since q->elevator must be cleared in between
those two calls. And since we already have this functionality
directly from use by the sysfs interface to switch schedulers
online, it was prudent to reuse it internally too.

But this API needs the queue to be in a fully initialized state
before it is called, or it will attempt to unregister elevator
kobjects before they have been added. This results in an oops
like this:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000051
IP: [<ffffffff8116f15e>] sysfs_create_dir+0x2e/0xc0
PGD 47ddfc067 PUD 47c6a1067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
last sysfs file: /sys/devices/pci0000:00/0000:00:02.0/0000:04:00.1/irq
CPU 2
Modules linked in: t(+) loop hid_apple usbhid ahci ehci_hcd uhci_hcd libahci usbcore nls_base igb

Pid: 7319, comm: modprobe Not tainted 2.6.36-rc6+ #132 QSSC-S4R/QSSC-S4R
RIP: 0010:[<ffffffff8116f15e>]  [<ffffffff8116f15e>] sysfs_create_dir+0x2e/0xc0
RSP: 0018:ffff88027da25d08  EFLAGS: 00010246
RAX: ffff88047c68c528 RBX: 00000000fffffffe RCX: 0000000000000000
RDX: 000000000000002f RSI: 000000000000002f RDI: ffff88047e196c88
RBP: ffff88027da25d38 R08: 0000000000000000 R09: d84156c5635688c0
R10: d84156c5635688c0 R11: 0000000000000000 R12: ffff88047e196c88
R13: 0000000000000000 R14: 0000000000000000 R15: ffff88047c68c528
FS:  00007fcb0b26f6e0(0000) GS:ffff880287400000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000051 CR3: 000000047e76e000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process modprobe (pid: 7319, threadinfo ffff88027da24000, task ffff88027d377090)
Stack:
 ffff88027da25d58 ffff88047c68c528 00000000fffffffe ffff88047e196c88
<0> ffff88047c68c528 ffff88047e05bd90 ffff88027da25d78 ffffffff8123fb77
<0> ffff88047e05bd90 0000000000000000 ffff88047e196c88 ffff88047c68c528
Call Trace:
 [<ffffffff8123fb77>] kobject_add_internal+0xe7/0x1f0
 [<ffffffff8123fd98>] kobject_add_varg+0x38/0x60
 [<ffffffff8123feb9>] kobject_add+0x69/0x90
 [<ffffffff8116efe0>] ? sysfs_remove_dir+0x20/0xa0
 [<ffffffff8103d48d>] ? sub_preempt_count+0x9d/0xe0
 [<ffffffff8143de20>] ? _raw_spin_unlock+0x30/0x50
 [<ffffffff8116efe0>] ? sysfs_remove_dir+0x20/0xa0
 [<ffffffff8116eff4>] ? sysfs_remove_dir+0x34/0xa0
 [<ffffffff81224204>] elv_register_queue+0x34/0xa0
 [<ffffffff81224aad>] elevator_change+0xfd/0x250
 [<ffffffffa007e000>] ? t_init+0x0/0x361 [t]
 [<ffffffffa007e000>] ? t_init+0x0/0x361 [t]
 [<ffffffffa007e0a8>] t_init+0xa8/0x361 [t]
 [<ffffffff810001de>] do_one_initcall+0x3e/0x170
 [<ffffffff8108c3fd>] sys_init_module+0xbd/0x220
 [<ffffffff81002f2b>] system_call_fastpath+0x16/0x1b
Code: e5 41 56 41 55 41 54 49 89 fc 53 48 83 ec 10 48 85 ff 74 52 48 8b 47 18 49 c7 c5 00 46 61 81 48 85 c0 74 04 4c 8b 68 30 45 31 f6 <41> 80 7d 51 00 74 0e 49 8b 44 24 28 4c 89 e7 ff 50 20 49 89 c6
RIP  [<ffffffff8116f15e>] sysfs_create_dir+0x2e/0xc0
 RSP <ffff88027da25d08>
CR2: 0000000000000051
---[ end trace a6541d3bf07945df ]---

Fix this by adding a registered bit to the elevator queue, which is
set when the sysfs kobjects have been registered.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/elevator.c         | 12 ++++++++----
 include/linux/elevator.h |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/block/elevator.c b/block/elevator.c
index 205b09a5bd9e..4e11559aa2b0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -938,6 +938,7 @@ int elv_register_queue(struct request_queue *q)
 			}
 		}
 		kobject_uevent(&e->kobj, KOBJ_ADD);
+		e->registered = 1;
 	}
 	return error;
 }
@@ -947,6 +948,7 @@ static void __elv_unregister_queue(struct elevator_queue *e)
 {
 	kobject_uevent(&e->kobj, KOBJ_REMOVE);
 	kobject_del(&e->kobj);
+	e->registered = 0;
 }
 
 void elv_unregister_queue(struct request_queue *q)
@@ -1042,11 +1044,13 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
 	spin_unlock_irq(q->queue_lock);
 
-	__elv_unregister_queue(old_elevator);
+	if (old_elevator->registered) {
+		__elv_unregister_queue(old_elevator);
 
-	err = elv_register_queue(q);
-	if (err)
-		goto fail_register;
+		err = elv_register_queue(q);
+		if (err)
+			goto fail_register;
+	}
 
 	/*
 	 * finally exit old elevator and turn off BYPASS.
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 926b50322a46..4fd978e7eb83 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -93,6 +93,7 @@ struct elevator_queue
 	struct elevator_type *elevator_type;
 	struct mutex sysfs_lock;
 	struct hlist_head *hash;
+	unsigned int registered:1;
 };
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 7c5347733dcc4ba0bac0baf86d99fae0561f33b7 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 11 Oct 2010 18:13:31 -0400
Subject: fanotify: disable fanotify syscalls

This patch disables the fanotify syscalls by just not building them and
letting the cond_syscall() statements in kernel/sys_ni.c redirect them
to sys_ni_syscall().

It was pointed out by Tvrtko Ursulin that the fanotify interface did not
include an explicit prioritization between groups.  This is necessary
for fanotify to be usable for hierarchical storage management software,
as they must get first access to the file, before inotify-like notifiers
see the file.

This feature can be added in an ABI compatible way in the next release
(by using a number of bits in the flags field to carry the info) but it
was suggested by Alan that maybe we should just hold off and do it in
the next cycle, likely with an (new) explicit argument to the syscall.
I don't like this approach best as I know people are already starting to
use the current interface, but Alan is all wise and noone on list backed
me up with just using what we have.  I feel this is needlessly ripping
the rug out from under people at the last minute, but if others think it
needs to be a new argument it might be the best way forward.

Three choices:
Go with what we got (and implement the new feature next cycle).  Add a
new field right now (and implement the new feature next cycle).  Wait
till next cycle to release the ABI (and implement the new feature next
cycle).  This is number 3.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/Kconfig    | 2 +-
 include/linux/Kbuild | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 22c629eedd82..b388443c3a09 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
 
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
-source "fs/notify/fanotify/Kconfig"
+#source "fs/notify/fanotify/Kconfig"
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 626b629429ff..4e8ea8c8ec1e 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -118,7 +118,6 @@ header-y += eventpoll.h
 header-y += ext2_fs.h
 header-y += fadvise.h
 header-y += falloc.h
-header-y += fanotify.h
 header-y += fb.h
 header-y += fcntl.h
 header-y += fd.h
-- 
cgit v1.2.3-59-g8ed1b


From 0eead9ab41da33644ae2c97c57ad03da636a0422 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 14 Oct 2010 10:57:40 -0700
Subject: Don't dump task struct in a.out core-dumps

akiphie points out that a.out core-dumps have that odd task struct
dumping that was never used and was never really a good idea (it goes
back into the mists of history, probably the original core-dumping
code).  Just remove it.

Also do the access_ok() check on dump_write().  It probably doesn't
matter (since normal filesystems all seem to do it anyway), but he
points out that it's normally done by the VFS layer, so ...

[ I suspect that we should possibly do "vfs_write()" instead of
  calling ->write directly.  That also does the whole fsnotify and write
  statistics thing, which may or may not be a good idea. ]

And just to be anal, do this all for the x86-64 32-bit a.out emulation
code too, even though it's not enabled (and won't currently even
compile)

Reported-by: akiphie <akiphie@lavabit.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32_aout.c | 22 +++++-----------------
 fs/binfmt_aout.c          |  4 ----
 include/linux/coredump.h  |  2 +-
 3 files changed, 6 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 0350311906ae..2d93bdbc9ac0 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -34,7 +34,7 @@
 #include <asm/ia32.h>
 
 #undef WARN_OLD
-#undef CORE_DUMP /* probably broken */
+#undef CORE_DUMP /* definitely broken */
 
 static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
 static int load_aout_library(struct file *);
@@ -131,21 +131,15 @@ static void set_brk(unsigned long start, unsigned long end)
  * macros to write out all the necessary info.
  */
 
-static int dump_write(struct file *file, const void *addr, int nr)
-{
-	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
+#include <linux/coredump.h>
 
 #define DUMP_WRITE(addr, nr)			     \
 	if (!dump_write(file, (void *)(addr), (nr))) \
 		goto end_coredump;
 
-#define DUMP_SEEK(offset)						\
-	if (file->f_op->llseek) {					\
-		if (file->f_op->llseek(file, (offset), 0) != (offset))	\
-			goto end_coredump;				\
-	} else								\
-		file->f_pos = (offset)
+#define DUMP_SEEK(offset)		\
+	if (!dump_seek(file, offset))	\
+		goto end_coredump;
 
 #define START_DATA()	(u.u_tsize << PAGE_SHIFT)
 #define START_STACK(u)	(u.start_stack)
@@ -217,12 +211,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
 		dump_size = dump.u_ssize << PAGE_SHIFT;
 		DUMP_WRITE(dump_start, dump_size);
 	}
-	/*
-	 * Finally dump the task struct.  Not be used by gdb, but
-	 * could be useful
-	 */
-	set_fs(KERNEL_DS);
-	DUMP_WRITE(current, sizeof(*current));
 end_coredump:
 	set_fs(fs);
 	return has_dumped;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index f96eff04e11a..a6395bdb26ae 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -134,10 +134,6 @@ static int aout_core_dump(struct coredump_params *cprm)
 		if (!dump_write(file, dump_start, dump_size))
 			goto end_coredump;
 	}
-/* Finally dump the task struct.  Not be used by gdb, but could be useful */
-	set_fs(KERNEL_DS);
-	if (!dump_write(file, current, sizeof(*current)))
-		goto end_coredump;
 end_coredump:
 	set_fs(fs);
 	return has_dumped;
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 8ba66a9d9022..59579cfee6a0 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -11,7 +11,7 @@
  */
 static inline int dump_write(struct file *file, const void *addr, int nr)
 {
-	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
 }
 
 static inline int dump_seek(struct file *file, loff_t off)
-- 
cgit v1.2.3-59-g8ed1b


From 3aa0ce825ade0cf5506e32ccf51d01fc8d22a9cf Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 14 Oct 2010 14:32:06 -0700
Subject: Un-inline the core-dump helper functions

Tony Luck reports that the addition of the access_ok() check in commit
0eead9ab41da ("Don't dump task struct in a.out core-dumps") broke the
ia64 compile due to missing the necessary header file includes.

Rather than add yet another include (<asm/unistd.h>) to make everything
happy, just uninline the silly core dump helper functions and move the
bodies to fs/exec.c where they make a lot more sense.

dump_seek() in particular was too big to be an inline function anyway,
and none of them are in any way performance-critical.  And we really
don't need to mess up our include file headers more than they already
are.

Reported-and-tested-by: Tony Luck <tony.luck@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/coredump.h | 34 ++--------------------------------
 2 files changed, 40 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index 828dd2461d6b..03278c984ba0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2014,3 +2014,41 @@ fail_creds:
 fail:
 	return;
 }
+
+/*
+ * Core dumping helper functions.  These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+
+int dump_seek(struct file *file, loff_t off)
+{
+	int ret = 1;
+
+	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+			return 0;
+	} else {
+		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+		if (!buf)
+			return 0;
+		while (off > 0) {
+			unsigned long n = off;
+
+			if (n > PAGE_SIZE)
+				n = PAGE_SIZE;
+			if (!dump_write(file, buf, n)) {
+				ret = 0;
+				break;
+			}
+			off -= n;
+		}
+		free_page((unsigned long)buf);
+	}
+	return ret;
+}
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 59579cfee6a0..ba4b85a6d9b8 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -9,37 +9,7 @@
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
  */
-static inline int dump_write(struct file *file, const void *addr, int nr)
-{
-	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-
-static inline int dump_seek(struct file *file, loff_t off)
-{
-	int ret = 1;
-
-	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
-		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
-			return 0;
-	} else {
-		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
-		if (!buf)
-			return 0;
-		while (off > 0) {
-			unsigned long n = off;
-
-			if (n > PAGE_SIZE)
-				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n)) {
-				ret = 0;
-				break;
-			}
-			off -= n;
-		}
-		free_page((unsigned long)buf);
-	}
-	return ret;
-}
+extern int dump_write(struct file *file, const void *addr, int nr);
+extern int dump_seek(struct file *file, loff_t off);
 
 #endif /* _LINUX_COREDUMP_H */
-- 
cgit v1.2.3-59-g8ed1b


From 79b5dc0c64d88cda3da23b2e22a5cec0964372ac Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 15 Oct 2010 14:34:14 -0700
Subject: types.h: define __aligned_u64 and expose to userspace

We currently have a kernel internal type called aligned_u64 which aligns
__u64's on 8 bytes boundaries even on systems which would normally align
them on 4 byte boundaries.  This patch creates a new type __aligned_u64
which does the same thing but which is exposed to userspace rather than
being kernel internal.

[akpm: merge early as both the net and audit trees want this]

[akpm@linux-foundation.org: enhance the comment describing the reasons for using aligned_u64.  Via Andreas and Andi.]
Based-on-patch-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: Jan Engelhardt <jengelh@medozas.de>
Cc: David Miller <davem@davemloft.net>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/types.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/types.h b/include/linux/types.h
index 01a082f56ef4..357dbc19606f 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -121,7 +121,15 @@ typedef		__u64		u_int64_t;
 typedef		__s64		int64_t;
 #endif
 
-/* this is a special 64bit data type that is 8-byte aligned */
+/*
+ * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid
+ * common 32/64-bit compat problems.
+ * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other
+ * architectures) and to 8-byte boundaries on 64-bit architetures.  The new
+ * aligned_64 type enforces 8-byte alignment so that structs containing
+ * aligned_64 values have the same alignment on 32-bit and 64-bit architectures.
+ * No conversions are necessary between 32-bit user-space and a 64-bit kernel.
+ */
 #define aligned_u64 __u64 __attribute__((aligned(8)))
 #define aligned_be64 __be64 __attribute__((aligned(8)))
 #define aligned_le64 __le64 __attribute__((aligned(8)))
@@ -178,6 +186,11 @@ typedef __u64 __bitwise __be64;
 typedef __u16 __bitwise __sum16;
 typedef __u32 __bitwise __wsum;
 
+/* this is a special 64bit data type that is 8-byte aligned */
+#define __aligned_u64 __u64 __attribute__((aligned(8)))
+#define __aligned_be64 __be64 __attribute__((aligned(8)))
+#define __aligned_le64 __le64 __attribute__((aligned(8)))
+
 #ifdef __KERNEL__
 typedef unsigned __bitwise__ gfp_t;
 typedef unsigned __bitwise__ fmode_t;
-- 
cgit v1.2.3-59-g8ed1b


From 363129ea90e0835b8552b797714cd200f674e287 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Sat, 16 Oct 2010 13:24:52 -0400
Subject: ALSA: fix unused warnings with snd_power_get_state

If we compile the ASoC code with PM disabled, we hit stuff like:
sound/soc/soc-dapm.c: In function 'snd_soc_dapm_suspend_check':
sound/soc/soc-dapm.c:440: warning: unused variable 'codec'

So tweak the stub macro to avoid these issues.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/core.h b/include/sound/core.h
index 89e0ac17f44a..c129f0813bae 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -179,7 +179,7 @@ int snd_power_wait(struct snd_card *card, unsigned int power_state);
 #define snd_power_lock(card)		do { (void)(card); } while (0)
 #define snd_power_unlock(card)		do { (void)(card); } while (0)
 static inline int snd_power_wait(struct snd_card *card, unsigned int state) { return 0; }
-#define snd_power_get_state(card)	SNDRV_CTL_POWER_D0
+#define snd_power_get_state(card)	({ (void)(card); SNDRV_CTL_POWER_D0; })
 #define snd_power_change_state(card, state)	do { (void)(card); } while (0)
 
 #endif /* CONFIG_PM */
-- 
cgit v1.2.3-59-g8ed1b


From c08d91695b2a3349254a62b60f03f7971bd90fa0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 17 Oct 2010 10:40:53 +0200
Subject: ALSA: tlv - Define numbers in sound/tlv.h

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/tlv.h       | 4 +++-
 sound/pci/hda/hda_codec.c | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/tlv.h b/include/sound/tlv.h
index 9fd5b19ccf5c..7067e2dfb0b9 100644
--- a/include/sound/tlv.h
+++ b/include/sound/tlv.h
@@ -38,9 +38,11 @@
 #define SNDRV_CTL_TLVT_DB_MINMAX 4	/* dB scale with min/max */
 #define SNDRV_CTL_TLVT_DB_MINMAX_MUTE 5	/* dB scale with min/max with mute */
 
+#define TLV_DB_SCALE_MASK	0xffff
+#define TLV_DB_SCALE_MUTE	0x10000
 #define TLV_DB_SCALE_ITEM(min, step, mute)			\
 	SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int),	\
-	(min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0)
+	(min), ((step) & TLV_DB_SCALE_MASK) | ((mute) ? TLV_DB_SCALE_MUTE : 0)
 #define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
 	unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) }
 
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 8c933c8006f4..ee134a25092c 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1843,7 +1843,7 @@ int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 	val1 += ofs;
 	val1 = ((int)val1) * ((int)val2);
 	if (min_mute)
-		val2 |= 0x10000;
+		val2 |= TLV_DB_SCALE_MUTE;
 	if (put_user(SNDRV_CTL_TLVT_DB_SCALE, _tlv))
 		return -EFAULT;
 	if (put_user(2 * sizeof(unsigned int), _tlv + 1))
-- 
cgit v1.2.3-59-g8ed1b


From 83fc3bc09518d42e8f5073e2a65884701dfadf19 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Date: Mon, 18 Oct 2010 12:20:39 +0900
Subject: ALSA: emu10k1: Fix warning: "CCR" redefined

CCR is defined in emu10k1, but SuperH is defined too.
If user use this driver with SuperH, it becomes a double definition.

Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/emu10k1.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 7dc97d12253c..4f865df42f0f 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -438,6 +438,8 @@
 #define CCCA_CURRADDR_MASK	0x00ffffff	/* Current address of the selected channel		*/
 #define CCCA_CURRADDR		0x18000008
 
+/* undefine CCR to avoid conflict with the definition for SH */
+#undef CCR
 #define CCR			0x09		/* Cache control register				*/
 #define CCR_CACHEINVALIDSIZE	0x07190009
 #define CCR_CACHEINVALIDSIZE_MASK	0xfe000000	/* Number of invalid samples cache for this channel    	*/
-- 
cgit v1.2.3-59-g8ed1b