From 18cb2aef91b37dbce2bec2f39bb1dddd0e9dd838 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 03:26:23 +0900 Subject: percpu: handle __percpu notations in UP accessors UP accessors didn't take care of __percpu notations leading to a lot of spurious sparse warnings on UP configurations. Fix it. Signed-off-by: Namhyung Kim Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 15 ++++++++++----- include/linux/percpu.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b5043a9890d8..08923b684768 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -70,11 +70,16 @@ extern void setup_per_cpu_areas(void); #else /* ! SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &(var))) -#define __get_cpu_var(var) (var) -#define __raw_get_cpu_var(var) (var) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define VERIFY_PERCPU_PTR(__p) ({ \ + __verify_pcpu_ptr((__p)); \ + (typeof(*(__p)) __kernel __force *)(__p); \ +}) + +#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) +#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) #endif /* SMP */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b8b9084527b1..49466b13c5c6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -149,7 +149,7 @@ extern void __init percpu_init_late(void); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) /* can't distinguish from other static vars, always false */ static inline bool is_kernel_percpu_address(unsigned long addr) -- cgit v1.2.3-59-g8ed1b From 1b5ad24878b7e5a543b98c5d2f8c0d8c0dd3088f Mon 
Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 14:29:22 +0200 Subject: slub: add missing __percpu markup in mm/slub_def.h kmem_cache->cpu_slab is a percpu pointer but was missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 6447a723ecb1..5ec4bc0e45aa 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -68,7 +68,7 @@ struct kmem_cache_order_objects { * Slab cache management. */ struct kmem_cache { - struct kmem_cache_cpu *cpu_slab; + struct kmem_cache_cpu __percpu *cpu_slab; /* Used for retriving partial slabs etc */ unsigned long flags; int size; /* The size of an object including meta data */ -- cgit v1.2.3-59-g8ed1b From 92298e668372f2f6c8a79fb272f13d65161a4876 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 13 Aug 2010 10:22:17 +1000 Subject: PCI: provide stub pci_domain_nr function for !CONFIG_PCI configs Allows the new PCI domain aware DRM code to compile on m68k. 
Reported-by: Geert Uytterhoeven Signed-off-by: Dave Airlie Signed-off-by: Jesse Barnes --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index b1d17956a153..c8d95e369ff4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1214,6 +1214,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) { return NULL; } +static inline int pci_domain_nr(struct pci_bus *bus) +{ return 0; } + #define dev_is_pci(d) (false) #define dev_is_pf(d) (false) #define dev_num_vf(d) (0) -- cgit v1.2.3-59-g8ed1b From 1b2f1489633888d4a06028315dc19d65768a1c05 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 14 Aug 2010 20:20:34 +1000 Subject: drm: block userspace under allocating buffer and having drivers overwrite it (v2) With the current screwed-up (but it's ABI) ioctls for the drm, Linus pointed out that we could allow userspace to specify the allocation size, but we pass it to the driver which then uses it blindly to store a struct. Now if userspace specifies the allocation size as smaller than the driver needs, the driver can possibly overwrite memory. This patch restructures the driver ioctls so we store the structure size we are expecting, and make sure we allocate at least that size. The copies from/to userspace are still restricted to the size the user specifies; this allows ioctl structs to grow on both sides of the equation. Up until now we didn't really use the DRM_IOCTL defines in the kernel, so this cleans them up and adds them for nouveau. 
v2: fix nouveau pushbuf arg (thanks to Ben for pointing it out) Reported-by: Linus Torvalds Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 23 ++++++--- drivers/gpu/drm/i810/i810_dma.c | 30 ++++++------ drivers/gpu/drm/i830/i830_dma.c | 28 +++++------ drivers/gpu/drm/i915/i915_dma.c | 80 +++++++++++++++---------------- drivers/gpu/drm/mga/mga_state.c | 26 +++++----- drivers/gpu/drm/nouveau/nouveau_channel.c | 24 +++++----- drivers/gpu/drm/r128/r128_state.c | 35 +++++++------- drivers/gpu/drm/radeon/radeon_kms.c | 78 +++++++++++++++--------------- drivers/gpu/drm/radeon/radeon_state.c | 56 +++++++++++----------- drivers/gpu/drm/savage/savage_bci.c | 8 ++-- drivers/gpu/drm/sis/sis_mm.c | 12 ++--- drivers/gpu/drm/via/via_dma.c | 28 +++++------ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 34 ++++++------- include/drm/drmP.h | 6 ++- include/drm/i830_drm.h | 28 +++++------ include/drm/i915_drm.h | 1 + include/drm/mga_drm.h | 2 +- include/drm/nouveau_drm.h | 13 +++++ include/drm/radeon_drm.h | 4 +- include/drm/savage_drm.h | 8 ++-- 20 files changed, 275 insertions(+), 249 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 3644c94c0a17..84da748555bc 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -55,6 +55,9 @@ static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); +#define DRM_IOCTL_DEF(ioctl, _func, _flags) \ + [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0} + /** Ioctl table */ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, 0), @@ -421,6 +424,7 @@ long drm_ioctl(struct file *filp, int retcode = -EINVAL; char stack_kdata[128]; char *kdata = NULL; + unsigned int usize, asize; dev = file_priv->minor->dev; atomic_inc(&dev->ioctl_count); @@ -436,11 +440,18 @@ long drm_ioctl(struct file *filp, ((nr < DRM_COMMAND_BASE) || (nr >= DRM_COMMAND_END))) goto err_i1; if ((nr >= 
DRM_COMMAND_BASE) && (nr < DRM_COMMAND_END) && - (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) + (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) { + u32 drv_size; ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE]; + drv_size = _IOC_SIZE(ioctl->cmd_drv); + usize = asize = _IOC_SIZE(cmd); + if (drv_size > asize) + asize = drv_size; + } else if ((nr >= DRM_COMMAND_END) || (nr < DRM_COMMAND_BASE)) { ioctl = &drm_ioctls[nr]; cmd = ioctl->cmd; + usize = asize = _IOC_SIZE(cmd); } else goto err_i1; @@ -460,10 +471,10 @@ long drm_ioctl(struct file *filp, retcode = -EACCES; } else { if (cmd & (IOC_IN | IOC_OUT)) { - if (_IOC_SIZE(cmd) <= sizeof(stack_kdata)) { + if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; } else { - kdata = kmalloc(_IOC_SIZE(cmd), GFP_KERNEL); + kdata = kmalloc(asize, GFP_KERNEL); if (!kdata) { retcode = -ENOMEM; goto err_i1; @@ -473,12 +484,12 @@ long drm_ioctl(struct file *filp, if (cmd & IOC_IN) { if (copy_from_user(kdata, (void __user *)arg, - _IOC_SIZE(cmd)) != 0) { + usize) != 0) { retcode = -EFAULT; goto err_i1; } } else - memset(kdata, 0, _IOC_SIZE(cmd)); + memset(kdata, 0, usize); if (ioctl->flags & DRM_UNLOCKED) retcode = func(dev, kdata, file_priv); @@ -490,7 +501,7 @@ long drm_ioctl(struct file *filp, if (cmd & IOC_OUT) { if (copy_to_user((void __user *)arg, kdata, - _IOC_SIZE(cmd)) != 0) + usize) != 0) retcode = -EFAULT; } } diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 0e6c131313d9..61b4caf220fa 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -1255,21 +1255,21 @@ long i810_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } struct drm_ioctl_desc i810_ioctls[] = { - DRM_IOCTL_DEF(DRM_I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FLUSH, 
i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED), }; int i810_max_ioctl = 
DRM_ARRAY_SIZE(i810_ioctls); diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c index 5168862c9227..671aa18415ac 100644 --- a/drivers/gpu/drm/i830/i830_dma.c +++ b/drivers/gpu/drm/i830/i830_dma.c @@ -1524,20 +1524,20 @@ long i830_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } struct drm_ioctl_desc i830_ioctls[] = { - DRM_IOCTL_DEF(DRM_I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETAGE, i830_getage, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETAGE, i830_getage, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED), + 
DRM_IOCTL_DEF_DRV(I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED), }; int i830_max_ioctl = DRM_ARRAY_SIZE(i830_ioctls); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f19ffe87af3c..a2d3509c393b 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2360,46 +2360,46 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc i915_ioctls[] = { - DRM_IOCTL_DEF(DRM_I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_FLUSH, i915_flush_ioctl, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_FLIP, i915_flip_bufs, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_GETPARAM, i915_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_ALLOC, i915_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_FREE, i915_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_DESTROY_HEAP, i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ), - DRM_IOCTL_DEF(DRM_I915_SET_VBLANK_PIPE, i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ), - DRM_IOCTL_DEF(DRM_I915_GET_VBLANK_PIPE, i915_vblank_pipe_get, DRM_AUTH ), - DRM_IOCTL_DEF(DRM_I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH), - 
DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, 
DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_FLUSH, i915_flush_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_FLIP, i915_flip_bufs, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_ALLOC, i915_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_FREE, i915_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, i915_vblank_pipe_get, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, 
i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c index fff82045c427..9ce2827f8c00 100644 --- a/drivers/gpu/drm/mga/mga_state.c +++ b/drivers/gpu/drm/mga/mga_state.c @@ -1085,19 +1085,19 @@ file_priv) } struct drm_ioctl_desc mga_ioctls[] = { - DRM_IOCTL_DEF(DRM_MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_MGA_FLUSH, mga_dma_flush, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_RESET, mga_dma_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_SWAP, mga_dma_swap, 
DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_CLEAR, mga_dma_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_VERTEX, mga_dma_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_INDICES, mga_dma_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_ILOAD, mga_dma_iload, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_BLIT, mga_dma_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_GETPARAM, mga_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_SET_FENCE, mga_set_fence, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(MGA_FLUSH, mga_dma_flush, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_RESET, mga_dma_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_SWAP, mga_dma_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_CLEAR, mga_dma_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_VERTEX, mga_dma_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_INDICES, mga_dma_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_ILOAD, mga_dma_iload, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_BLIT, mga_dma_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_GETPARAM, mga_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_SET_FENCE, mga_set_fence, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), }; int mga_max_ioctl = DRM_ARRAY_SIZE(mga_ioctls); diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index 90fdcda332be..0480f064f2c1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -426,18 +426,18 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data, ***********************************/ struct drm_ioctl_desc nouveau_ioctls[] = { - DRM_IOCTL_DEF(DRM_NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_SETPARAM, nouveau_ioctl_setparam, 
DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), }; int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls); diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c index 077af1f2f9b4..a9e33ce65918 100644 --- 
a/drivers/gpu/drm/r128/r128_state.c +++ b/drivers/gpu/drm/r128/r128_state.c @@ -1639,30 +1639,29 @@ void r128_driver_preclose(struct drm_device *dev, struct drm_file *file_priv) r128_do_cleanup_pageflip(dev); } } - void r128_driver_lastclose(struct drm_device *dev) { r128_do_cleanup_cce(dev); } struct drm_ioctl_desc r128_ioctls[] = { - DRM_IOCTL_DEF(DRM_R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_IDLE, r128_cce_idle, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_RESET, r128_engine_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_FULLSCREEN, r128_fullscreen, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_SWAP, r128_cce_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_FLIP, r128_cce_flip, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_CLEAR, r128_cce_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_VERTEX, r128_cce_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_INDICES, r128_cce_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_BLIT, r128_cce_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_DEPTH, r128_cce_depth, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_STIPPLE, r128_cce_stipple, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_GETPARAM, r128_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_IDLE, r128_cce_idle, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_RESET, r128_engine_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_FULLSCREEN, 
r128_fullscreen, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_SWAP, r128_cce_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_FLIP, r128_cce_flip, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_CLEAR, r128_cce_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_VERTEX, r128_cce_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INDICES, r128_cce_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_BLIT, r128_cce_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_DEPTH, r128_cce_depth, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_STIPPLE, r128_cce_stipple, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_GETPARAM, r128_getparam, DRM_AUTH), }; int r128_max_ioctl = DRM_ARRAY_SIZE(r128_ioctls); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index b1c8ace5f080..27435db0aa48 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -323,45 +323,45 @@ KMS_INVALID_IOCTL(radeon_surface_free_kms) struct drm_ioctl_desc radeon_ioctls_kms[] = { - DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, 
radeon_cp_texture_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH), + 
DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH), /* KMS */ - DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - 
DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index b3ba44c0a818..4ae5a3d1074e 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -3228,34 +3228,34 @@ void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc radeon_ioctls[] = { - DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, 
DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) + DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, 
DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) }; int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c index f576232846c3..6756c97899f1 100644 --- a/drivers/gpu/drm/savage/savage_bci.c +++ b/drivers/gpu/drm/savage/savage_bci.c @@ -1082,10 +1082,10 @@ void savage_reclaim_buffers(struct drm_device 
*dev, struct drm_file *file_priv) } struct drm_ioctl_desc savage_ioctls[] = { - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH), }; int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls); diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c index 07d0f2979cac..7fe2b63412ce 100644 --- a/drivers/gpu/drm/sis/sis_mm.c +++ b/drivers/gpu/drm/sis/sis_mm.c @@ -320,12 +320,12 @@ void sis_reclaim_buffers_locked(struct drm_device *dev, } struct drm_ioctl_desc sis_ioctls[] = { - DRM_IOCTL_DEF(DRM_SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_FB_FREE, sis_drm_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_AGP_FREE, sis_drm_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_FB_FREE, sis_drm_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_AGP_FREE, sis_drm_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), }; int sis_max_ioctl = DRM_ARRAY_SIZE(sis_ioctls); diff --git 
a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c index 68dda74a50ae..cc0ffa9abd00 100644 --- a/drivers/gpu/drm/via/via_dma.c +++ b/drivers/gpu/drm/via/via_dma.c @@ -722,20 +722,20 @@ static int via_cmdbuf_size(struct drm_device *dev, void *data, struct drm_file * } struct drm_ioctl_desc via_ioctls[] = { - DRM_IOCTL_DEF(DRM_VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_FREEMEM, via_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_DMA_INIT, via_dma_init, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_FLUSH, via_flush_ioctl, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_DMA_BLIT, via_dma_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH) + DRM_IOCTL_DEF_DRV(VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_FREEMEM, via_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_DMA_INIT, via_dma_init, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_FLUSH, via_flush_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_DMA_BLIT, 
via_dma_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH) }; int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9dd395b90216..72ec2e2b6e97 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -99,47 +99,47 @@ */ #define VMW_IOCTL_DEF(ioctl, func, flags) \ - [DRM_IOCTL_NR(ioctl) - DRM_COMMAND_BASE] = {ioctl, flags, func} + [DRM_IOCTL_NR(DRM_IOCTL_##ioctl) - DRM_COMMAND_BASE] = {DRM_##ioctl, flags, func, DRM_IOCTL_##ioctl} /** * Ioctl definitions. */ static struct drm_ioctl_desc vmw_ioctls[] = { - VMW_IOCTL_DEF(DRM_IOCTL_VMW_GET_PARAM, vmw_getparam_ioctl, + VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl, + VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CURSOR_BYPASS, + VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, vmw_kms_cursor_bypass_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CONTROL_STREAM, vmw_overlay_ioctl, + VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, + VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_STREAM, vmw_stream_unref_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_CONTEXT, vmw_context_define_ioctl, + VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_CONTEXT, 
vmw_context_destroy_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_SURFACE, vmw_surface_define_ioctl, + VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_REF_SURFACE, vmw_surface_reference_ioctl, + VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_EXECBUF, vmw_execbuf_ioctl, + VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl, + VMW_IOCTL_DEF(VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl, DRM_AUTH | DRM_ROOT_ONLY | DRM_MASTER | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_FENCE_WAIT, vmw_fence_wait_ioctl, + VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_wait_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, + VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED) }; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 2a512bc0d4ab..7809d230adee 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -305,14 +305,16 @@ struct drm_ioctl_desc { unsigned int cmd; int flags; drm_ioctl_t *func; + unsigned int cmd_drv; }; /** * Creates a driver or general drm_ioctl_desc array entry for the given * ioctl, for use by drm_ioctl(). 
*/ -#define DRM_IOCTL_DEF(ioctl, _func, _flags) \ - [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags} + +#define DRM_IOCTL_DEF_DRV(ioctl, _func, _flags) \ + [DRM_IOCTL_NR(DRM_##ioctl)] = {.cmd = DRM_##ioctl, .func = _func, .flags = _flags, .cmd_drv = DRM_IOCTL_##ioctl} struct drm_magic_entry { struct list_head head; diff --git a/include/drm/i830_drm.h b/include/drm/i830_drm.h index 4b00d2dd4f68..61315c29b8f3 100644 --- a/include/drm/i830_drm.h +++ b/include/drm/i830_drm.h @@ -264,20 +264,20 @@ typedef struct _drm_i830_sarea { #define DRM_I830_GETPARAM 0x0c #define DRM_I830_SETPARAM 0x0d -#define DRM_IOCTL_I830_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_INIT, drm_i830_init_t) -#define DRM_IOCTL_I830_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_VERTEX, drm_i830_vertex_t) -#define DRM_IOCTL_I830_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_CLEAR, drm_i830_clear_t) -#define DRM_IOCTL_I830_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLUSH) -#define DRM_IOCTL_I830_GETAGE DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_GETAGE) -#define DRM_IOCTL_I830_GETBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETBUF, drm_i830_dma_t) -#define DRM_IOCTL_I830_SWAP DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_SWAP) -#define DRM_IOCTL_I830_COPY DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_COPY, drm_i830_copy_t) -#define DRM_IOCTL_I830_DOCOPY DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_DOCOPY) -#define DRM_IOCTL_I830_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLIP) -#define DRM_IOCTL_I830_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_EMIT, drm_i830_irq_emit_t) -#define DRM_IOCTL_I830_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_WAIT, drm_i830_irq_wait_t) -#define DRM_IOCTL_I830_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETPARAM, drm_i830_getparam_t) -#define DRM_IOCTL_I830_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_SETPARAM, drm_i830_setparam_t) +#define DRM_IOCTL_I830_INIT DRM_IOW( DRM_COMMAND_BASE + 
DRM_I830_INIT, drm_i830_init_t) +#define DRM_IOCTL_I830_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_I830_VERTEX, drm_i830_vertex_t) +#define DRM_IOCTL_I830_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_I830_CLEAR, drm_i830_clear_t) +#define DRM_IOCTL_I830_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLUSH) +#define DRM_IOCTL_I830_GETAGE DRM_IO ( DRM_COMMAND_BASE + DRM_I830_GETAGE) +#define DRM_IOCTL_I830_GETBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETBUF, drm_i830_dma_t) +#define DRM_IOCTL_I830_SWAP DRM_IO ( DRM_COMMAND_BASE + DRM_I830_SWAP) +#define DRM_IOCTL_I830_COPY DRM_IOW( DRM_COMMAND_BASE + DRM_I830_COPY, drm_i830_copy_t) +#define DRM_IOCTL_I830_DOCOPY DRM_IO ( DRM_COMMAND_BASE + DRM_I830_DOCOPY) +#define DRM_IOCTL_I830_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLIP) +#define DRM_IOCTL_I830_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_IRQ_EMIT, drm_i830_irq_emit_t) +#define DRM_IOCTL_I830_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I830_IRQ_WAIT, drm_i830_irq_wait_t) +#define DRM_IOCTL_I830_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETPARAM, drm_i830_getparam_t) +#define DRM_IOCTL_I830_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_SETPARAM, drm_i830_setparam_t) typedef struct _drm_i830_clear { int clear_color; diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 7f0028e1010b..000357bf3ac8 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t) #define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t) #define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) +#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define 
DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) diff --git a/include/drm/mga_drm.h b/include/drm/mga_drm.h index 3ffbc4798afa..c16097f99be0 100644 --- a/include/drm/mga_drm.h +++ b/include/drm/mga_drm.h @@ -248,7 +248,7 @@ typedef struct _drm_mga_sarea { #define DRM_MGA_DMA_BOOTSTRAP 0x0c #define DRM_IOCTL_MGA_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t) -#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, drm_lock_t) +#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, struct drm_lock) #define DRM_IOCTL_MGA_RESET DRM_IO( DRM_COMMAND_BASE + DRM_MGA_RESET) #define DRM_IOCTL_MGA_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_MGA_SWAP) #define DRM_IOCTL_MGA_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_CLEAR, drm_mga_clear_t) diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index fe917dee723a..01a714119506 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -197,4 +197,17 @@ struct drm_nouveau_sarea { #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 +#define DRM_IOCTL_NOUVEAU_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam) +#define DRM_IOCTL_NOUVEAU_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SETPARAM, struct drm_nouveau_setparam) +#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc) +#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free) +#define DRM_IOCTL_NOUVEAU_GROBJ_ALLOC DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GROBJ_ALLOC, struct drm_nouveau_grobj_alloc) +#define DRM_IOCTL_NOUVEAU_NOTIFIEROBJ_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, struct 
drm_nouveau_notifierobj_alloc) +#define DRM_IOCTL_NOUVEAU_GPUOBJ_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GPUOBJ_FREE, struct drm_nouveau_gpuobj_free) +#define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) +#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini) +#define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info) + #endif /* __NOUVEAU_DRM_H__ */ diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 0acaf8f91437..10f8b53bdd40 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -547,8 +547,8 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle) #define DRM_IOCTL_RADEON_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs) #define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) -#define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) -#define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) +#define DRM_IOCTL_RADEON_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) +#define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) typedef struct drm_radeon_init { diff --git 
a/include/drm/savage_drm.h b/include/drm/savage_drm.h index 8a576ef01821..4863cf6bf96f 100644 --- a/include/drm/savage_drm.h +++ b/include/drm/savage_drm.h @@ -63,10 +63,10 @@ typedef struct _drm_savage_sarea { #define DRM_SAVAGE_BCI_EVENT_EMIT 0x02 #define DRM_SAVAGE_BCI_EVENT_WAIT 0x03 -#define DRM_IOCTL_SAVAGE_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t) -#define DRM_IOCTL_SAVAGE_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t) -#define DRM_IOCTL_SAVAGE_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t) -#define DRM_IOCTL_SAVAGE_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t) +#define DRM_IOCTL_SAVAGE_BCI_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t) +#define DRM_IOCTL_SAVAGE_BCI_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t) +#define DRM_IOCTL_SAVAGE_BCI_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t) +#define DRM_IOCTL_SAVAGE_BCI_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t) #define SAVAGE_DMA_PCI 1 #define SAVAGE_DMA_AGP 3 -- cgit v1.2.3-59-g8ed1b From 99c796df94afca5256860dd4760017f1dbb3480c Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Aug 2010 22:13:22 +0100 Subject: VIDEO: amba clcd: don't disable an already disabled clock Fix the clock enable/disable tracking in the AMBA CLCD driver so that the driver doesn't try to disable an already disabled clock, thereby causing the clock (if shared) to become unbalanced. This resolves a problem with CLCD on LPC32xx ARM platforms. 
Reported-by: Kevin Wells Signed-off-by: Russell King --- drivers/video/amba-clcd.c | 10 ++++++++-- include/linux/amba/clcd.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index afe21e6eb544..1c2c68356ea7 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -80,7 +80,10 @@ static void clcdfb_disable(struct clcd_fb *fb) /* * Disable CLCD clock source. */ - clk_disable(fb->clk); + if (fb->clk_enabled) { + fb->clk_enabled = false; + clk_disable(fb->clk); + } } static void clcdfb_enable(struct clcd_fb *fb, u32 cntl) @@ -88,7 +91,10 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl) /* * Enable the CLCD clock source. */ - clk_enable(fb->clk); + if (!fb->clk_enabled) { + fb->clk_enabled = true; + clk_enable(fb->clk); + } /* * Bring up by first enabling.. diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index ca16c3801a1e..be33b3affc8a 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -150,6 +150,7 @@ struct clcd_fb { u16 off_cntl; u32 clcd_cntl; u32 cmap[16]; + bool clk_enabled; }; static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) -- cgit v1.2.3-59-g8ed1b From d7627467b7a8dd6944885290a03a07ceb28c10eb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Aug 2010 23:52:56 +0100 Subject: Make do_execve() take a const filename pointer Make do_execve() take a const filename pointer so that kernel_execve() compiles correctly on ARM: arch/arm/kernel/sys_arm.c:88: warning: passing argument 1 of 'do_execve' discards qualifiers from pointer target type This also requires the argv and envp arguments to be consted twice, once for the pointer array and once for the strings the array points to. This is because do_execve() passes a pointer to the filename (now const) to copy_strings_kernel(). 
A simpler alternative would be to cast the filename pointer in do_execve() when it's passed to copy_strings_kernel(). do_execve() may not change any of the strings it is passed as part of the argv or envp lists as some of them are in .rodata, so marking these strings as const should be fine. Further, kernel_execve() and sys_execve() need to be changed to match. This has been test built on x86_64, frv, arm and mips. Signed-off-by: David Howells Tested-by: Ralf Baechle Acked-by: Russell King Signed-off-by: Linus Torvalds --- arch/alpha/kernel/process.c | 5 +++-- arch/arm/kernel/sys_arm.c | 14 +++++++++----- arch/avr32/kernel/process.c | 5 +++-- arch/avr32/kernel/sys_avr32.c | 4 +++- arch/blackfin/kernel/process.c | 4 +++- arch/cris/arch-v10/kernel/process.c | 4 +++- arch/cris/arch-v32/kernel/process.c | 6 ++++-- arch/frv/kernel/process.c | 5 +++-- arch/h8300/kernel/process.c | 5 ++++- arch/h8300/kernel/sys_h8300.c | 4 +++- arch/ia64/kernel/process.c | 4 +++- arch/m32r/kernel/process.c | 4 ++-- arch/m32r/kernel/sys_m32r.c | 4 +++- arch/m68k/kernel/process.c | 4 +++- arch/m68k/kernel/sys_m68k.c | 4 +++- arch/m68knommu/kernel/process.c | 4 +++- arch/m68knommu/kernel/sys_m68k.c | 4 +++- arch/microblaze/kernel/sys_microblaze.c | 10 +++++++--- arch/mips/kernel/syscall.c | 10 +++++++--- arch/mn10300/kernel/process.c | 4 ++-- arch/parisc/hpux/fs.c | 6 ++++-- arch/parisc/kernel/process.c | 15 ++++++++++----- arch/powerpc/kernel/process.c | 5 +++-- arch/s390/kernel/process.c | 5 +++-- arch/score/kernel/sys_score.c | 10 +++++++--- arch/sh/kernel/process_32.c | 7 ++++--- arch/sh/kernel/process_64.c | 4 ++-- arch/sh/kernel/sys_sh32.c | 4 +++- arch/sh/kernel/sys_sh64.c | 4 +++- arch/sparc/kernel/process_32.c | 6 ++++-- arch/sparc/kernel/process_64.c | 4 ++-- arch/sparc/kernel/sys_sparc_32.c | 4 +++- arch/sparc/kernel/sys_sparc_64.c | 4 +++- arch/tile/kernel/process.c | 5 +++-- arch/um/kernel/exec.c | 5 +++-- arch/um/kernel/syscall.c | 4 +++- arch/x86/include/asm/syscalls.h | 5
+++-- arch/x86/kernel/process.c | 5 +++-- arch/x86/kernel/sys_i386_32.c | 4 +++- arch/xtensa/kernel/process.c | 5 +++-- fs/binfmt_misc.c | 2 +- fs/binfmt_script.c | 3 ++- fs/exec.c | 21 +++++++++++---------- include/linux/binfmts.h | 7 ++++--- include/linux/sched.h | 4 +++- include/linux/syscalls.h | 2 +- init/do_mounts_initrd.c | 7 ++++--- init/main.c | 6 +++--- kernel/kmod.c | 4 +++- security/commoncap.c | 2 +- 50 files changed, 179 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88e608aebc8c..842dba308eab 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -387,8 +387,9 @@ EXPORT_SYMBOL(dump_elf_task_fp); * sys_execve() executes a new program. */ asmlinkage int -do_sys_execve(const char __user *ufilename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +do_sys_execve(const char __user *ufilename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char *filename; diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 5b7c541a4c63..62e7c61d0342 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -62,8 +62,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) /* sys_execve() executes a new program. 
* This is called indirectly via a small wrapper */ -asmlinkage int sys_execve(const char __user *filenamei, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +asmlinkage int sys_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char * filename; @@ -78,14 +79,17 @@ out: return error; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { struct pt_regs regs; int ret; memset(&regs, 0, sizeof(struct pt_regs)); - ret = do_execve(filename, (char __user * __user *)argv, - (char __user * __user *)envp, &regs); + ret = do_execve(filename, + (const char __user *const __user *)argv, + (const char __user *const __user *)envp, &regs); if (ret < 0) goto out; diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index e5daddff397d..9c46aaad11ce 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -384,8 +384,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) } asmlinkage int sys_execve(const char __user *ufilename, - char __user *__user *uargv, - char __user *__user *uenvp, struct pt_regs *regs) + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 459349b5ed5a..62635a09ae3e 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -7,7 +7,9 @@ */ #include -int kernel_execve(const char *file, char **argv, char **envp) +int kernel_execve(const char *file, + const char *const *argv, + const char *const *envp) { register long scno asm("r8") = __NR_execve; register long sc1 asm("r12") = (long)file; diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index a566f61c002a..01f98cb964d2
100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -209,7 +209,9 @@ copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char *filename; diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 93f0f64b1326..9a57db6907f5 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -204,7 +204,9 @@ asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *fname, char **argv, char **envp, +asmlinkage int sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, struct pt_regs *regs) { diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 2661a9529d70..562f84718906 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -218,8 +218,10 @@ sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp, /* sys_execve() executes a new program. */ asmlinkage int -sys_execve(const char *fname, char **argv, char **envp, long r13, long mof, long srp, - struct pt_regs *regs) +sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 428931cf2f0c..2b63b0191f52 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -250,8 +250,9 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. 
*/ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 8b7b78d77d5c..97478138e361 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -212,7 +212,10 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *name, char **argv, char **envp,int dummy,...) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp, + int dummy, ...) { int error; char * filename; diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index f9b3f44da69f..dc1ac0243b78 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -51,7 +51,9 @@ asmlinkage void syscall_print(void *dummy,...) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long res __asm__("er0"); register char *const *_c __asm__("er3") = envp; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a879c03b7f1c..16f1c7b04c69 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -633,7 +633,9 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) } long -sys_execve (const char __user *filename, char __user * __user *argv, char __user * __user *envp, +sys_execve (const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { char *fname; diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 8665a4d868ec..422bea9f1dbc 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -289,8 +289,8 @@ asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2, * sys_execve() executes a new program. */ asmlinkage int sys_execve(const char __user *ufilename, - char __user * __user *uargv, - char __user * __user *uenvp, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, struct pt_regs regs) { diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index 0a00f467edfa..d841fb6cc703 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -93,7 +93,9 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __scno __asm__ ("r7") = __NR_execve; register long __arg3 __asm__ ("r2") = (long)(envp); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 221d0b71ce39..18732ab23292 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -315,7 +315,9 @@ EXPORT_SYMBOL(dump_fpu); /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 77896692eb0a..2f431ece7b5f 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -459,7 +459,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6350f68cd026..4d090d3c0897 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -350,7 +350,9 @@ void dump(struct pt_regs *fp) /* * sys_execve() executes a new program. 
*/ -asmlinkage int sys_execve(const char *name, char **argv, char **envp) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp) { int error; char * filename; diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index d65e9c4c930c..68488ae47f0a 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -44,7 +44,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 6abab6ebedbe..2250fe9d269b 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -47,8 +47,10 @@ asmlinkage long microblaze_clone(int flags, unsigned long stack, struct pt_regs return do_fork(flags, stack, regs, 0, NULL, NULL); } -asmlinkage long microblaze_execve(const char __user *filenamei, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +asmlinkage long microblaze_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs) { int error; char *filename; @@ -77,7 +79,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register const char *__a __asm__("r5") = filename; register const void *__b __asm__("r6") = argv; diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index bddce0bca195..1dc6edff45e0 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -258,8 +258,10 @@ asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user *__user *) (long)regs.regs[5], - (char __user *__user *) (long)regs.regs[6], &regs); + error = do_execve(filename, + (const char __user *const __user *) (long)regs.regs[5], + (const char __user *const __user *) (long)regs.regs[6], + &regs); putname(filename); out: @@ -436,7 +438,9 @@ asmlinkage void bad_stack(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __a0 asm("$4") = (unsigned long) filename; register unsigned long __a1 asm("$5") = (unsigned long) argv; diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 762eb325b949..f48373e2bc1c 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -269,8 +269,8 @@ asmlinkage long sys_vfork(void) } asmlinkage long sys_execve(const char __user *name, - char __user * __user *argv, - char __user * __user *envp) + const char __user *const __user *argv, + const char __user *const __user *envp) { char *filename; int error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 1444875a7611..0dc8543acb4f 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -41,8 +41,10 @@ int hpux_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 76332dadc6e9..4b4b9181a1a0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -348,17 +348,22 @@ asmlinkage int sys_execve(struct pt_regs *regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); out: return error; } -extern int __execve(const char *filename, char *const argv[], - char *const envp[], 
struct task_struct *task); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +extern int __execve(const char *filename, + const char *const argv[], + const char *const envp[], struct task_struct *task); +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { return __execve(filename, argv, envp, current); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index feacfb789686..91356ffda2ca 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1034,8 +1034,9 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_spe_to_thread(current); - error = do_execve(filename, (char __user * __user *) a1, - (char __user * __user *) a2, regs); + error = do_execve(filename, + (const char __user *const __user *) a1, + (const char __user *const __user *) a2, regs); putname(filename); out: return error; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7eafaf2662b9..d3a2d1c6438e 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -267,8 +267,9 @@ asmlinkage void execve_tail(void) /* * sys_execve() executes a new program. 
*/ -SYSCALL_DEFINE3(execve, const char __user *, name, char __user * __user *, argv, - char __user * __user *, envp) +SYSCALL_DEFINE3(execve, const char __user *, name, + const char __user *const __user *, argv, + const char __user *const __user *, envp) { struct pt_regs *regs = task_pt_regs(current); char *filename; diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c index 651096ff8db4..e478bf9a7e91 100644 --- a/arch/score/kernel/sys_score.c +++ b/arch/score/kernel/sys_score.c @@ -99,8 +99,10 @@ score_execve(struct pt_regs *regs) if (IS_ERR(filename)) return error; - error = do_execve(filename, (char __user *__user*)regs->regs[5], - (char __user *__user *) regs->regs[6], regs); + error = do_execve(filename, + (const char __user *const __user *)regs->regs[5], + (const char __user *const __user *)regs->regs[6], + regs); putname(filename); return error; @@ -110,7 +112,9 @@ score_execve(struct pt_regs *regs) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __r4 asm("r4") = (unsigned long) filename; register unsigned long __r5 asm("r5") = (unsigned long) argv; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 052981972ae6..762a13984bbd 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -296,9 +296,10 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5, /* * sys_execve() executes a new program. 
*/ -asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv, - char __user * __user *uenvp, unsigned long r7, - struct pt_regs __regs) +asmlinkage int sys_execve(const char __user *ufilename, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + unsigned long r7, struct pt_regs __regs) { struct pt_regs *regs = RELOC_HIDE(&__regs, 0); int error; diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 68d128d651b3..210c1cabcb7f 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -497,8 +497,8 @@ asmlinkage int sys_execve(const char *ufilename, char **uargv, goto out; error = do_execve(filename, - (char __user * __user *)uargv, - (char __user * __user *)uenvp, + (const char __user *const __user *)uargv, + (const char __user *const __user *)uenvp, pregs); putname(filename); out: diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index eb68bfdd86e6..f56b6fe5c5d0 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -71,7 +71,9 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __sc0 __asm__ ("r3") = __NR_execve; register long __sc4 __asm__ ("r4") = (long) filename; diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 287235768bc5..c5a38c4bf410 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -33,7 +33,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve); register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 40e29fc8a4d6..17529298c50a 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -633,8 +633,10 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if(IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *)regs->u_regs[base + UREG_I1], - (char __user * __user *)regs->u_regs[base + UREG_I2], + (const char __user *const __user *) + regs->u_regs[base + UREG_I1], + (const char __user *const __user *) + regs->u_regs[base + UREG_I2], regs); putname(filename); out: diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a368b45..485f54748384 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -739,9 +739,9 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I1], - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I2], regs); putname(filename); if (!error) { diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ee995b7dae7e..50794137d710 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -282,7 +282,9 @@ out: * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3d435c42e6db..f836f4e93afe 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -758,7 +758,9 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index ed590ad0acdc..985cc28c74c5 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. 
*/ -long _sys_execve(char __user *path, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +long _sys_execve(const char __user *path, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 59b20d93b6d4..cd145eda3579 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,8 +44,9 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) PT_REGS_SP(regs) = esp; } -static long execve1(const char *file, char __user * __user *argv, - char __user *__user *env) +static long execve1(const char *file, + const char __user *const __user *argv, + const char __user *const __user *env) { long error; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 7427c0b1930c..5ddb246626db 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -51,7 +51,9 @@ long old_mmap(unsigned long addr, unsigned long len, return err; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { mm_segment_t fs; int ret; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index feb2ff9bfc2d..f1d8b441fc77 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); -long sys_execve(const char __user *, char __user * __user *, - char __user * __user *, struct pt_regs *); +long sys_execve(const char __user *, + const char __user *const __user *, + const char __user *const __user *, struct pt_regs *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); diff --git a/arch/x86/kernel/process.c 
b/arch/x86/kernel/process.c index 64ecaf0af9af..57d1868a86aa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. */ -long sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +long sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 196552bb412c..d5e06624e34a 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -28,7 +28,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 7c2f38f68ebb..e3558b9a58ba 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -318,8 +318,9 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp, */ asmlinkage -long xtensa_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, +long xtensa_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, long a3, long a4, long a5, struct pt_regs *regs) { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd201716..a7528b913936 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) Node *fmt; struct file * interp_file = NULL; char iname[BINPRM_BUF_SIZE]; - char *iname_addr = iname; + const char 
*iname_addr = iname; int retval; int fd_binary = -1; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55afb22..396a9884591f 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -16,7 +16,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { - char *cp, *i_name, *i_arg; + const char *i_arg, *i_name; + char *cp; struct file *file; char interp[BINPRM_BUF_SIZE]; int retval; diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..05c7d6b84df7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -361,13 +361,13 @@ err: /* * count() counts the number of strings in array ARGV. */ -static int count(char __user * __user * argv, int max) +static int count(const char __user * const __user * argv, int max) { int i = 0; if (argv != NULL) { for (;;) { - char __user * p; + const char __user * p; if (get_user(p, argv)) return -EFAULT; @@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, char __user * __user * argv, +static int copy_strings(int argc, const char __user *const __user *argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, int ret; while (argc-- > 0) { - char __user *str; + const char __user *str; int len; unsigned long pos; @@ -470,12 +470,13 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. 
*/ -int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - r = copy_strings(argc, (char __user * __user *)argv, bprm); + r = copy_strings(argc, (const char __user *const __user *)argv, bprm); set_fs(oldfs); return r; } @@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); void setup_new_exec(struct linux_binprm * bprm) { int i, ch; - char * name; + const char *name; char tcomm[sizeof(current->comm)]; arch_pick_mmap_layout(current->mm); @@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. */ -int do_execve(char * filename, - char __user *__user *argv, - char __user *__user *envp, +int do_execve(const char * filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs * regs) { struct linux_binprm *bprm; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c809e286d213..a065612fc928 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -50,8 +50,8 @@ struct linux_binprm{ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - char * filename; /* Name of binary as seen by procps */ - char * interp; /* Name of the binary really executed. Most + const char * filename; /* Name of binary as seen by procps */ + const char * interp; /* Name of the binary really executed. 
Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; @@ -126,7 +126,8 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); -extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); +extern int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); diff --git a/include/linux/sched.h b/include/linux/sched.h index ce160d68f5e7..1e2a6db2d7dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2109,7 +2109,9 @@ extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); +extern int do_execve(const char *, + const char __user * const __user *, + const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6e5d19788634..e6319d18a55d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -820,7 +820,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); asmlinkage long sys_perf_event_open( diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 2b108538d0d9..3098a38f3ae1 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ 
-24,10 +24,11 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void * shell) +static int __init do_linuxrc(void *_shell) { - static char *argv[] = { "linuxrc", NULL, }; - extern char * envp_init[]; + static const char *argv[] = { "linuxrc", NULL, }; + extern const char *envp_init[]; + const char *shell = _shell; sys_close(old_fd);sys_close(root_fd); sys_setsid(); diff --git a/init/main.c b/init/main.c index 22d61cb06f98..94ab488039aa 100644 --- a/init/main.c +++ b/init/main.c @@ -197,8 +197,8 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; -char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; extern const struct obs_kernel_param __setup_start[], __setup_end[]; @@ -809,7 +809,7 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(char *init_filename) +static void run_init_process(const char *init_filename) { argv_init[0] = init_filename; kernel_execve(init_filename, argv_init, envp_init); diff --git a/kernel/kmod.c b/kernel/kmod.c index 6e9b19667a8d..9cd0591c96a2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data) goto fail; } - retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); + retval = kernel_execve(sub_info->path, + (const char *const *)sub_info->argv, + (const char *const *)sub_info->envp); /* Exec failed? */ fail: diff --git a/security/commoncap.c b/security/commoncap.c index 4e015996dd4d..9d172e6e330c 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -40,7 +40,7 @@ * * Warn if that happens, once per boot. 
*/ -static void warn_setuid_and_fcaps_mixed(char *fname) +static void warn_setuid_and_fcaps_mixed(const char *fname) { static int warned; if (!warned) { -- cgit v1.2.3-59-g8ed1b From 5c79a5ae23e72fa12f1c7c528f62bf3ea35da0dc Mon Sep 17 00:00:00 2001 From: Ernst Schwab Date: Mon, 16 Aug 2010 15:10:11 +0200 Subject: spi.h: missing kernel-doc notation, please fix Added comments in kernel-doc notation for previously added struct fields. Signed-off-by: Ernst Schwab Acked-by: Randy Dunlap Signed-off-by: Grant Likely --- include/linux/spi/spi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ae0a5286f558..92e52a1e6af3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -213,6 +213,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @flags: other constraints relevant to this driver + * @bus_lock_spinlock: spinlock for SPI bus locking + * @bus_lock_mutex: mutex for SPI bus locking + * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. -- cgit v1.2.3-59-g8ed1b From 87e99511ea54510ffb60b98001d108794d5037f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:05:45 +0200 Subject: kill BH_Ordered flag Instead of abusing a buffer_head flag just add a variant of sync_dirty_buffer which allows passing the exact type of write flag required. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 17 ++++++++-------- fs/jbd/commit.c | 49 +++++++++++++++++++++++---------------------- fs/jbd2/commit.c | 39 ++++++++++++++---------------------- fs/nilfs2/super.c | 28 +++++++++++++------------- include/linux/buffer_head.h | 3 +-- 5 files changed, 63 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 50efa339e051..6c8ad977f3d4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2911,13 +2911,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_delay(bh)); BUG_ON(buffer_unwritten(bh)); - /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - /* * Only clear out a write error when rewriting */ @@ -3021,7 +3014,7 @@ EXPORT_SYMBOL(ll_rw_block); * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3023,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3036,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9ddaa0c49..95d8c11c929e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 
+136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; 
struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); - - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..68345430fb48 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - 
barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. " - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a72529..72c1cf83eb85 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Ordered, ordered) BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) @@ -183,6 +181,7 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); +int __sync_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t 
bblock, unsigned blocksize); -- cgit v1.2.3-59-g8ed1b From 9cb569d601e0b93e01c20a22872270ec663b75f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:06:24 +0200 Subject: remove SWRITE* I/O types These flags aren't real I/O types, but tell ll_rw_block to always lock the buffer instead of giving up on a failed trylock. Instead add a new write_dirty_buffer helper that implements this semantic and use it from the existing SWRITE* callers. Note that the ll_rw_block code had a bug where it didn't promote WRITE_SYNC_PLUG properly, which this patch fixes. In the ufs code clean up the helper that used to call ll_rw_block to mirror sync_dirty_buffer, which is the function it implements for compound buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 52 +++++++++++++++++++++++++-------------------- fs/fat/misc.c | 4 +++- fs/jbd/checkpoint.c | 4 +++- fs/jbd/journal.c | 2 +- fs/jbd/revoke.c | 2 +- fs/jbd2/checkpoint.c | 4 +++- fs/jbd2/journal.c | 2 +- fs/jbd2/revoke.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/ufs/balloc.c | 24 +++++++-------------- fs/ufs/ialloc.c | 18 ++++++---------- fs/ufs/truncate.c | 18 ++++++---------- fs/ufs/util.c | 20 +++++++---------- fs/ufs/util.h | 3 +-- include/linux/buffer_head.h | 1 + include/linux/fs.h | 9 -------- 16 files changed, 73 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 6c8ad977f3d4..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. 
*/ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2949,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). 
* * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2980,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3009,6 +3002,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. 
The caller must have a ref on diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); 
ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && 
ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void 
ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 72c1cf83eb85..ec94c12f21da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -182,6 +182,7 @@ void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int rw); +void write_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a96b4d83fc1..29f7c975304c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,9 +125,6 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO * shortly. The device must still be unplugged explicitly, @@ -138,9 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. 
- * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when @@ -155,7 +149,6 @@ struct inodes_stat_t { #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -165,8 +158,6 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) -#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) -#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3-59-g8ed1b From 2a4419b5b2a77f3f4537c14f7ad7df95770655dd Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:33 +1000 Subject: fs: fs_struct rwlock to spinlock fs: fs_struct rwlock to spinlock struct fs_struct.lock is an rwlock with the read-side used to protect root and pwd members while taking references to them. Taking a reference to a path typically requires just 2 atomic ops, so the critical section is very small. Parallel read-side operations would have cacheline contention on the lock, the dentry, and the vfsmount cachelines, so the rwlock is unlikely to ever give a real parallelism increase. Replace it with a spinlock to avoid one or two atomic operations in typical path lookup fastpath. 
Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/staging/pohmelfs/path_entry.c | 8 ++++---- fs/exec.c | 4 ++-- fs/fs_struct.c | 32 ++++++++++++++++---------------- include/linux/fs_struct.h | 14 +++++++------- kernel/fork.c | 10 +++++----- 5 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index cdc4dd50d638..8ec83d2dffb7 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le return -ENOENT; } - read_lock(&current->fs->lock); + spin_lock(&current->fs->lock); path.mnt = mntget(current->fs->root.mnt); - read_unlock(&current->fs->lock); + spin_unlock(&current->fs->lock); path.dentry = d; @@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) return -ENOENT; } - read_lock(&current->fs->lock); + spin_lock(&current->fs->lock); root = dget(current->fs->root.dentry); - read_unlock(&current->fs->lock); + spin_unlock(&current->fs->lock); spin_lock(&dcache_lock); diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..5adab2c93eca 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1117,7 +1117,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1134,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb9a2c0..ed45a9cf5f3d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); +
spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); 
init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index eca3d5202138..a42b5bf02f8b 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -5,7 +5,7 @@ struct fs_struct { int users; - rwlock_t lock; + spinlock_t lock; int umask; int in_exec; struct path root, pwd; @@ -23,29 +23,29 @@ extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 98b450876f93..856eac3ec52e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->in_exec) { - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return -EAGAIN; } fs->users++; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return 0; } tsk->fs = copy_fs_struct(fs); @@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = 
current->fs; - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } if (new_mm) { -- cgit v1.2.3-59-g8ed1b From ee2ffa0dfdd2db19705f2ba1c6a4c0bfe8122dd8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:35 +1000 Subject: fs: cleanup files_lock locking fs: cleanup files_lock locking Lock tty_files with a new spinlock, tty_files_lock; provide helpers to manipulate the per-sb files list; unexport the files_lock spinlock. Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Acked-by: Andi Kleen Acked-by: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +++++- drivers/char/tty_io.c | 26 ++++++++++++++++++-------- fs/file_table.c | 42 ++++++++++++++++++------------------------ fs/open.c | 4 ++-- include/linux/fs.h | 7 ++----- include/linux/tty.h | 1 + security/selinux/hooks.c | 4 ++-- 7 files changed, 48 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index ad46eae1f9bb..2c64faa8efa4 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; - file_move(filp, &tty->tty_files); + + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0350c42375a2..cd5b829634ea 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +/* Spinlock to protect the tty->tty_files list */ 
+DEFINE_SPINLOCK(tty_files_lock); + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -235,11 +238,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - file_list_lock(); + spin_lock(&tty_files_lock); list_for_each(p, &tty->tty_files) { count++; } - file_list_unlock(); + spin_unlock(&tty_files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty) workqueue with the lock held */ check_tty_count(tty, "tty_hangup"); - file_list_lock(); + spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) @@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty) __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_ldisc_hangup(tty); @@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(driver->owner); - file_list_lock(); + spin_lock(&tty_files_lock); list_del_init(&tty->tty_files); - file_list_unlock(); + spin_unlock(&tty_files_lock); put_pid(tty->pgrp); put_pid(tty->session); @@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. 
*/ - file_kill(filp); + spin_lock(&tty_files_lock); + BUG_ON(list_empty(&filp->f_u.fu_list)); + list_del_init(&filp->f_u.fu_list); + spin_unlock(&tty_files_lock); filp->private_data = NULL; /* @@ -1840,7 +1846,11 @@ got_driver: } filp->private_data = tty; - file_move(filp, &tty->tty_files); + BUG_ON(list_empty(&filp->f_u.fu_list)); + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) diff --git a/fs/file_table.c b/fs/file_table.c index edecd36fed9b..6f0e62ecfddd 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,8 +32,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +248,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +void file_sb_list_add(struct file *file, struct super_block *sb) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); + spin_lock(&files_lock); + BUG_ON(!list_empty(&file->f_u.fu_list)); + list_add(&file->f_u.fu_list, &sb->s_files); + 
spin_unlock(&files_lock); } -void file_kill(struct file *file) +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + spin_lock(&files_lock); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + spin_unlock(&files_lock); } } @@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block *sb) struct file *file; /* Check that no files are currently opened for writing. */ - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(file, &sb->s_files, f_u.fu_list) { struct inode *inode = file->f_path.dentry->d_inode; @@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block *sb) if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; } - file_list_unlock(); + spin_unlock(&files_lock); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + spin_unlock(&files_lock); return 0; } @@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(f, &sb->s_files, f_u.fu_list) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -408,16 +405,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. 
*/ + spin_unlock(&files_lock); mnt_drop_write(mnt); mntput(mnt); goto retry; } - file_list_unlock(); + spin_unlock(&files_lock); } void __init files_init(unsigned long mempages) diff --git a/fs/open.c b/fs/open.c index 630715f9f73d..d74e1983e8dc 100644 --- a/fs/open.c +++ b/fs/open.c @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/include/linux/fs.h b/include/linux/fs.h index 29f7c975304c..5a9a9e5a3705 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -944,9 +944,6 @@ struct file { unsigned long f_mnt_write_state; #endif }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) @@ -2188,8 +2185,8 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index 1437da3ddc62..f6b371a2514e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex 
tty_mutex; +extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 42043f96e54f..bd7da0f0ccf3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - file_list_lock(); + spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { struct inode *inode; @@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, drop_tty = 1; } } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_kref_put(tty); } /* Reset controlling tty. */ -- cgit v1.2.3-59-g8ed1b From d996b62a8df1d935b01319bf8defb95b5709f7b8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:36 +1000 Subject: tty: fix fu_list abuse tty: fix fu_list abuse tty code abuses fu_list, which causes a bug in remount,ro handling. If a tty device node is opened on a filesystem, then the last link to the inode removed, the filesystem will be allowed to be remounted readonly. This is because fs_may_remount_ro does not find the 0 link tty inode on the file sb list (because the tty code incorrectly removed it to use for its own purpose). This can result in a filesystem with errors after it is marked "clean". Taking idea from Christoph's initial patch, allocate a tty private struct at file->private_data and put our required list fields in there, linking file and tty. This makes tty nodes behave the same way as other device nodes and avoid meddling with the vfs, and avoids this bug. The error handling is not trivial in the tty code, so for this bugfix, I take the simple approach of using __GFP_NOFAIL and don't worry about memory errors. This is not a problem because our allocator doesn't fail small allocs as a rule anyway. 
So proper error handling is left as an exercise for tty hackers. [ Arguably filesystem's device inode would ideally be divorced from the driver's pseudo inode when it is opened, but in practice it's not clear whether that will ever be worth implementing. ] Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +--- drivers/char/tty_io.c | 84 +++++++++++++++++++++++++++++++----------------- fs/internal.h | 2 ++ include/linux/fs.h | 2 -- include/linux/tty.h | 8 +++++ security/selinux/hooks.c | 5 ++- 6 files changed, 69 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 2c64faa8efa4..c350d01716bd 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -675,12 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp) } set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - filp->private_data = tty; - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index cd5b829634ea..949067a0bd47 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -188,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty) kfree(tty); } +static inline struct tty_struct *file_tty(struct file *file) +{ + return ((struct tty_file_private *)file->private_data)->tty; +} + +/* Associate a new file with the tty structure */ +void tty_add_file(struct tty_struct *tty, struct file *file) +{ + struct tty_file_private *priv; + + /* XXX: must implement proper error handling in callers */ + priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL); + + priv->tty = tty; + priv->file = file; + file->private_data = priv; + + 
spin_lock(&tty_files_lock); + list_add(&priv->list, &tty->tty_files); + spin_unlock(&tty_files_lock); +} + +/* Delete file from its tty */ +void tty_del_file(struct file *file) +{ + struct tty_file_private *priv = file->private_data; + + spin_lock(&tty_files_lock); + list_del(&priv->list); + spin_unlock(&tty_files_lock); + file->private_data = NULL; + kfree(priv); +} + + #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) /** @@ -500,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; + struct tty_file_private *priv; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -509,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { + if (redirect && file_tty(redirect) == tty) { f = redirect; redirect = NULL; } @@ -524,7 +560,8 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? 
*/ - list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { + list_for_each_entry(priv, &tty->tty_files, list) { + filp = priv->file; if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) @@ -892,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int i; - struct tty_struct *tty; - struct inode *inode; + struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; - tty = file->private_data; - inode = file->f_path.dentry->d_inode; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) @@ -1068,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg) static ssize_t tty_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct tty_struct *tty; struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); + struct tty_ldisc *ld; ssize_t ret; - struct tty_ldisc *ld; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; if (!tty || !tty->ops->write || @@ -1510,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx) int tty_release(struct inode *inode, struct file *filp) { - struct tty_struct *tty, *o_tty; + struct tty_struct *tty = file_tty(filp); + struct tty_struct *o_tty; int pty_master, tty_closing, o_tty_closing, do_sleep; int devpts; int idx; char buf[64]; - tty = filp->private_data; if (tty_paranoia_check(tty, inode, "tty_release_dev")) return 0; @@ -1674,11 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. 
*/ - spin_lock(&tty_files_lock); - BUG_ON(list_empty(&filp->f_u.fu_list)); - list_del_init(&filp->f_u.fu_list); - spin_unlock(&tty_files_lock); - filp->private_data = NULL; + tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. @@ -1845,12 +1875,8 @@ got_driver: return PTR_ERR(tty); } - filp->private_data = tty; - BUG_ON(list_empty(&filp->f_u.fu_list)); - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); + check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -1926,11 +1952,10 @@ got_driver: static unsigned int tty_poll(struct file *filp, poll_table *wait) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; int ret = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll")) return 0; @@ -1943,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int __tty_fasync(int fd, struct file *filp, int on) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync")) goto out; @@ -2501,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty); */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct tty_struct *tty, *real_tty; + struct tty_struct *tty = file_tty(file); + struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; struct inode *inode = file->f_dentry->d_inode; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; @@ -2629,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = 
file->f_dentry->d_inode; - struct tty_struct *tty = file->private_data; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; @@ -2721,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty) if (!filp) continue; if (filp->f_op->read == tty_read && - filp->private_data == tty) { + file_tty(filp) == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): fd#%d opened to the tty\n", task_pid_nr(p), p->comm, i); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc60a66..6a5c13a80660 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a9a9e5a3705..5e65add0f163 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2185,8 +2185,6 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index f6b371a2514e..67d64e6efe7a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -329,6 +329,13 @@ struct tty_struct { struct tty_port *port; }; +/* Each of a tty's open files has private_data pointing to tty_file_private */ +struct tty_file_private { + struct tty_struct *tty; + struct file *file; + struct list_head list; +}; + /* tty magic number */ #define TTY_MAGIC 0x5401 @@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations 
*fops); extern struct tty_struct *alloc_tty_struct(void); +extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bd7da0f0ccf3..4796ddd4e721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2172,6 +2172,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (tty) { spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { + struct tty_file_private *file_priv; struct inode *inode; /* Revalidate access to controlling tty. @@ -2179,7 +2180,9 @@ static inline void flush_unauthorized_files(const struct cred *cred, than using file_has_perm, as this particular open file may belong to another process and we are only interested in the inode-based check here. */ - file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); + file_priv = list_first_entry(&tty->tty_files, + struct tty_file_private, list); + file = file_priv->file; inode = file->f_path.dentry->d_inode; if (inode_has_perm(cred, inode, FILE__READ | FILE__WRITE, NULL)) { -- cgit v1.2.3-59-g8ed1b From 2dc91abe03d8ce6dd7f9251faffafca5f6b9e85d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:37 +1000 Subject: lglock: introduce special lglock and brlock spin locks lglock: introduce special lglock and brlock spin locks This patch introduces "local-global" locks (lglocks). These can be used to: - Provide fast exclusive access to per-CPU data, with exclusive access to another CPU's data allowed but possibly subject to contention, and to provide very slow exclusive access to all per-CPU data. - Or to provide very fast and scalable read serialisation, and to provide very slow exclusive serialisation of data (not necessarily per-CPU data). 
Brlocks are also implemented as a short-hand notation for the latter use case. Thanks to Paul for local/global naming convention. Cc: linux-kernel@vger.kernel.org Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 172 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 include/linux/lglock.h (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h new file mode 100644 index 000000000000..b288cb713b90 --- /dev/null +++ b/include/linux/lglock.h @@ -0,0 +1,172 @@ +/* + * Specialised local-global spinlock. Can only be declared as global variables + * to avoid overhead and keep things simple (and we don't want to start using + * these inside dynamically allocated structures). + * + * "local/global locks" (lglocks) can be used to: + * + * - Provide fast exclusive access to per-CPU data, with exclusive access to + * another CPU's data allowed but possibly subject to contention, and to + * provide very slow exclusive access to all per-CPU data. + * - Or to provide very fast and scalable read serialisation, and to provide + * very slow exclusive serialisation of data (not necessarily per-CPU data). + * + * Brlocks are also implemented as a short-hand notation for the latter use + * case. + * + * Copyright 2009, 2010, Nick Piggin, Novell Inc. 
+ */ +#ifndef __LINUX_LGLOCK_H +#define __LINUX_LGLOCK_H + +#include +#include +#include + +/* can make br locks by using local lock for read side, global lock for write */ +#define br_lock_init(name) name##_lock_init() +#define br_read_lock(name) name##_local_lock() +#define br_read_unlock(name) name##_local_unlock() +#define br_write_lock(name) name##_global_lock_online() +#define br_write_unlock(name) name##_global_unlock_online() + +#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) +#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) + + +#define lg_lock_init(name) name##_lock_init() +#define lg_local_lock(name) name##_local_lock() +#define lg_local_unlock(name) name##_local_unlock() +#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) +#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) +#define lg_global_lock(name) name##_global_lock() +#define lg_global_unlock(name) name##_global_unlock() +#define lg_global_lock_online(name) name##_global_lock_online() +#define lg_global_unlock_online(name) name##_global_unlock_online() + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define LOCKDEP_INIT_MAP lockdep_init_map + +#define DEFINE_LGLOCK_LOCKDEP(name) \ + struct lock_class_key name##_lock_key; \ + struct lockdep_map name##_lock_dep_map; \ + EXPORT_SYMBOL(name##_lock_dep_map) + +#else +#define LOCKDEP_INIT_MAP(a, b, c, d) + +#define DEFINE_LGLOCK_LOCKDEP(name) +#endif + + +#define DECLARE_LGLOCK(name) \ + extern void name##_lock_init(void); \ + extern void name##_local_lock(void); \ + extern void name##_local_unlock(void); \ + extern void name##_local_lock_cpu(int cpu); \ + extern void name##_local_unlock_cpu(int cpu); \ + extern void name##_global_lock(void); \ + extern void name##_global_unlock(void); \ + extern void name##_global_lock_online(void); \ + extern void name##_global_unlock_online(void); \ + +#define DEFINE_LGLOCK(name) \ + \ + DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ + DEFINE_LGLOCK_LOCKDEP(name); \ + \ + void 
name##_lock_init(void) { \ + int i; \ + LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ + } \ + } \ + EXPORT_SYMBOL(name##_lock_init); \ + \ + void name##_local_lock(void) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock); \ + \ + void name##_local_unlock(void) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock); \ + \ + void name##_local_lock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock_cpu); \ + \ + void name##_local_unlock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock_cpu); \ + \ + void name##_global_lock_online(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock_online); \ + \ + void name##_global_unlock_online(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + 
EXPORT_SYMBOL(name##_global_unlock_online); \ + \ + void name##_global_lock(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock); \ + \ + void name##_global_unlock(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock); +#endif -- cgit v1.2.3-59-g8ed1b From 6416ccb7899960868f5016751fb81bf25213d24f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:38 +1000 Subject: fs: scale files_lock fs: scale files_lock Improve scalability of files_lock by adding per-cpu, per-sb files lists, protected with an lglock. The lglock provides fast access to the per-cpu lists to add and remove files. It also provides a snapshot of all the per-cpu lists (although this is very slow). One difficulty with this approach is that a file can be removed from the list by another CPU. We must track which per-cpu list the file is on with a new variable in the file struct (packed into a hole on 64-bit archs). Scalability could suffer if files are frequently removed from different cpu's list. However loads with frequent removal of files imply short interval between adding and removing the files, and the scheduler attempts to avoid moving processes too far away. Also, even in the case of cross-CPU removal, the hardware has much more opportunity to parallelise cacheline transfers with N cachelines than with 1. A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs degenerates to contending on a single lock, which is no worse than before.
When more than one CPU are allocating files, even if they are always freed by different CPUs, there will be more parallelism than the single-lock case. Testing results: On a 2 socket, 8 core opteron, I measure the number of times the lock is taken to remove the file, the number of times it is removed by the same CPU that added it, and the number of times it is removed by the same node that added it. Booting: locks= 25049 cpu-hits= 23174 (92.5%) node-hits= 23945 (95.6%) kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%) dbench 64 locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%) So a file is removed from the same CPU it was added by over 90% of the time. It remains within the same node 95% of the time. Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile. throughput 2.6.34-rc2 24.5 +patch 24.9 us sys idle IO wait (in %) 2.6.34-rc2 51.25 28.25 17.25 3.25 +patch 53.75 18.5 19 8.75 So significantly less CPU time spent in kernel code, higher idle time and slightly higher throughput. Single threaded performance difference was within the noise of microbenchmarks. That is not to say penalty does not exist, the code is larger and more memory accesses required so it will be slightly slower. 
Cc: linux-kernel@vger.kernel.org Cc: Tim Chen Cc: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 108 ++++++++++++++++++++++++++++++++++++++++++++--------- fs/super.c | 18 +++++++++ include/linux/fs.h | 7 ++++ 3 files changed, 115 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index 6f0e62ecfddd..a04bdd81c11c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -32,7 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -336,30 +339,98 @@ void put_filp(struct file *file) } } +static inline int file_list_cpu(struct file *file) +{ +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); +} + +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. 
+ */ void file_sb_list_add(struct file *file, struct super_block *sb) { - spin_lock(&files_lock); - BUG_ON(!list_empty(&file->f_u.fu_list)); - list_add(&file->f_u.fu_list, &sb->s_files); - spin_unlock(&files_lock); + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); } +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - spin_lock(&files_lock); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - spin_unlock(&files_lock); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - spin_lock(&files_lock); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? 
*/ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. */ too_bad: - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); return 0; } @@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - spin_lock(&files_lock); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -406,12 +477,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -431,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); 
kfree(s->s_subtype); kfree(s->s_options); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e65add0f163..76041b614758 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -920,6 +920,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1334,7 +1337,11 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head __percpu *s_files; +#else struct list_head s_files; +#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ -- cgit v1.2.3-59-g8ed1b From 56385a12d9bb9e173751f74b6c430742018cafc0 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 18 Aug 2010 14:08:17 +0200 Subject: ALSA: emu10k1 - delay the PCM interrupts (add pcm_irq_delay parameter) With some hardware combinations, the PCM interrupts are acknowledged before the period boundary from the emu10k1 chip. The midlevel PCM code gets confused and the playback stream is interrupted. It seems that the interrupt processing shift by 2 samples is enough to fix this issue. This default value does not harm other, non-affected hardware. 
More information: Kernel bugzilla bug#16300 [A compile warning fixed by tiwai] Signed-off-by: Jaroslav Kysela Cc: Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 1 + sound/core/pcm_native.c | 4 ++++ sound/pci/emu10k1/emu10k1.c | 4 ++++ sound/pci/emu10k1/emupcm.c | 30 ++++++++++++++++++++++++++---- sound/pci/emu10k1/memory.c | 4 +++- 5 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 6a664c3f7c1e..7dc97d12253c 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1707,6 +1707,7 @@ struct snd_emu10k1 { unsigned int card_type; /* EMU10K1_CARD_* */ unsigned int ecard_ctrl; /* ecard control bits */ unsigned long dma_mask; /* PCI DMA mask */ + unsigned int delay_pcm_irq; /* in samples */ int max_cache_pages; /* max memory size / PAGE_SIZE */ struct snd_dma_buffer silent_page; /* silent page */ struct snd_dma_buffer ptb_pages; /* page table pages */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a3b2a6479246..134fc6c2e08d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -978,6 +978,10 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) { if (substream->runtime->trigger_master != substream) return 0; + /* some drivers might use hw_ptr to recover from the pause - + update the hw_ptr now */ + if (push) + snd_pcm_update_hw_ptr(substream); /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta betwen the current jiffies, this gives a large enough * delta, effectively to skip the check once. diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 4203782d7cb7..aff8387c45cf 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -52,6 +52,7 @@ static int max_synth_voices[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 64}; static int max_buffer_size[SNDRV_CARDS] = {[0 ...
(SNDRV_CARDS - 1)] = 128}; static int enable_ir[SNDRV_CARDS]; static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */ +static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 2}; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard."); @@ -73,6 +74,8 @@ module_param_array(enable_ir, bool, NULL, 0444); MODULE_PARM_DESC(enable_ir, "Enable IR."); module_param_array(subsystem, uint, NULL, 0444); MODULE_PARM_DESC(subsystem, "Force card subsystem model."); +module_param_array(delay_pcm_irq, uint, NULL, 0444); +MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 0)."); /* * Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value Model:SB0400 */ @@ -127,6 +130,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci, &emu)) < 0) goto error; card->private_data = emu; + emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f; if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0) goto error; if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 55b83ef73c63..622bace148e3 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -332,7 +332,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, evoice->epcm->ccca_start_addr = start_addr + ccis; if (extra) { start_addr += ccis; - end_addr += ccis; + end_addr += ccis + emu->delay_pcm_irq; } if (stereo && !extra) { snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK); @@ -360,7 +360,9 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, /* Assumption that PT is already 0 so no harm overwriting */ snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]); snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24)); - snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24)); + snd_emu10k1_ptr_write(emu, PSST, voice, + (start_addr 
+ (extra ? emu->delay_pcm_irq : 0)) | + (send_amount[2] << 24)); if (emu->card_capabilities->emu_model) pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */ else @@ -732,6 +734,23 @@ static void snd_emu10k1_playback_stop_voice(struct snd_emu10k1 *emu, struct snd_ snd_emu10k1_ptr_write(emu, IP, voice, 0); } +static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu, + struct snd_emu10k1_pcm *epcm, + struct snd_pcm_substream *substream, + struct snd_pcm_runtime *runtime) +{ + unsigned int ptr, period_pos; + + /* try to sychronize the current position for the interrupt + source voice */ + period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt; + period_pos %= runtime->period_size; + ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number); + ptr &= ~0x00ffffff; + ptr |= epcm->ccca_start_addr + period_pos; + snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr); +} + static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, int cmd) { @@ -753,6 +772,8 @@ static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, /* follow thru */ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: + if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE) + snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime); mix = &emu->pcm_mixer[substream->number]; snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix); snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix); @@ -869,8 +890,9 @@ static snd_pcm_uframes_t snd_emu10k1_playback_pointer(struct snd_pcm_substream * #endif /* printk(KERN_DEBUG - "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n", - ptr, runtime->buffer_size, runtime->period_size); + "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n", + (long)ptr, (long)runtime->buffer_size, + (long)runtime->period_size); */ return ptr; } diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index ffb1ddb8dc28..957a311514c8 100644 --- 
a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -310,8 +310,10 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!hdr)) return NULL; + idx = runtime->period_size >= runtime->buffer_size ? + (emu->delay_pcm_irq * 2) : 0; mutex_lock(&hdr->block_mutex); - blk = search_empty(emu, runtime->dma_bytes); + blk = search_empty(emu, runtime->dma_bytes + idx); if (blk == NULL) { mutex_unlock(&hdr->block_mutex); return NULL; -- cgit v1.2.3-59-g8ed1b From bd76af0f87f7a1815b311bde269a3f18305b3169 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 18 Aug 2010 14:16:54 +0200 Subject: ALSA: pcm midlevel code - add time check for double interrupt acknowledge The current code in pcm_lib.c does all checks using only the position in the ring buffer. Unfortunately, when interrupts get delayed or merged into one, we need another timing source to check when the buffer size boundary overlaps to avoid the wrong updating of the ring buffer pointers. This code uses jiffies to check the right time window without any performance impact.
Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 + sound/core/pcm_lib.c | 14 +++++++++----- sound/core/pcm_native.c | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 85f1c6bf8566..dfd9b76b1853 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -278,6 +278,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ + unsigned long hw_ptr_buffer_jiffies; /* buffer time in jiffies */ snd_pcm_sframes_t delay; /* extra delay; typically FIFO size */ /* -- HW params -- */ diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index e23e0e7ab26f..a1707cca9c66 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -334,11 +334,15 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, /* delta = "expected next hw_ptr" for in_interrupt != 0 */ delta = runtime->hw_ptr_interrupt + runtime->period_size; if (delta > new_hw_ptr) { - hw_base += runtime->buffer_size; - if (hw_base >= runtime->boundary) - hw_base = 0; - new_hw_ptr = hw_base + pos; - goto __delta; + /* check for double acknowledged interrupts */ + hdelta = jiffies - runtime->hw_ptr_jiffies; + if (hdelta > runtime->hw_ptr_buffer_jiffies/2) { + hw_base += runtime->buffer_size; + if (hw_base >= runtime->boundary) + hw_base = 0; + new_hw_ptr = hw_base + pos; + goto __delta; + } } } /* new_hw_ptr might be lower than old_hw_ptr in case when */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 134fc6c2e08d..e2e73895db12 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -864,6 +864,8 @@ static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state) struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); 
runtime->hw_ptr_jiffies = jiffies; + runtime->hw_ptr_buffer_jiffies = (runtime->buffer_size * HZ) / + runtime->rate; runtime->status->state = state; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) -- cgit v1.2.3-59-g8ed1b From d15ca3203754359cfe5d18910722d3089b204cc4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Aug 2010 18:55:33 +0100 Subject: Fix the declaration of sys_execve() in asm-generic/syscalls.h Fix the declaration of sys_execve() in asm-generic/syscalls.h to have various consts applied to its pointers. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-generic/syscalls.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index df84e3b04555..d89dec864d42 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -23,8 +23,10 @@ asmlinkage long sys_vfork(struct pt_regs *regs); #endif #ifndef sys_execve -asmlinkage long sys_execve(char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); +asmlinkage long sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs); #endif #ifndef sys_mmap2 -- cgit v1.2.3-59-g8ed1b From a49f37eed22b74221f271811ea41323654e40dad Mon Sep 17 00:00:00 2001 From: Sachin Sanap Date: Fri, 13 Aug 2010 21:22:49 +0000 Subject: net: add Fast Ethernet driver for PXA168. Signed-off-by: Sachin Sanap Signed-off-by: David S. 
Miller --- drivers/net/Kconfig | 10 + drivers/net/Makefile | 1 + drivers/net/pxa168_eth.c | 1666 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pxa168_eth.h | 30 + 4 files changed, 1707 insertions(+) create mode 100644 drivers/net/pxa168_eth.c create mode 100644 include/linux/pxa168_eth.h (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ebe68395ecf8..fe581566cb26 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -928,6 +928,16 @@ config SMC91X The module will be called smc91x. If you want to compile it as a module, say M here and read . +config PXA168_ETH + tristate "Marvell pxa168 ethernet support" + depends on CPU_PXA168 + select PHYLIB + help + This driver supports the pxa168 Ethernet ports. + + To compile this driver as a module, choose M here. The module + will be called pxa168_eth. + config NET_NETX tristate "NetX Ethernet support" select MII diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 56e8c27f77ce..3e8f150c4b14 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -244,6 +244,7 @@ obj-$(CONFIG_MYRI10GE) += myri10ge/ obj-$(CONFIG_SMC91X) += smc91x.o obj-$(CONFIG_SMC911X) += smc911x.o obj-$(CONFIG_SMSC911X) += smsc911x.o +obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_BFIN_MAC) += bfin_mac.o obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_PASEMI_MAC) += pasemi_mac_driver.o diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c new file mode 100644 index 000000000000..ecc64d750cce --- /dev/null +++ b/drivers/net/pxa168_eth.c @@ -0,0 +1,1666 @@ +/* + * PXA168 ethernet driver. + * Most of the code is derived from mv643xx ethernet driver. + * + * Copyright (C) 2010 Marvell International Ltd. 
+ * Sachin Sanap + * Philip Rakity + * Mark Brown + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "pxa168-eth" +#define DRIVER_VERSION "0.3" + +/* + * Registers + */ + +#define PHY_ADDRESS 0x0000 +#define SMI 0x0010 +#define PORT_CONFIG 0x0400 +#define PORT_CONFIG_EXT 0x0408 +#define PORT_COMMAND 0x0410 +#define PORT_STATUS 0x0418 +#define HTPR 0x0428 +#define SDMA_CONFIG 0x0440 +#define SDMA_CMD 0x0448 +#define INT_CAUSE 0x0450 +#define INT_W_CLEAR 0x0454 +#define INT_MASK 0x0458 +#define ETH_F_RX_DESC_0 0x0480 +#define ETH_C_RX_DESC_0 0x04A0 +#define ETH_C_TX_DESC_1 0x04E4 + +/* smi register */ +#define SMI_BUSY (1 << 28) /* 0 - Write, 1 - Read */ +#define SMI_R_VALID (1 << 27) /* 0 - Write, 1 - Read */ +#define SMI_OP_W (0 << 26) /* Write operation */ +#define SMI_OP_R (1 << 26) /* Read operation */ + +#define PHY_WAIT_ITERATIONS 10 + +#define PXA168_ETH_PHY_ADDR_DEFAULT 0 +/* RX & TX descriptor command */ +#define BUF_OWNED_BY_DMA (1 << 31) + +/* RX descriptor status */ +#define RX_EN_INT (1 << 23) +#define RX_FIRST_DESC (1 << 
17) +#define RX_LAST_DESC (1 << 16) +#define RX_ERROR (1 << 15) + +/* TX descriptor command */ +#define TX_EN_INT (1 << 23) +#define TX_GEN_CRC (1 << 22) +#define TX_ZERO_PADDING (1 << 18) +#define TX_FIRST_DESC (1 << 17) +#define TX_LAST_DESC (1 << 16) +#define TX_ERROR (1 << 15) + +/* SDMA_CMD */ +#define SDMA_CMD_AT (1 << 31) +#define SDMA_CMD_TXDL (1 << 24) +#define SDMA_CMD_TXDH (1 << 23) +#define SDMA_CMD_AR (1 << 15) +#define SDMA_CMD_ERD (1 << 7) + +/* Bit definitions of the Port Config Reg */ +#define PCR_HS (1 << 12) +#define PCR_EN (1 << 7) +#define PCR_PM (1 << 0) + +/* Bit definitions of the Port Config Extend Reg */ +#define PCXR_2BSM (1 << 28) +#define PCXR_DSCP_EN (1 << 21) +#define PCXR_MFL_1518 (0 << 14) +#define PCXR_MFL_1536 (1 << 14) +#define PCXR_MFL_2048 (2 << 14) +#define PCXR_MFL_64K (3 << 14) +#define PCXR_FLP (1 << 11) +#define PCXR_PRIO_TX_OFF 3 +#define PCXR_TX_HIGH_PRI (7 << PCXR_PRIO_TX_OFF) + +/* Bit definitions of the SDMA Config Reg */ +#define SDCR_BSZ_OFF 12 +#define SDCR_BSZ8 (3 << SDCR_BSZ_OFF) +#define SDCR_BSZ4 (2 << SDCR_BSZ_OFF) +#define SDCR_BSZ2 (1 << SDCR_BSZ_OFF) +#define SDCR_BSZ1 (0 << SDCR_BSZ_OFF) +#define SDCR_BLMR (1 << 6) +#define SDCR_BLMT (1 << 7) +#define SDCR_RIFB (1 << 9) +#define SDCR_RC_OFF 2 +#define SDCR_RC_MAX_RETRANS (0xf << SDCR_RC_OFF) + +/* + * Bit definitions of the Interrupt Cause Reg + * and Interrupt MASK Reg is the same + */ +#define ICR_RXBUF (1 << 0) +#define ICR_TXBUF_H (1 << 2) +#define ICR_TXBUF_L (1 << 3) +#define ICR_TXEND_H (1 << 6) +#define ICR_TXEND_L (1 << 7) +#define ICR_RXERR (1 << 8) +#define ICR_TXERR_H (1 << 10) +#define ICR_TXERR_L (1 << 11) +#define ICR_TX_UDR (1 << 13) +#define ICR_MII_CH (1 << 28) + +#define ALL_INTS (ICR_TXBUF_H | ICR_TXBUF_L | ICR_TX_UDR |\ + ICR_TXERR_H | ICR_TXERR_L |\ + ICR_TXEND_H | ICR_TXEND_L |\ + ICR_RXBUF | ICR_RXERR | ICR_MII_CH) + +#define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ + +#define NUM_RX_DESCS 64 +#define NUM_TX_DESCS 64 + +#define 
HASH_ADD 0 +#define HASH_DELETE 1 +#define HASH_ADDR_TABLE_SIZE 0x4000 /* 16K (1/2K address - PCR_HS == 1) */ +#define HOP_NUMBER 12 + +/* Bit definitions for Port status */ +#define PORT_SPEED_100 (1 << 0) +#define FULL_DUPLEX (1 << 1) +#define FLOW_CONTROL_ENABLED (1 << 2) +#define LINK_UP (1 << 3) + +/* Bit definitions for work to be done */ +#define WORK_LINK (1 << 0) +#define WORK_TX_DONE (1 << 1) + +/* + * Misc definitions. + */ +#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES) + +struct rx_desc { + u32 cmd_sts; /* Descriptor command status */ + u16 byte_cnt; /* Descriptor buffer byte count */ + u16 buf_size; /* Buffer size */ + u32 buf_ptr; /* Descriptor buffer pointer */ + u32 next_desc_ptr; /* Next descriptor pointer */ +}; + +struct tx_desc { + u32 cmd_sts; /* Command/status field */ + u16 reserved; + u16 byte_cnt; /* buffer byte count */ + u32 buf_ptr; /* pointer to buffer for this descriptor */ + u32 next_desc_ptr; /* Pointer to next descriptor */ +}; + +struct pxa168_eth_private { + int port_num; /* User Ethernet port number */ + + int rx_resource_err; /* Rx ring resource error flag */ + + /* Next available and first returning Rx resource */ + int rx_curr_desc_q, rx_used_desc_q; + + /* Next available and first returning Tx resource */ + int tx_curr_desc_q, tx_used_desc_q; + + struct rx_desc *p_rx_desc_area; + dma_addr_t rx_desc_dma; + int rx_desc_area_size; + struct sk_buff **rx_skb; + + struct tx_desc *p_tx_desc_area; + dma_addr_t tx_desc_dma; + int tx_desc_area_size; + struct sk_buff **tx_skb; + + struct work_struct tx_timeout_task; + + struct net_device *dev; + struct napi_struct napi; + u8 work_todo; + int skb_size; + + struct net_device_stats stats; + /* Size of Tx Ring per queue */ + int tx_ring_size; + /* Number of tx descriptors in use */ + int tx_desc_count; + /* Size of Rx Ring per queue */ + int rx_ring_size; + /* Number of rx descriptors in use */ + int rx_desc_count; + + /* + * Used in case RX Ring is empty, which can 
occur when + * system does not have resources (skb's) + */ + struct timer_list timeout; + struct mii_bus *smi_bus; + struct phy_device *phy; + + /* clock */ + struct clk *clk; + struct pxa168_eth_platform_data *pd; + /* + * Ethernet controller base address. + */ + void __iomem *base; + + /* Pointer to the hardware address filter table */ + void *htpr; + dma_addr_t htpr_dma; +}; + +struct addr_table_entry { + __le32 lo; + __le32 hi; +}; + +/* Bit fields of a Hash Table Entry */ +enum hash_table_entry { + HASH_ENTRY_VALID = 1, + SKIP = 2, + HASH_ENTRY_RECEIVE_DISCARD = 4, + HASH_ENTRY_RECEIVE_DISCARD_BIT = 2 +}; + +static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static int pxa168_init_hw(struct pxa168_eth_private *pep); +static void eth_port_reset(struct net_device *dev); +static void eth_port_start(struct net_device *dev); +static int pxa168_eth_open(struct net_device *dev); +static int pxa168_eth_stop(struct net_device *dev); +static int ethernet_phy_setup(struct net_device *dev); + +static inline u32 rdl(struct pxa168_eth_private *pep, int offset) +{ + return readl(pep->base + offset); +} + +static inline void wrl(struct pxa168_eth_private *pep, int offset, u32 data) +{ + writel(data, pep->base + offset); +} + +static void abort_dma(struct pxa168_eth_private *pep) +{ + int delay; + int max_retries = 40; + + do { + wrl(pep, SDMA_CMD, SDMA_CMD_AR | SDMA_CMD_AT); + udelay(100); + + delay = 10; + while ((rdl(pep, SDMA_CMD) & (SDMA_CMD_AR | SDMA_CMD_AT)) + && delay-- > 0) { + udelay(10); + } + } while (max_retries-- > 0 && delay <= 0); + + if (max_retries <= 0) + printk(KERN_ERR "%s : DMA Stuck\n", __func__); +} + +static int ethernet_phy_get(struct pxa168_eth_private *pep) +{ + unsigned int reg_data; + + reg_data = rdl(pep, PHY_ADDRESS); + + return (reg_data >> (5 * pep->port_num)) & 0x1f; +} + +static void ethernet_phy_set_addr(struct pxa168_eth_private 
*pep, int phy_addr) +{ + u32 reg_data; + int addr_shift = 5 * pep->port_num; + + reg_data = rdl(pep, PHY_ADDRESS); + reg_data &= ~(0x1f << addr_shift); + reg_data |= (phy_addr & 0x1f) << addr_shift; + wrl(pep, PHY_ADDRESS, reg_data); +} + +static void ethernet_phy_reset(struct pxa168_eth_private *pep) +{ + int data; + + data = phy_read(pep->phy, MII_BMCR); + if (data < 0) + return; + + data |= BMCR_RESET; + if (phy_write(pep->phy, MII_BMCR, data) < 0) + return; + + do { + data = phy_read(pep->phy, MII_BMCR); + } while (data >= 0 && data & BMCR_RESET); +} + +static void rxq_refill(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct sk_buff *skb; + struct rx_desc *p_used_rx_desc; + int used_rx_desc; + + while (pep->rx_desc_count < pep->rx_ring_size) { + int size; + + skb = dev_alloc_skb(pep->skb_size); + if (!skb) + break; + if (SKB_DMA_REALIGN) + skb_reserve(skb, SKB_DMA_REALIGN); + pep->rx_desc_count++; + /* Get 'used' Rx descriptor */ + used_rx_desc = pep->rx_used_desc_q; + p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc]; + size = skb->end - skb->data; + p_used_rx_desc->buf_ptr = dma_map_single(NULL, + skb->data, + size, + DMA_FROM_DEVICE); + p_used_rx_desc->buf_size = size; + pep->rx_skb[used_rx_desc] = skb; + + /* Return the descriptor to DMA ownership */ + wmb(); + p_used_rx_desc->cmd_sts = BUF_OWNED_BY_DMA | RX_EN_INT; + wmb(); + + /* Move the used descriptor pointer to the next descriptor */ + pep->rx_used_desc_q = (used_rx_desc + 1) % pep->rx_ring_size; + + /* Any Rx return cancels the Rx resource error status */ + pep->rx_resource_err = 0; + + skb_reserve(skb, ETH_HW_IP_ALIGN); + } + + /* + * If RX ring is empty of SKB, set a timer to try allocating + * again at a later time. 
+ */ + if (pep->rx_desc_count == 0) { + pep->timeout.expires = jiffies + (HZ / 10); + add_timer(&pep->timeout); + } +} + +static inline void rxq_refill_timer_wrapper(unsigned long data) +{ + struct pxa168_eth_private *pep = (void *)data; + napi_schedule(&pep->napi); +} + +static inline u8 flip_8_bits(u8 x) +{ + return (((x) & 0x01) << 3) | (((x) & 0x02) << 1) + | (((x) & 0x04) >> 1) | (((x) & 0x08) >> 3) + | (((x) & 0x10) << 3) | (((x) & 0x20) << 1) + | (((x) & 0x40) >> 1) | (((x) & 0x80) >> 3); +} + +static void nibble_swap_every_byte(unsigned char *mac_addr) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) { + mac_addr[i] = ((mac_addr[i] & 0x0f) << 4) | + ((mac_addr[i] & 0xf0) >> 4); + } +} + +static void inverse_every_nibble(unsigned char *mac_addr) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = flip_8_bits(mac_addr[i]); +} + +/* + * ---------------------------------------------------------------------------- + * This function will calculate the hash function of the address. + * Inputs + * mac_addr_orig - MAC address. + * Outputs + * return the calculated entry. + */ +static u32 hash_function(unsigned char *mac_addr_orig) +{ + u32 hash_result; + u32 addr0; + u32 addr1; + u32 addr2; + u32 addr3; + unsigned char mac_addr[ETH_ALEN]; + + /* Make a copy of MAC address since we are going to performe bit + * operations on it + */ + memcpy(mac_addr, mac_addr_orig, ETH_ALEN); + + nibble_swap_every_byte(mac_addr); + inverse_every_nibble(mac_addr); + + addr0 = (mac_addr[5] >> 2) & 0x3f; + addr1 = (mac_addr[5] & 0x03) | (((mac_addr[4] & 0x7f)) << 2); + addr2 = ((mac_addr[4] & 0x80) >> 7) | mac_addr[3] << 1; + addr3 = (mac_addr[2] & 0xff) | ((mac_addr[1] & 1) << 8); + + hash_result = (addr0 << 9) | (addr1 ^ addr2 ^ addr3); + hash_result = hash_result & 0x07ff; + return hash_result; +} + +/* + * ---------------------------------------------------------------------------- + * This function will add/del an entry to the address table. + * Inputs + * pep - ETHERNET . 
+ * mac_addr - MAC address. + * skip - if 1, skip this address.Used in case of deleting an entry which is a + * part of chain in the hash table.We cant just delete the entry since + * that will break the chain.We need to defragment the tables time to + * time. + * rd - 0 Discard packet upon match. + * - 1 Receive packet upon match. + * Outputs + * address table entry is added/deleted. + * 0 if success. + * -ENOSPC if table full + */ +static int add_del_hash_entry(struct pxa168_eth_private *pep, + unsigned char *mac_addr, + u32 rd, u32 skip, int del) +{ + struct addr_table_entry *entry, *start; + u32 new_high; + u32 new_low; + u32 i; + + new_low = (((mac_addr[1] >> 4) & 0xf) << 15) + | (((mac_addr[1] >> 0) & 0xf) << 11) + | (((mac_addr[0] >> 4) & 0xf) << 7) + | (((mac_addr[0] >> 0) & 0xf) << 3) + | (((mac_addr[3] >> 4) & 0x1) << 31) + | (((mac_addr[3] >> 0) & 0xf) << 27) + | (((mac_addr[2] >> 4) & 0xf) << 23) + | (((mac_addr[2] >> 0) & 0xf) << 19) + | (skip << SKIP) | (rd << HASH_ENTRY_RECEIVE_DISCARD_BIT) + | HASH_ENTRY_VALID; + + new_high = (((mac_addr[5] >> 4) & 0xf) << 15) + | (((mac_addr[5] >> 0) & 0xf) << 11) + | (((mac_addr[4] >> 4) & 0xf) << 7) + | (((mac_addr[4] >> 0) & 0xf) << 3) + | (((mac_addr[3] >> 5) & 0x7) << 0); + + /* + * Pick the appropriate table, start scanning for free/reusable + * entries at the index obtained by hashing the specified MAC address + */ + start = (struct addr_table_entry *)(pep->htpr); + entry = start + hash_function(mac_addr); + for (i = 0; i < HOP_NUMBER; i++) { + if (!(le32_to_cpu(entry->lo) & HASH_ENTRY_VALID)) { + break; + } else { + /* if same address put in same position */ + if (((le32_to_cpu(entry->lo) & 0xfffffff8) == + (new_low & 0xfffffff8)) && + (le32_to_cpu(entry->hi) == new_high)) { + break; + } + } + if (entry == start + 0x7ff) + entry = start; + else + entry++; + } + + if (((le32_to_cpu(entry->lo) & 0xfffffff8) != (new_low & 0xfffffff8)) && + (le32_to_cpu(entry->hi) != new_high) && del) + return 0; + + if (i == 
HOP_NUMBER) { + if (!del) { + printk(KERN_INFO "%s: table section is full, need to " + "move to 16kB implementation?\n", + __FILE__); + return -ENOSPC; + } else + return 0; + } + + /* + * Update the selected entry + */ + if (del) { + entry->hi = 0; + entry->lo = 0; + } else { + entry->hi = cpu_to_le32(new_high); + entry->lo = cpu_to_le32(new_low); + } + + return 0; +} + +/* + * ---------------------------------------------------------------------------- + * Create an addressTable entry from MAC address info + * found in the specifed net_device struct + * + * Input : pointer to ethernet interface network device structure + * Output : N/A + */ +static void update_hash_table_mac_address(struct pxa168_eth_private *pep, + unsigned char *oaddr, + unsigned char *addr) +{ + /* Delete old entry */ + if (oaddr) + add_del_hash_entry(pep, oaddr, 1, 0, HASH_DELETE); + /* Add new entry */ + add_del_hash_entry(pep, addr, 1, 0, HASH_ADD); +} + +static int init_hash_table(struct pxa168_eth_private *pep) +{ + /* + * Hardware expects CPU to build a hash table based on a predefined + * hash function and populate it based on hardware address. The + * location of the hash table is identified by 32-bit pointer stored + * in HTPR internal register. Two possible sizes exists for the hash + * table 8kB (256kB of DRAM required (4 x 64 kB banks)) and 1/2kB + * (16kB of DRAM required (4 x 4 kB banks)).We currently only support + * 1/2kB. + */ + /* TODO: Add support for 8kB hash table and alternative hash + * function.Driver can dynamically switch to them if the 1/2kB hash + * table is full. 
+ */ + if (pep->htpr == NULL) { + pep->htpr = dma_alloc_coherent(pep->dev->dev.parent, + HASH_ADDR_TABLE_SIZE, + &pep->htpr_dma, GFP_KERNEL); + if (pep->htpr == NULL) + return -ENOMEM; + } + memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); + wrl(pep, HTPR, pep->htpr_dma); + return 0; +} + +static void pxa168_eth_set_rx_mode(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct netdev_hw_addr *ha; + u32 val; + + val = rdl(pep, PORT_CONFIG); + if (dev->flags & IFF_PROMISC) + val |= PCR_PM; + else + val &= ~PCR_PM; + wrl(pep, PORT_CONFIG, val); + + /* + * Remove the old list of MAC address and add dev->addr + * and multicast address. + */ + memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); + update_hash_table_mac_address(pep, NULL, dev->dev_addr); + + netdev_for_each_mc_addr(ha, dev) + update_hash_table_mac_address(pep, NULL, ha->addr); +} + +static int pxa168_eth_set_mac_address(struct net_device *dev, void *addr) +{ + struct sockaddr *sa = addr; + struct pxa168_eth_private *pep = netdev_priv(dev); + unsigned char oldMac[ETH_ALEN]; + + if (!is_valid_ether_addr(sa->sa_data)) + return -EINVAL; + memcpy(oldMac, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); + netif_addr_lock_bh(dev); + update_hash_table_mac_address(pep, oldMac, dev->dev_addr); + netif_addr_unlock_bh(dev); + return 0; +} + +static void eth_port_start(struct net_device *dev) +{ + unsigned int val = 0; + struct pxa168_eth_private *pep = netdev_priv(dev); + int tx_curr_desc, rx_curr_desc; + + /* Perform PHY reset, if there is a PHY. 
*/ + if (pep->phy != NULL) { + struct ethtool_cmd cmd; + + pxa168_get_settings(pep->dev, &cmd); + ethernet_phy_reset(pep); + pxa168_set_settings(pep->dev, &cmd); + } + + /* Assignment of Tx CTRP of given queue */ + tx_curr_desc = pep->tx_curr_desc_q; + wrl(pep, ETH_C_TX_DESC_1, + (u32) ((struct tx_desc *)pep->tx_desc_dma + tx_curr_desc)); + + /* Assignment of Rx CRDP of given queue */ + rx_curr_desc = pep->rx_curr_desc_q; + wrl(pep, ETH_C_RX_DESC_0, + (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc)); + + wrl(pep, ETH_F_RX_DESC_0, + (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc)); + + /* Clear all interrupts */ + wrl(pep, INT_CAUSE, 0); + + /* Enable all interrupts for receive, transmit and error. */ + wrl(pep, INT_MASK, ALL_INTS); + + val = rdl(pep, PORT_CONFIG); + val |= PCR_EN; + wrl(pep, PORT_CONFIG, val); + + /* Start RX DMA engine */ + val = rdl(pep, SDMA_CMD); + val |= SDMA_CMD_ERD; + wrl(pep, SDMA_CMD, val); +} + +static void eth_port_reset(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + unsigned int val = 0; + + /* Stop all interrupts for receive, transmit and error. */ + wrl(pep, INT_MASK, 0); + + /* Clear all interrupts */ + wrl(pep, INT_CAUSE, 0); + + /* Stop RX DMA */ + val = rdl(pep, SDMA_CMD); + val &= ~SDMA_CMD_ERD; /* abort dma command */ + + /* Abort any transmit and receive operations and put DMA + * in idle state. 
+ */ + abort_dma(pep); + + /* Disable port */ + val = rdl(pep, PORT_CONFIG); + val &= ~PCR_EN; + wrl(pep, PORT_CONFIG, val); +} + +/* + * txq_reclaim - Free the tx desc data for completed descriptors + * If force is non-zero, frees uncompleted descriptors as well + */ +static int txq_reclaim(struct net_device *dev, int force) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct tx_desc *desc; + u32 cmd_sts; + struct sk_buff *skb; + int tx_index; + dma_addr_t addr; + int count; + int released = 0; + + netif_tx_lock(dev); + + pep->work_todo &= ~WORK_TX_DONE; + while (pep->tx_desc_count > 0) { + tx_index = pep->tx_used_desc_q; + desc = &pep->p_tx_desc_area[tx_index]; + cmd_sts = desc->cmd_sts; + if (!force && (cmd_sts & BUF_OWNED_BY_DMA)) { + if (released > 0) { + goto txq_reclaim_end; + } else { + released = -1; + goto txq_reclaim_end; + } + } + pep->tx_used_desc_q = (tx_index + 1) % pep->tx_ring_size; + pep->tx_desc_count--; + addr = desc->buf_ptr; + count = desc->byte_cnt; + skb = pep->tx_skb[tx_index]; + if (skb) + pep->tx_skb[tx_index] = NULL; + + if (cmd_sts & TX_ERROR) { + if (net_ratelimit()) + printk(KERN_ERR "%s: Error in TX\n", dev->name); + dev->stats.tx_errors++; + } + dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE); + if (skb) + dev_kfree_skb_irq(skb); + released++; + } +txq_reclaim_end: + netif_tx_unlock(dev); + return released; +} + +static void pxa168_eth_tx_timeout(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + printk(KERN_INFO "%s: TX timeout desc_count %d\n", + dev->name, pep->tx_desc_count); + + schedule_work(&pep->tx_timeout_task); +} + +static void pxa168_eth_tx_timeout_task(struct work_struct *work) +{ + struct pxa168_eth_private *pep = container_of(work, + struct pxa168_eth_private, + tx_timeout_task); + struct net_device *dev = pep->dev; + pxa168_eth_stop(dev); + pxa168_eth_open(dev); +} + +static int rxq_process(struct net_device *dev, int budget) +{ + struct pxa168_eth_private *pep = 
netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + unsigned int received_packets = 0; + struct sk_buff *skb; + + while (budget-- > 0) { + int rx_next_curr_desc, rx_curr_desc, rx_used_desc; + struct rx_desc *rx_desc; + unsigned int cmd_sts; + + /* Do not process Rx ring in case of Rx ring resource error */ + if (pep->rx_resource_err) + break; + rx_curr_desc = pep->rx_curr_desc_q; + rx_used_desc = pep->rx_used_desc_q; + rx_desc = &pep->p_rx_desc_area[rx_curr_desc]; + cmd_sts = rx_desc->cmd_sts; + rmb(); + if (cmd_sts & (BUF_OWNED_BY_DMA)) + break; + skb = pep->rx_skb[rx_curr_desc]; + pep->rx_skb[rx_curr_desc] = NULL; + + rx_next_curr_desc = (rx_curr_desc + 1) % pep->rx_ring_size; + pep->rx_curr_desc_q = rx_next_curr_desc; + + /* Rx descriptors exhausted. */ + /* Set the Rx ring resource error flag */ + if (rx_next_curr_desc == rx_used_desc) + pep->rx_resource_err = 1; + pep->rx_desc_count--; + dma_unmap_single(NULL, rx_desc->buf_ptr, + rx_desc->buf_size, + DMA_FROM_DEVICE); + received_packets++; + /* + * Update statistics. + * Note byte count includes 4 byte CRC count + */ + stats->rx_packets++; + stats->rx_bytes += rx_desc->byte_cnt; + /* + * In case received a packet without first / last bits on OR + * the error summary bit is on, the packets needs to be droped. 
+ */ + if (((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) != + (RX_FIRST_DESC | RX_LAST_DESC)) + || (cmd_sts & RX_ERROR)) { + + stats->rx_dropped++; + if ((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) != + (RX_FIRST_DESC | RX_LAST_DESC)) { + if (net_ratelimit()) + printk(KERN_ERR + "%s: Rx pkt on multiple desc\n", + dev->name); + } + if (cmd_sts & RX_ERROR) + stats->rx_errors++; + dev_kfree_skb_irq(skb); + } else { + /* + * The -4 is for the CRC in the trailer of the + * received packet + */ + skb_put(skb, rx_desc->byte_cnt - 4); + skb->protocol = eth_type_trans(skb, dev); + netif_receive_skb(skb); + } + dev->last_rx = jiffies; + } + /* Fill RX ring with skb's */ + rxq_refill(dev); + return received_packets; +} + +static int pxa168_eth_collect_events(struct pxa168_eth_private *pep, + struct net_device *dev) +{ + u32 icr; + int ret = 0; + + icr = rdl(pep, INT_CAUSE); + if (icr == 0) + return IRQ_NONE; + + wrl(pep, INT_CAUSE, ~icr); + if (icr & (ICR_TXBUF_H | ICR_TXBUF_L)) { + pep->work_todo |= WORK_TX_DONE; + ret = 1; + } + if (icr & ICR_RXBUF) + ret = 1; + if (icr & ICR_MII_CH) { + pep->work_todo |= WORK_LINK; + ret = 1; + } + return ret; +} + +static void handle_link_event(struct pxa168_eth_private *pep) +{ + struct net_device *dev = pep->dev; + u32 port_status; + int speed; + int duplex; + int fc; + + port_status = rdl(pep, PORT_STATUS); + if (!(port_status & LINK_UP)) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + txq_reclaim(dev, 1); + } + return; + } + if (port_status & PORT_SPEED_100) + speed = 100; + else + speed = 10; + + duplex = (port_status & FULL_DUPLEX) ? 1 : 0; + fc = (port_status & FLOW_CONTROL_ENABLED) ? 1 : 0; + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", fc ? 
"en" : "dis"); + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); +} + +static irqreturn_t pxa168_eth_int_handler(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (unlikely(!pxa168_eth_collect_events(pep, dev))) + return IRQ_NONE; + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + napi_schedule(&pep->napi); + return IRQ_HANDLED; +} + +static void pxa168_eth_recalc_skb_size(struct pxa168_eth_private *pep) +{ + int skb_size; + + /* + * Reserve 2+14 bytes for an ethernet header (the hardware + * automatically prepends 2 bytes of dummy data to each + * received packet), 16 bytes for up to four VLAN tags, and + * 4 bytes for the trailing FCS -- 36 bytes total. + */ + skb_size = pep->dev->mtu + 36; + + /* + * Make sure that the skb size is a multiple of 8 bytes, as + * the lower three bits of the receive descriptor's buffer + * size field are ignored by the hardware. + */ + pep->skb_size = (skb_size + 7) & ~7; + + /* + * If NET_SKB_PAD is smaller than a cache line, + * netdev_alloc_skb() will cause skb->data to be misaligned + * to a cache line boundary. If this is the case, include + * some extra space to allow re-aligning the data area. 
+ */ + pep->skb_size += SKB_DMA_REALIGN; + +} + +static int set_port_config_ext(struct pxa168_eth_private *pep) +{ + int skb_size; + + pxa168_eth_recalc_skb_size(pep); + if (pep->skb_size <= 1518) + skb_size = PCXR_MFL_1518; + else if (pep->skb_size <= 1536) + skb_size = PCXR_MFL_1536; + else if (pep->skb_size <= 2048) + skb_size = PCXR_MFL_2048; + else + skb_size = PCXR_MFL_64K; + + /* Extended Port Configuration */ + wrl(pep, + PORT_CONFIG_EXT, PCXR_2BSM | /* Two byte prefix aligns IP hdr */ + PCXR_DSCP_EN | /* Enable DSCP in IP */ + skb_size | PCXR_FLP | /* do not force link pass */ + PCXR_TX_HIGH_PRI); /* Transmit - high priority queue */ + + return 0; +} + +static int pxa168_init_hw(struct pxa168_eth_private *pep) +{ + int err = 0; + + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + wrl(pep, INT_CAUSE, 0); + /* Write to ICR to clear interrupts. */ + wrl(pep, INT_W_CLEAR, 0); + /* Abort any transmit and receive operations and put DMA + * in idle state. + */ + abort_dma(pep); + /* Initialize address hash table */ + err = init_hash_table(pep); + if (err) + return err; + /* SDMA configuration */ + wrl(pep, SDMA_CONFIG, SDCR_BSZ8 | /* Burst size = 32 bytes */ + SDCR_RIFB | /* Rx interrupt on frame */ + SDCR_BLMT | /* Little endian transmit */ + SDCR_BLMR | /* Little endian receive */ + SDCR_RC_MAX_RETRANS); /* Max retransmit count */ + /* Port Configuration */ + wrl(pep, PORT_CONFIG, PCR_HS); /* Hash size is 1/2kb */ + set_port_config_ext(pep); + + return err; +} + +static int rxq_init(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct rx_desc *p_rx_desc; + int size = 0, i = 0; + int rx_desc_num = pep->rx_ring_size; + + /* Allocate RX skb rings */ + pep->rx_skb = kmalloc(sizeof(*pep->rx_skb) * pep->rx_ring_size, + GFP_KERNEL); + if (!pep->rx_skb) { + printk(KERN_ERR "%s: Cannot alloc RX skb ring\n", dev->name); + return -ENOMEM; + } + /* Allocate RX ring */ + pep->rx_desc_count = 0; + size = pep->rx_ring_size * 
sizeof(struct rx_desc); + pep->rx_desc_area_size = size; + pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, + &pep->rx_desc_dma, GFP_KERNEL); + if (!pep->p_rx_desc_area) { + printk(KERN_ERR "%s: Cannot alloc RX ring (size %d bytes)\n", + dev->name, size); + goto out; + } + memset((void *)pep->p_rx_desc_area, 0, size); + /* initialize the next_desc_ptr links in the Rx descriptors ring */ + p_rx_desc = (struct rx_desc *)pep->p_rx_desc_area; + for (i = 0; i < rx_desc_num; i++) { + p_rx_desc[i].next_desc_ptr = pep->rx_desc_dma + + ((i + 1) % rx_desc_num) * sizeof(struct rx_desc); + } + /* Save Rx desc pointer to driver struct. */ + pep->rx_curr_desc_q = 0; + pep->rx_used_desc_q = 0; + pep->rx_desc_area_size = rx_desc_num * sizeof(struct rx_desc); + return 0; +out: + kfree(pep->rx_skb); + return -ENOMEM; +} + +static void rxq_deinit(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int curr; + + /* Free preallocated skb's on RX rings */ + for (curr = 0; pep->rx_desc_count && curr < pep->rx_ring_size; curr++) { + if (pep->rx_skb[curr]) { + dev_kfree_skb(pep->rx_skb[curr]); + pep->rx_desc_count--; + } + } + if (pep->rx_desc_count) + printk(KERN_ERR + "Error in freeing Rx Ring. 
%d skb's still\n", + pep->rx_desc_count); + /* Free RX ring */ + if (pep->p_rx_desc_area) + dma_free_coherent(pep->dev->dev.parent, pep->rx_desc_area_size, + pep->p_rx_desc_area, pep->rx_desc_dma); + kfree(pep->rx_skb); +} + +static int txq_init(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct tx_desc *p_tx_desc; + int size = 0, i = 0; + int tx_desc_num = pep->tx_ring_size; + + pep->tx_skb = kmalloc(sizeof(*pep->tx_skb) * pep->tx_ring_size, + GFP_KERNEL); + if (!pep->tx_skb) { + printk(KERN_ERR "%s: Cannot alloc TX skb ring\n", dev->name); + return -ENOMEM; + } + /* Allocate TX ring */ + pep->tx_desc_count = 0; + size = pep->tx_ring_size * sizeof(struct tx_desc); + pep->tx_desc_area_size = size; + pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, + &pep->tx_desc_dma, GFP_KERNEL); + if (!pep->p_tx_desc_area) { + printk(KERN_ERR "%s: Cannot allocate Tx Ring (size %d bytes)\n", + dev->name, size); + goto out; + } + memset((void *)pep->p_tx_desc_area, 0, pep->tx_desc_area_size); + /* Initialize the next_desc_ptr links in the Tx descriptors ring */ + p_tx_desc = (struct tx_desc *)pep->p_tx_desc_area; + for (i = 0; i < tx_desc_num; i++) { + p_tx_desc[i].next_desc_ptr = pep->tx_desc_dma + + ((i + 1) % tx_desc_num) * sizeof(struct tx_desc); + } + pep->tx_curr_desc_q = 0; + pep->tx_used_desc_q = 0; + pep->tx_desc_area_size = tx_desc_num * sizeof(struct tx_desc); + return 0; +out: + kfree(pep->tx_skb); + return -ENOMEM; +} + +static void txq_deinit(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + /* Free outstanding skb's on TX ring */ + txq_reclaim(dev, 1); + BUG_ON(pep->tx_used_desc_q != pep->tx_curr_desc_q); + /* Free TX ring */ + if (pep->p_tx_desc_area) + dma_free_coherent(pep->dev->dev.parent, pep->tx_desc_area_size, + pep->p_tx_desc_area, pep->tx_desc_dma); + kfree(pep->tx_skb); +} + +static int pxa168_eth_open(struct net_device *dev) +{ + struct pxa168_eth_private *pep = 
netdev_priv(dev); + int err; + + err = request_irq(dev->irq, pxa168_eth_int_handler, + IRQF_DISABLED, dev->name, dev); + if (err) { + dev_printk(KERN_ERR, &dev->dev, "can't assign irq\n"); + return -EAGAIN; + } + pep->rx_resource_err = 0; + err = rxq_init(dev); + if (err != 0) + goto out_free_irq; + err = txq_init(dev); + if (err != 0) + goto out_free_rx_skb; + pep->rx_used_desc_q = 0; + pep->rx_curr_desc_q = 0; + + /* Fill RX ring with skb's */ + rxq_refill(dev); + pep->rx_used_desc_q = 0; + pep->rx_curr_desc_q = 0; + netif_carrier_off(dev); + eth_port_start(dev); + napi_enable(&pep->napi); + return 0; +out_free_rx_skb: + rxq_deinit(dev); +out_free_irq: + free_irq(dev->irq, dev); + return err; +} + +static int pxa168_eth_stop(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + eth_port_reset(dev); + + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + wrl(pep, INT_CAUSE, 0); + /* Write to ICR to clear interrupts. */ + wrl(pep, INT_W_CLEAR, 0); + napi_disable(&pep->napi); + del_timer_sync(&pep->timeout); + netif_carrier_off(dev); + free_irq(dev->irq, dev); + rxq_deinit(dev); + txq_deinit(dev); + + return 0; +} + +static int pxa168_eth_change_mtu(struct net_device *dev, int mtu) +{ + int retval; + struct pxa168_eth_private *pep = netdev_priv(dev); + + if ((mtu > 9500) || (mtu < 68)) + return -EINVAL; + + dev->mtu = mtu; + retval = set_port_config_ext(pep); + + if (!netif_running(dev)) + return 0; + + /* + * Stop and then re-open the interface. This will allocate RX + * skbs of the new MTU. + * There is a possible danger that the open will not succeed, + * due to memory being full. 
+ */ + pxa168_eth_stop(dev); + if (pxa168_eth_open(dev)) { + dev_printk(KERN_ERR, &dev->dev, + "fatal error on re-opening device after " + "MTU change\n"); + } + + return 0; +} + +static int eth_alloc_tx_desc_index(struct pxa168_eth_private *pep) +{ + int tx_desc_curr; + + tx_desc_curr = pep->tx_curr_desc_q; + pep->tx_curr_desc_q = (tx_desc_curr + 1) % pep->tx_ring_size; + BUG_ON(pep->tx_curr_desc_q == pep->tx_used_desc_q); + pep->tx_desc_count++; + + return tx_desc_curr; +} + +static int pxa168_rx_poll(struct napi_struct *napi, int budget) +{ + struct pxa168_eth_private *pep = + container_of(napi, struct pxa168_eth_private, napi); + struct net_device *dev = pep->dev; + int work_done = 0; + + if (unlikely(pep->work_todo & WORK_LINK)) { + pep->work_todo &= ~(WORK_LINK); + handle_link_event(pep); + } + /* + * We call txq_reclaim every time since in NAPI interupts are disabled + * and due to this we miss the TX_DONE interrupt,which is not updated in + * interrupt status register. + */ + txq_reclaim(dev, 0); + if (netif_queue_stopped(dev) + && pep->tx_ring_size - pep->tx_desc_count > 1) { + netif_wake_queue(dev); + } + work_done = rxq_process(dev, budget); + if (work_done < budget) { + napi_complete(napi); + wrl(pep, INT_MASK, ALL_INTS); + } + + return work_done; +} + +static int pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct tx_desc *desc; + int tx_index; + int length; + + tx_index = eth_alloc_tx_desc_index(pep); + desc = &pep->p_tx_desc_area[tx_index]; + length = skb->len; + pep->tx_skb[tx_index] = skb; + desc->byte_cnt = length; + desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); + wmb(); + desc->cmd_sts = BUF_OWNED_BY_DMA | TX_GEN_CRC | TX_FIRST_DESC | + TX_ZERO_PADDING | TX_LAST_DESC | TX_EN_INT; + wmb(); + wrl(pep, SDMA_CMD, SDMA_CMD_TXDH | SDMA_CMD_ERD); + + stats->tx_bytes += skb->len; + stats->tx_packets++; 
+ dev->trans_start = jiffies; + if (pep->tx_ring_size - pep->tx_desc_count <= 1) { + /* We handled the current skb, but now we are out of space.*/ + netif_stop_queue(dev); + } + + return NETDEV_TX_OK; +} + +static int smi_wait_ready(struct pxa168_eth_private *pep) +{ + int i = 0; + + /* wait for the SMI register to become available */ + for (i = 0; rdl(pep, SMI) & SMI_BUSY; i++) { + if (i == PHY_WAIT_ITERATIONS) + return -ETIMEDOUT; + msleep(10); + } + + return 0; +} + +static int pxa168_smi_read(struct mii_bus *bus, int phy_addr, int regnum) +{ + struct pxa168_eth_private *pep = bus->priv; + int i = 0; + int val; + + if (smi_wait_ready(pep)) { + printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) | SMI_OP_R); + /* now wait for the data to be valid */ + for (i = 0; !((val = rdl(pep, SMI)) & SMI_R_VALID); i++) { + if (i == PHY_WAIT_ITERATIONS) { + printk(KERN_WARNING + "pxa168_eth: SMI bus read not valid\n"); + return -ENODEV; + } + msleep(10); + } + + return val & 0xffff; +} + +static int pxa168_smi_write(struct mii_bus *bus, int phy_addr, int regnum, + u16 value) +{ + struct pxa168_eth_private *pep = bus->priv; + + if (smi_wait_ready(pep)) { + printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + + wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) | + SMI_OP_W | (value & 0xffff)); + + if (smi_wait_ready(pep)) { + printk(KERN_ERR "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + if (pep->phy != NULL) + return phy_mii_ioctl(pep->phy, if_mii(ifr), cmd); + + return -EOPNOTSUPP; +} + +static struct phy_device *phy_scan(struct pxa168_eth_private *pep, int phy_addr) +{ + struct mii_bus *bus = pep->smi_bus; + struct phy_device *phydev; + int start; + int num; + int i; + + if (phy_addr == 
PXA168_ETH_PHY_ADDR_DEFAULT) { + /* Scan entire range */ + start = ethernet_phy_get(pep); + num = 32; + } else { + /* Use phy addr specific to platform */ + start = phy_addr & 0x1f; + num = 1; + } + phydev = NULL; + for (i = 0; i < num; i++) { + int addr = (start + i) & 0x1f; + if (bus->phy_map[addr] == NULL) + mdiobus_scan(bus, addr); + + if (phydev == NULL) { + phydev = bus->phy_map[addr]; + if (phydev != NULL) + ethernet_phy_set_addr(pep, addr); + } + } + + return phydev; +} + +static void phy_init(struct pxa168_eth_private *pep, int speed, int duplex) +{ + struct phy_device *phy = pep->phy; + ethernet_phy_reset(pep); + + phy_attach(pep->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_MII); + + if (speed == 0) { + phy->autoneg = AUTONEG_ENABLE; + phy->speed = 0; + phy->duplex = 0; + phy->supported &= PHY_BASIC_FEATURES; + phy->advertising = phy->supported | ADVERTISED_Autoneg; + } else { + phy->autoneg = AUTONEG_DISABLE; + phy->advertising = 0; + phy->speed = speed; + phy->duplex = duplex; + } + phy_start_aneg(phy); +} + +static int ethernet_phy_setup(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (pep->pd != NULL) { + if (pep->pd->init) + pep->pd->init(); + } + pep->phy = phy_scan(pep, pep->pd->phy_addr & 0x1f); + if (pep->phy != NULL) + phy_init(pep, pep->pd->speed, pep->pd->duplex); + update_hash_table_mac_address(pep, NULL, dev->dev_addr); + + return 0; +} + +static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int err; + + err = phy_read_status(pep->phy); + if (err == 0) + err = phy_ethtool_gset(pep->phy, cmd); + + return err; +} + +static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + return phy_ethtool_sset(pep->phy, cmd); +} + +static void pxa168_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strncpy(info->driver, 
DRIVER_NAME, 32); + strncpy(info->version, DRIVER_VERSION, 32); + strncpy(info->fw_version, "N/A", 32); + strncpy(info->bus_info, "N/A", 32); +} + +static u32 pxa168_get_link(struct net_device *dev) +{ + return !!netif_carrier_ok(dev); +} + +static const struct ethtool_ops pxa168_ethtool_ops = { + .get_settings = pxa168_get_settings, + .set_settings = pxa168_set_settings, + .get_drvinfo = pxa168_get_drvinfo, + .get_link = pxa168_get_link, +}; + +static const struct net_device_ops pxa168_eth_netdev_ops = { + .ndo_open = pxa168_eth_open, + .ndo_stop = pxa168_eth_stop, + .ndo_start_xmit = pxa168_eth_start_xmit, + .ndo_set_rx_mode = pxa168_eth_set_rx_mode, + .ndo_set_mac_address = pxa168_eth_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = pxa168_eth_do_ioctl, + .ndo_change_mtu = pxa168_eth_change_mtu, + .ndo_tx_timeout = pxa168_eth_tx_timeout, +}; + +static int pxa168_eth_probe(struct platform_device *pdev) +{ + struct pxa168_eth_private *pep = NULL; + struct net_device *dev = NULL; + struct resource *res; + struct clk *clk; + int err; + + printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); + + clk = clk_get(&pdev->dev, "MFUCLK"); + if (IS_ERR(clk)) { + printk(KERN_ERR "%s: Fast Ethernet failed to get clock\n", + DRIVER_NAME); + return -ENODEV; + } + clk_enable(clk); + + dev = alloc_etherdev(sizeof(struct pxa168_eth_private)); + if (!dev) { + err = -ENOMEM; + goto out; + } + + platform_set_drvdata(pdev, dev); + pep = netdev_priv(dev); + pep->dev = dev; + pep->clk = clk; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) { + err = -ENODEV; + goto out; + } + pep->base = ioremap(res->start, res->end - res->start + 1); + if (pep->base == NULL) { + err = -ENOMEM; + goto out; + } + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + BUG_ON(!res); + dev->irq = res->start; + dev->netdev_ops = &pxa168_eth_netdev_ops; + dev->watchdog_timeo = 2 * HZ; + dev->base_addr = 0; + SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops); + + 
INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); + + printk(KERN_INFO "%s:Using random mac address\n", DRIVER_NAME); + random_ether_addr(dev->dev_addr); + + pep->pd = pdev->dev.platform_data; + pep->rx_ring_size = NUM_RX_DESCS; + if (pep->pd->rx_queue_size) + pep->rx_ring_size = pep->pd->rx_queue_size; + + pep->tx_ring_size = NUM_TX_DESCS; + if (pep->pd->tx_queue_size) + pep->tx_ring_size = pep->pd->tx_queue_size; + + pep->port_num = pep->pd->port_number; + /* Hardware supports only 3 ports */ + BUG_ON(pep->port_num > 2); + netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size); + + memset(&pep->timeout, 0, sizeof(struct timer_list)); + init_timer(&pep->timeout); + pep->timeout.function = rxq_refill_timer_wrapper; + pep->timeout.data = (unsigned long)pep; + + pep->smi_bus = mdiobus_alloc(); + if (pep->smi_bus == NULL) { + err = -ENOMEM; + goto out; + } + pep->smi_bus->priv = pep; + pep->smi_bus->name = "pxa168_eth smi"; + pep->smi_bus->read = pxa168_smi_read; + pep->smi_bus->write = pxa168_smi_write; + snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id); + pep->smi_bus->parent = &pdev->dev; + pep->smi_bus->phy_mask = 0xffffffff; + if (mdiobus_register(pep->smi_bus) < 0) { + err = -ENOMEM; + goto out; + } + pxa168_init_hw(pep); + err = ethernet_phy_setup(dev); + if (err) + goto out; + SET_NETDEV_DEV(dev, &pdev->dev); + err = register_netdev(dev); + if (err) + goto out; + return 0; +out: + if (pep->clk) { + clk_disable(pep->clk); + clk_put(pep->clk); + pep->clk = NULL; + } + if (pep->base) { + iounmap(pep->base); + pep->base = NULL; + } + if (dev) + free_netdev(dev); + return err; +} + +static int pxa168_eth_remove(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (pep->htpr) { + dma_free_coherent(pep->dev->dev.parent, HASH_ADDR_TABLE_SIZE, + pep->htpr, pep->htpr_dma); + pep->htpr = NULL; + } + if (pep->clk) { + clk_disable(pep->clk); + 
clk_put(pep->clk); + pep->clk = NULL; + } + if (pep->phy != NULL) + phy_detach(pep->phy); + + iounmap(pep->base); + pep->base = NULL; + unregister_netdev(dev); + flush_scheduled_work(); + free_netdev(dev); + platform_set_drvdata(pdev, NULL); + return 0; +} + +static void pxa168_eth_shutdown(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + eth_port_reset(dev); +} + +#ifdef CONFIG_PM +static int pxa168_eth_resume(struct platform_device *pdev) +{ + return -ENOSYS; +} + +static int pxa168_eth_suspend(struct platform_device *pdev, pm_message_t state) +{ + return -ENOSYS; +} + +#else +#define pxa168_eth_resume NULL +#define pxa168_eth_suspend NULL +#endif + +static struct platform_driver pxa168_eth_driver = { + .probe = pxa168_eth_probe, + .remove = pxa168_eth_remove, + .shutdown = pxa168_eth_shutdown, + .resume = pxa168_eth_resume, + .suspend = pxa168_eth_suspend, + .driver = { + .name = DRIVER_NAME, + }, +}; + +static int __init pxa168_init_module(void) +{ + return platform_driver_register(&pxa168_eth_driver); +} + +static void __exit pxa168_cleanup_module(void) +{ + platform_driver_unregister(&pxa168_eth_driver); +} + +module_init(pxa168_init_module); +module_exit(pxa168_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ethernet driver for Marvell PXA168"); +MODULE_ALIAS("platform:pxa168_eth"); diff --git a/include/linux/pxa168_eth.h b/include/linux/pxa168_eth.h new file mode 100644 index 000000000000..18d75e795606 --- /dev/null +++ b/include/linux/pxa168_eth.h @@ -0,0 +1,30 @@ +/* + *pxa168 ethernet platform device data definition file. + */ +#ifndef __LINUX_PXA168_ETH_H +#define __LINUX_PXA168_ETH_H + +struct pxa168_eth_platform_data { + int port_number; + int phy_addr; + + /* + * If speed is 0, then speed and duplex are autonegotiated. + */ + int speed; /* 0, SPEED_10, SPEED_100 */ + int duplex; /* DUPLEX_HALF or DUPLEX_FULL */ + + /* + * Override default RX/TX queue sizes if nonzero. 
+ */ + int rx_queue_size; + int tx_queue_size; + + /* + * init callback is used for board specific initialization + * e.g on Aspenite its used to initialize the PHY transceiver. + */ + int (*init)(void); +}; + +#endif /* __LINUX_PXA168_ETH_H */ -- cgit v1.2.3-59-g8ed1b From e243f5b6de35b6fc394bc2e1e1737afe538e7e0c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 15 Aug 2010 10:03:57 +0000 Subject: netfilter: fix userspace header warning "make headers_check" issued the following warning: CHECK include/linux/netfilter (64 files) usr/include/linux/netfilter/xt_ipvs.h:19: found __[us]{8,16,32,64} type without #include <linux/types.h> Fix this by including linux/types.h, as suggested. Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- include/linux/netfilter/xt_ipvs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h index 1167aeb7a347..eff34ac18808 100644 --- a/include/linux/netfilter/xt_ipvs.h +++ b/include/linux/netfilter/xt_ipvs.h @@ -1,6 +1,8 @@ #ifndef _XT_IPVS_H #define _XT_IPVS_H +#include <linux/types.h> + enum { XT_IPVS_IPVS_PROPERTY = 1 << 0, /* all other options imply this one */ XT_IPVS_PROTO = 1 << 1, -- cgit v1.2.3-59-g8ed1b From ede1b4290781ae82ccf0f2ecc6dada8d3dd35779 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 18 Aug 2010 15:33:13 -0700 Subject: tracing: Fix timer tracing PowerTOP would like to be able to trace timers. Unfortunately, the current timer tracing is not very useful: the actual timer function is not recorded in the trace at the start of timer execution. Although this is recorded for timer "start" time (when it gets armed), this is not useful; most timers get started early, and a tracer like PowerTOP will never see this event, but will only see the actual running of the timer. This patch just adds the function to the timer tracing; I've verified with PowerTOP that now it can get useful information about timers. 
Signed-off-by: Arjan van de Ven Cc: xiaoguangrong@cn.fujitsu.com Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: # .35.x, .34.x, .33.x LKML-Reference: <4C6C5FA9.3000405@linux.intel.com> Signed-off-by: Ingo Molnar --- include/trace/events/timer.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index c624126a9c8a..425bcfe56c62 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -81,14 +81,16 @@ TRACE_EVENT(timer_expire_entry, TP_STRUCT__entry( __field( void *, timer ) __field( unsigned long, now ) + __field( void *, function) ), TP_fast_assign( __entry->timer = timer; __entry->now = jiffies; + __entry->function = timer->function; ), - TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) + TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now) ); /** @@ -200,14 +202,16 @@ TRACE_EVENT(hrtimer_expire_entry, TP_STRUCT__entry( __field( void *, hrtimer ) __field( s64, now ) + __field( void *, function) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->now = now->tv64; + __entry->function = hrtimer->function; ), - TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); -- cgit v1.2.3-59-g8ed1b From 1495cc9df4e81f5a8fa9b0b8f1034b14d24b7d8c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:46 -0700 Subject: Input: sysrq - drop tty argument from sysrq ops handlers No one is using the tty argument so let's get rid of it. 
Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- arch/arm/kernel/etm.c | 2 +- arch/powerpc/xmon/xmon.c | 5 ++--- arch/sparc/kernel/process_64.c | 2 +- drivers/char/sysrq.c | 42 ++++++++++++++++++++--------------------- drivers/gpu/drm/drm_fb_helper.c | 2 +- drivers/net/ibm_newemac/debug.c | 2 +- include/linux/sysrq.h | 6 +++++- kernel/debug/debug_core.c | 2 +- kernel/power/poweroff.c | 2 +- 9 files changed, 34 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c index 56418f98cd01..33c7077174db 100644 --- a/arch/arm/kernel/etm.c +++ b/arch/arm/kernel/etm.c @@ -230,7 +230,7 @@ static void etm_dump(void) etb_lock(t); } -static void sysrq_etm_dump(int key, struct tty_struct *tty) +static void sysrq_etm_dump(int key) { dev_dbg(tracer.dev, "Dumping ETB buffer\n"); etm_dump(); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0554445200bf..d17d04cfb2cd 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2880,15 +2880,14 @@ static void xmon_init(int enable) } #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_xmon(int key, struct tty_struct *tty) +static void sysrq_handle_xmon(int key) { /* ensure xmon is enabled */ xmon_init(1); debugger(get_irq_regs()); } -static struct sysrq_key_op sysrq_xmon_op = -{ +static struct sysrq_key_op sysrq_xmon_op = { .handler = sysrq_handle_xmon, .help_msg = "Xmon", .action_msg = "Entering xmon", diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a368b45..25b01b43b40d 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -303,7 +303,7 @@ void arch_trigger_all_cpu_backtrace(void) #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_globreg(int key, struct tty_struct *tty) +static void sysrq_handle_globreg(int key) { arch_trigger_all_cpu_backtrace(); } diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 
878ac0c2cc68..a892a3c249dd 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -76,7 +76,7 @@ static int __init sysrq_always_enabled_setup(char *str) __setup("sysrq_always_enabled", sysrq_always_enabled_setup); -static void sysrq_handle_loglevel(int key, struct tty_struct *tty) +static void sysrq_handle_loglevel(int key) { int i; @@ -93,7 +93,7 @@ static struct sysrq_key_op sysrq_loglevel_op = { }; #ifdef CONFIG_VT -static void sysrq_handle_SAK(int key, struct tty_struct *tty) +static void sysrq_handle_SAK(int key) { struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work; schedule_work(SAK_work); @@ -109,7 +109,7 @@ static struct sysrq_key_op sysrq_SAK_op = { #endif #ifdef CONFIG_VT -static void sysrq_handle_unraw(int key, struct tty_struct *tty) +static void sysrq_handle_unraw(int key) { struct kbd_struct *kbd = &kbd_table[fg_console]; @@ -126,7 +126,7 @@ static struct sysrq_key_op sysrq_unraw_op = { #define sysrq_unraw_op (*(struct sysrq_key_op *)NULL) #endif /* CONFIG_VT */ -static void sysrq_handle_crash(int key, struct tty_struct *tty) +static void sysrq_handle_crash(int key) { char *killer = NULL; @@ -141,7 +141,7 @@ static struct sysrq_key_op sysrq_crash_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_reboot(int key, struct tty_struct *tty) +static void sysrq_handle_reboot(int key) { lockdep_off(); local_irq_enable(); @@ -154,7 +154,7 @@ static struct sysrq_key_op sysrq_reboot_op = { .enable_mask = SYSRQ_ENABLE_BOOT, }; -static void sysrq_handle_sync(int key, struct tty_struct *tty) +static void sysrq_handle_sync(int key) { emergency_sync(); } @@ -165,7 +165,7 @@ static struct sysrq_key_op sysrq_sync_op = { .enable_mask = SYSRQ_ENABLE_SYNC, }; -static void sysrq_handle_show_timers(int key, struct tty_struct *tty) +static void sysrq_handle_show_timers(int key) { sysrq_timer_list_show(); } @@ -176,7 +176,7 @@ static struct sysrq_key_op sysrq_show_timers_op = { .action_msg = "Show clockevent devices & pending hrtimers (no 
others)", }; -static void sysrq_handle_mountro(int key, struct tty_struct *tty) +static void sysrq_handle_mountro(int key) { emergency_remount(); } @@ -188,7 +188,7 @@ static struct sysrq_key_op sysrq_mountro_op = { }; #ifdef CONFIG_LOCKDEP -static void sysrq_handle_showlocks(int key, struct tty_struct *tty) +static void sysrq_handle_showlocks(int key) { debug_show_all_locks(); } @@ -226,7 +226,7 @@ static void sysrq_showregs_othercpus(struct work_struct *dummy) static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus); -static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) +static void sysrq_handle_showallcpus(int key) { /* * Fall back to the workqueue based printing if the @@ -252,7 +252,7 @@ static struct sysrq_key_op sysrq_showallcpus_op = { }; #endif -static void sysrq_handle_showregs(int key, struct tty_struct *tty) +static void sysrq_handle_showregs(int key) { struct pt_regs *regs = get_irq_regs(); if (regs) @@ -266,7 +266,7 @@ static struct sysrq_key_op sysrq_showregs_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_showstate(int key, struct tty_struct *tty) +static void sysrq_handle_showstate(int key) { show_state(); } @@ -277,7 +277,7 @@ static struct sysrq_key_op sysrq_showstate_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty) +static void sysrq_handle_showstate_blocked(int key) { show_state_filter(TASK_UNINTERRUPTIBLE); } @@ -291,7 +291,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = { #ifdef CONFIG_TRACING #include -static void sysrq_ftrace_dump(int key, struct tty_struct *tty) +static void sysrq_ftrace_dump(int key) { ftrace_dump(DUMP_ALL); } @@ -305,7 +305,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = { #define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)NULL) #endif -static void sysrq_handle_showmem(int key, struct tty_struct *tty) +static void sysrq_handle_showmem(int key) { show_mem(); } @@ -330,7 +330,7 @@ 
static void send_sig_all(int sig) } } -static void sysrq_handle_term(int key, struct tty_struct *tty) +static void sysrq_handle_term(int key) { send_sig_all(SIGTERM); console_loglevel = 8; @@ -349,7 +349,7 @@ static void moom_callback(struct work_struct *ignored) static DECLARE_WORK(moom_work, moom_callback); -static void sysrq_handle_moom(int key, struct tty_struct *tty) +static void sysrq_handle_moom(int key) { schedule_work(&moom_work); } @@ -361,7 +361,7 @@ static struct sysrq_key_op sysrq_moom_op = { }; #ifdef CONFIG_BLOCK -static void sysrq_handle_thaw(int key, struct tty_struct *tty) +static void sysrq_handle_thaw(int key) { emergency_thaw_all(); } @@ -373,7 +373,7 @@ static struct sysrq_key_op sysrq_thaw_op = { }; #endif -static void sysrq_handle_kill(int key, struct tty_struct *tty) +static void sysrq_handle_kill(int key) { send_sig_all(SIGKILL); console_loglevel = 8; @@ -385,7 +385,7 @@ static struct sysrq_key_op sysrq_kill_op = { .enable_mask = SYSRQ_ENABLE_SIGNAL, }; -static void sysrq_handle_unrt(int key, struct tty_struct *tty) +static void sysrq_handle_unrt(int key) { normalize_rt_tasks(); } @@ -520,7 +520,7 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { printk("%s\n", op_p->action_msg); console_loglevel = orig_log_level; - op_p->handler(key, tty); + op_p->handler(key); } else { printk("This sysrq operation is disabled.\n"); } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index de82e201d682..5efd6d6742ec 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -369,7 +369,7 @@ static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) } static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); -static void drm_fb_helper_sysrq(int dummy1, struct tty_struct *dummy3) +static void drm_fb_helper_sysrq(int dummy1) { schedule_work(&drm_fb_helper_restore_work); } diff --git 
a/drivers/net/ibm_newemac/debug.c b/drivers/net/ibm_newemac/debug.c index 3995fafc1e08..8c6c1e2a8750 100644 --- a/drivers/net/ibm_newemac/debug.c +++ b/drivers/net/ibm_newemac/debug.c @@ -238,7 +238,7 @@ void emac_dbg_dump_all(void) } #if defined(CONFIG_MAGIC_SYSRQ) -static void emac_sysrq_handler(int key, struct tty_struct *tty) +static void emac_sysrq_handler(int key) { emac_dbg_dump_all(); } diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 609e8ca5f534..4ee650315119 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -31,7 +31,7 @@ struct tty_struct; #define SYSRQ_ENABLE_RTNICE 0x0100 struct sysrq_key_op { - void (*handler)(int, struct tty_struct *); + void (*handler)(int); char *help_msg; char *action_msg; int enable_mask; @@ -58,6 +58,10 @@ static inline void handle_sysrq(int key, struct tty_struct *tty) { } +static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); +{ +} + static inline int register_sysrq_key(int key, struct sysrq_key_op *op) { return -EINVAL; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 3c2d4972d235..de407c78178d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -741,7 +741,7 @@ static struct console kgdbcons = { }; #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_dbg(int key, struct tty_struct *tty) +static void sysrq_handle_dbg(int key) { if (!dbg_io_ops) { printk(KERN_CRIT "ERROR: No KGDB I/O module available\n"); diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index e8b337006276..d52359374e85 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -24,7 +24,7 @@ static void do_poweroff(struct work_struct *dummy) static DECLARE_WORK(poweroff_work, do_poweroff); -static void handle_poweroff(int key, struct tty_struct *tty) +static void handle_poweroff(int key) { /* run sysrq poweroff on boot cpu */ schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work); -- cgit v1.2.3-59-g8ed1b From 
b35de43b31040828f83046f40fd34ba33146409d Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:27 -0700 Subject: kfifo: implement missing __kfifo_skip_r() kfifo_skip() is currently broken, due to the missing of the internal helper function. Add it. Signed-off-by: Andrea Righi Cc: Greg KH Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 ++ kernel/kfifo.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 311f8753d713..4aa95f203f3e 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -836,6 +836,8 @@ extern void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize); extern unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize); +extern void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize); + extern unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize); diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 4502604ecadf..6b5580c57644 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -503,6 +503,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, } EXPORT_SYMBOL(__kfifo_out_r); +void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) +{ + unsigned int n; + + n = __kfifo_peek_n(fifo, recsize); + fifo->out += n + recsize; +} +EXPORT_SYMBOL(__kfifo_skip_r); + int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied, size_t recsize) { -- cgit v1.2.3-59-g8ed1b From f335397d177c906256ee1bba28e8c49e8ec63817 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:47 -0700 Subject: Input: sysrq - drop tty argument from handle_sysrq() Sysrq operations do not accept tty argument anymore so no need to pass it to us.
[Stephen Rothwell : fix build breakage in drm code caused by sysrq using bool but not including linux/types.h] [Sachin Sant : fix build breakage in s390 keyboard driver] Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- arch/ia64/hp/sim/simserial.c | 2 +- arch/um/drivers/mconsole_kern.c | 2 +- drivers/char/hangcheck-timer.c | 2 +- drivers/char/hvc_console.c | 2 +- drivers/char/hvsi.c | 2 +- drivers/char/sysrq.c | 11 +++++------ drivers/s390/char/ctrlchar.c | 4 +--- drivers/s390/char/keyboard.c | 2 +- drivers/serial/sn_console.c | 2 +- drivers/usb/serial/generic.c | 2 +- drivers/xen/manage.c | 2 +- include/linux/serial_core.h | 2 +- include/linux/sysrq.h | 12 +++++------- kernel/debug/kdb/kdb_main.c | 2 +- 14 files changed, 22 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 2bef5261d96d..1e8d71ad93ef 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -149,7 +149,7 @@ static void receive_chars(struct tty_struct *tty) ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR); while (!ch); - handle_sysrq(ch, NULL); + handle_sysrq(ch); } #endif seen_esc = 0; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index de317d0c3294..ebc680717e59 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -690,7 +690,7 @@ static void with_console(struct mc_request *req, void (*proc)(void *), static void sysrq_proc(void *arg) { char *op = arg; - handle_sysrq(*op, NULL); + handle_sysrq(*op); } void mconsole_sysrq(struct mc_request *req) diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index e0249722d25f..f953c96efc86 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -159,7 +159,7 @@ static void hangcheck_fire(unsigned long data) if (hangcheck_dump_tasks) { printk(KERN_CRIT "Hangcheck: Task state:\n"); #ifdef
CONFIG_MAGIC_SYSRQ - handle_sysrq('t', NULL); + handle_sysrq('t'); #endif /* CONFIG_MAGIC_SYSRQ */ } if (hangcheck_reboot) { diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index fa27d1676ee5..3afd62e856eb 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -651,7 +651,7 @@ int hvc_poll(struct hvc_struct *hp) if (sysrq_pressed) continue; } else if (sysrq_pressed) { - handle_sysrq(buf[i], tty); + handle_sysrq(buf[i]); sysrq_pressed = 0; continue; } diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c index 1f4b6de65a2d..a2bc885ce60a 100644 --- a/drivers/char/hvsi.c +++ b/drivers/char/hvsi.c @@ -403,7 +403,7 @@ static void hvsi_insert_chars(struct hvsi_struct *hp, const char *buf, int len) hp->sysrq = 1; continue; } else if (hp->sysrq) { - handle_sysrq(c, hp->tty); + handle_sysrq(c); hp->sysrq = 0; continue; } diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index a892a3c249dd..ef31bb81e843 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -493,7 +492,7 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p) sysrq_key_table[i] = op_p; } -void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) +void __handle_sysrq(int key, bool check_mask) { struct sysrq_key_op *op_p; int orig_log_level; @@ -545,10 +544,10 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) spin_unlock_irqrestore(&sysrq_key_table_lock, flags); } -void handle_sysrq(int key, struct tty_struct *tty) +void handle_sysrq(int key) { if (sysrq_on()) - __handle_sysrq(key, tty, 1); + __handle_sysrq(key, true); } EXPORT_SYMBOL(handle_sysrq); @@ -597,7 +596,7 @@ static bool sysrq_filter(struct input_handle *handle, unsigned int type, default: if (sysrq_down && value && value != 2) - __handle_sysrq(sysrq_xlate[code], NULL, 1); + __handle_sysrq(sysrq_xlate[code], true); break; } @@ -765,7 +764,7 @@ static ssize_t 
write_sysrq_trigger(struct file *file, const char __user *buf, if (get_user(c, buf)) return -EFAULT; - __handle_sysrq(c, NULL, 0); + __handle_sysrq(c, false); } return count; diff --git a/drivers/s390/char/ctrlchar.c b/drivers/s390/char/ctrlchar.c index c6cbcb3f925e..0e9a309b9669 100644 --- a/drivers/s390/char/ctrlchar.c +++ b/drivers/s390/char/ctrlchar.c @@ -16,12 +16,11 @@ #ifdef CONFIG_MAGIC_SYSRQ static int ctrlchar_sysrq_key; -static struct tty_struct *sysrq_tty; static void ctrlchar_handle_sysrq(struct work_struct *work) { - handle_sysrq(ctrlchar_sysrq_key, sysrq_tty); + handle_sysrq(ctrlchar_sysrq_key); } static DECLARE_WORK(ctrlchar_work, ctrlchar_handle_sysrq); @@ -54,7 +53,6 @@ ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty) /* racy */ if (len == 3 && buf[1] == '-') { ctrlchar_sysrq_key = buf[2]; - sysrq_tty = tty; schedule_work(&ctrlchar_work); return CTRLCHAR_SYSRQ; } diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 18d9a497863b..8cd58e412b5e 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -305,7 +305,7 @@ kbd_keycode(struct kbd_data *kbd, unsigned int keycode) if (kbd->sysrq) { if (kbd->sysrq == K(KT_LATIN, '-')) { kbd->sysrq = 0; - handle_sysrq(value, kbd->tty); + handle_sysrq(value); return; } if (value == '-') { diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c index 7e5e5efea4e2..cff9a306660f 100644 --- a/drivers/serial/sn_console.c +++ b/drivers/serial/sn_console.c @@ -492,7 +492,7 @@ sn_receive_chars(struct sn_cons_port *port, unsigned long flags) sysrq_requested = 0; if (ch && time_before(jiffies, sysrq_timeout)) { spin_unlock_irqrestore(&port->sc_port.lock, flags); - handle_sysrq(ch, NULL); + handle_sysrq(ch); spin_lock_irqsave(&port->sc_port.lock, flags); /* ignore actual sysrq command char */ continue; diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index ca92f67747cc..1e846cc3c7a4 100644 --- 
a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -453,7 +453,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty, { if (port->sysrq && port->port.console) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, tty); + handle_sysrq(ch); port->sysrq = 0; return 1; } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 1799bd890315..ef9c7db52077 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -237,7 +237,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, goto again; if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); + handle_sysrq(sysrq_key); } static struct xenbus_watch sysrq_watch = { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3c2ad99fed34..64458a9a8938 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -465,7 +465,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) #ifdef SUPPORT_SYSRQ if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, port->state->port.tty); + handle_sysrq(ch); port->sysrq = 0; return 1; } diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 4ee650315119..387fa7d05c98 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -15,9 +15,7 @@ #define _LINUX_SYSRQ_H #include - -struct pt_regs; -struct tty_struct; +#include /* Possible values of bitmask for enabling sysrq functions */ /* 0x0001 is reserved for enable everything */ @@ -44,8 +42,8 @@ struct sysrq_key_op { * are available -- else NULL's). 
*/ -void handle_sysrq(int key, struct tty_struct *tty); -void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); +void handle_sysrq(int key); +void __handle_sysrq(int key, bool check_mask); int register_sysrq_key(int key, struct sysrq_key_op *op); int unregister_sysrq_key(int key, struct sysrq_key_op *op); struct sysrq_key_op *__sysrq_get_key_op(int key); @@ -54,11 +52,11 @@ int sysrq_toggle_support(int enable_mask); #else -static inline void handle_sysrq(int key, struct tty_struct *tty) +static inline void handle_sysrq(int key) { } -static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); +static inline void __handle_sysrq(int key, bool check_mask) { } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 28b844118bbd..caf057a3de0e 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1929,7 +1929,7 @@ static int kdb_sr(int argc, const char **argv) if (argc != 1) return KDB_ARGCOUNT; kdb_trap_printk++; - __handle_sysrq(*argv[1], NULL, 0); + __handle_sysrq(*argv[1], false); kdb_trap_printk--; return 0; -- cgit v1.2.3-59-g8ed1b From 6ee9f4b4affe751d313d2538999aeec134d413a6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:47 -0700 Subject: USB: drop tty argument from usb_serial_handle_sysrq_char() Since handle_sysrq() does not take tty as argument anymore we can drop it from usb_serial_handle_sysrq_char() as well. 
Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- drivers/usb/serial/ftdi_sio.c | 2 +- drivers/usb/serial/generic.c | 8 +++----- drivers/usb/serial/pl2303.c | 2 +- drivers/usb/serial/ssu100.c | 2 +- include/linux/usb/serial.h | 3 +-- 5 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index eb12d9b096b4..5d47983b533c 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1831,7 +1831,7 @@ static int ftdi_process_packet(struct tty_struct *tty, if (port->port.console && port->sysrq) { for (i = 0; i < len; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, flag); } } else { diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 1e846cc3c7a4..1abe34c38c08 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -343,7 +343,7 @@ void usb_serial_generic_process_read_urb(struct urb *urb) tty_insert_flip_string(tty, ch, urb->actual_length); else { for (i = 0; i < urb->actual_length; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, TTY_NORMAL); } } @@ -448,8 +448,7 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty) EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle); #ifdef CONFIG_MAGIC_SYSRQ -int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, unsigned int ch) +int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { if (port->sysrq && port->port.console) { if (ch && time_before(jiffies, port->sysrq)) { @@ -462,8 +461,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty, return 0; } #else -int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, unsigned 
int ch) +int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { return 0; } diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 6b6001822279..34ad7b3d5948 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -788,7 +788,7 @@ static void pl2303_process_read_urb(struct urb *urb) if (port->port.console && port->sysrq) { for (i = 0; i < urb->actual_length; ++i) - if (!usb_serial_handle_sysrq_char(tty, port, data[i])) + if (!usb_serial_handle_sysrq_char(port, data[i])) tty_insert_flip_char(tty, data[i], tty_flag); } else { tty_insert_flip_string_fixed_flag(tty, data, tty_flag, diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index 6e82d4f54bc8..819de4740388 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -596,7 +596,7 @@ static int ssu100_process_packet(struct tty_struct *tty, if (port->port.console && port->sysrq) { for (i = 0; i < len; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, flag); } } else diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 84a4c44c208b..55675b1efb28 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -342,8 +342,7 @@ extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port, extern void usb_serial_generic_process_read_urb(struct urb *urb); extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size); -extern int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, +extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch); extern int usb_serial_handle_break(struct usb_serial_port *port); -- cgit v1.2.3-59-g8ed1b From 8905aaafb4b5d9764c5b4b54c7d03eb41bb0a7e9 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 19 Aug 2010 09:52:28 -0700 Subject: Input: 
uinput - add devname alias to allow module on-demand load Recent modprobe and udev versions allow to create device nodes for modules which are not loaded. Only the first access will cause the in-kernel module loader to pull-in the module. Systems which never access the device node will not needlessly load the module, and no longer need init scripts or other facilities to unconditionally load it. Signed-off-by: Kay Sievers Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 2 ++ include/linux/miscdevice.h | 1 + include/linux/uinput.h | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index bb53fd33cd1c..0d4266a533a5 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -811,6 +811,8 @@ static struct miscdevice uinput_misc = { .minor = UINPUT_MINOR, .name = UINPUT_NAME, }; +MODULE_ALIAS_MISCDEV(UINPUT_MINOR); +MODULE_ALIAS("devname:" UINPUT_NAME); static int __init uinput_init(void) { diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index bafffc737903..18fd13028ba1 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -33,6 +33,7 @@ #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ #define MPT_MINOR 220 #define MPT2SAS_MINOR 221 +#define UINPUT_MINOR 223 #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 diff --git a/include/linux/uinput.h b/include/linux/uinput.h index 60c81da77f0f..05f7fed2b173 100644 --- a/include/linux/uinput.h +++ b/include/linux/uinput.h @@ -37,7 +37,6 @@ #define UINPUT_VERSION 3 #ifdef __KERNEL__ -#define UINPUT_MINOR 223 #define UINPUT_NAME "uinput" #define UINPUT_BUFFER_SIZE 16 #define UINPUT_NUM_REQUESTS 16 -- cgit v1.2.3-59-g8ed1b From 297c5eee372478fc32fec5fe8eed711eedb13f3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Aug 2010 16:24:55 -0700 Subject: mm: make the vma list be doubly linked It's a really simple list, and several of the 
users want to go backwards in it to find the previous vma. So rather than have to look up the previous entry with 'find_vma_prev()' or something similar, just make it doubly linked instead. Tested-by: Ian Campbell Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- kernel/fork.c | 7 +++++-- mm/mmap.c | 21 +++++++++++++++++---- mm/nommu.c | 7 +++++-- 4 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f37..ee7e258627f9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -134,7 +134,7 @@ struct vm_area_struct { within vm_mm. */ /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next; + struct vm_area_struct *vm_next, *vm_prev; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, see mm.h. */ diff --git a/kernel/fork.c b/kernel/fork.c index 856eac3ec52e..b7e9d60a675d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -300,7 +300,7 @@ out: #ifdef CONFIG_MMU static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { - struct vm_area_struct *mpnt, *tmp, **pprev; + struct vm_area_struct *mpnt, *tmp, *prev, **pprev; struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; @@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; + prev = NULL; for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { struct file *file; @@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~VM_LOCKED; tmp->vm_mm = mm; - tmp->vm_next = NULL; + tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; @@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ *pprev = tmp; pprev = &tmp->vm_next; + tmp->vm_prev = prev; + prev = tmp; 
__vma_link_rb(mm, tmp, rb_link, rb_parent); rb_link = &tmp->vm_rb.rb_right; diff --git a/mm/mmap.c b/mm/mmap.c index 31003338b978..331e51af38c9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -388,17 +388,23 @@ static inline void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent) { + struct vm_area_struct *next; + + vma->vm_prev = prev; if (prev) { - vma->vm_next = prev->vm_next; + next = prev->vm_next; prev->vm_next = vma; } else { mm->mmap = vma; if (rb_parent) - vma->vm_next = rb_entry(rb_parent, + next = rb_entry(rb_parent, struct vm_area_struct, vm_rb); else - vma->vm_next = NULL; + next = NULL; } + vma->vm_next = next; + if (next) + next->vm_prev = vma; } void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, @@ -483,7 +489,11 @@ static inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev) { - prev->vm_next = vma->vm_next; + struct vm_area_struct *next = vma->vm_next; + + prev->vm_next = next; + if (next) + next->vm_prev = prev; rb_erase(&vma->vm_rb, &mm->mm_rb); if (mm->mmap_cache == vma) mm->mmap_cache = prev; @@ -1915,6 +1925,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr; insertion_point = (prev ? &prev->vm_next : &mm->mmap); + vma->vm_prev = NULL; do { rb_erase(&vma->vm_rb, &mm->mm_rb); mm->map_count--; @@ -1922,6 +1933,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, vma = vma->vm_next; } while (vma && vma->vm_start < end); *insertion_point = vma; + if (vma) + vma->vm_prev = prev; tail_vma->vm_next = NULL; if (mm->unmap_area == arch_unmap_area) addr = prev ? 
prev->vm_end : mm->mmap_base; diff --git a/mm/nommu.c b/mm/nommu.c index efa9a380335e..88ff091eb07a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags) */ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) { - struct vm_area_struct *pvma, **pp; + struct vm_area_struct *pvma, **pp, *next; struct address_space *mapping; struct rb_node **p, *parent; @@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) break; } - vma->vm_next = *pp; + next = *pp; *pp = vma; + vma->vm_next = next; + if (next) + next->vm_prev = vma; } /* -- cgit v1.2.3-59-g8ed1b From e36c886a0f9d624377977fa6cae309cfd7f362fa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 21 Aug 2010 13:07:26 -0700 Subject: workqueue: Add basic tracepoints to track workqueue execution With the introduction of the new unified work queue thread pools, we lost one feature: It's no longer possible to know which worker is causing the CPU to wake out of idle. The result is that PowerTOP now reports a lot of "kworker/a:b" instead of more readable results. This patch adds a pair of tracepoints to the new workqueue code, similar in style to the timer/hrtimer tracepoints. 
With this pair of tracepoints, the next PowerTOP can correctly report which work item caused the wakeup (and how long it took): Interrupt (43) i915 time 3.51ms wakeups 141 Work ieee80211_iface_work time 0.81ms wakeups 29 Work do_dbs_timer time 0.55ms wakeups 24 Process Xorg time 21.36ms wakeups 4 Timer sched_rt_period_timer time 0.01ms wakeups 1 Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/trace/events/workqueue.h | 62 ++++++++++++++++++++++++++++++++++++++++ kernel/workqueue.c | 9 ++++++ 2 files changed, 71 insertions(+) create mode 100644 include/trace/events/workqueue.h (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 000000000000..49682d7e9d60 --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,62 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include +#include + +/** + * workqueue_execute_start - called immediately before the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution. + */ +TRACE_EVENT(workqueue_execute_start, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + ), + + TP_printk("work struct %p: function %pf", __entry->work, __entry->function) +); + +/** + * workqueue_execute_end - called immediately after the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution.
+ */ +TRACE_EVENT(workqueue_execute_end, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + ), + + TP_fast_assign( + __entry->work = work; + ), + + TP_printk("work struct %p", __entry->work) +); + + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2994a0e3a61c..8bd600c020e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -35,6 +35,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + #include "workqueue_sched.h" enum { @@ -1790,7 +1793,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work) work_clear_pending(work); lock_map_acquire(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); + trace_workqueue_execute_start(work); f(work); + /* + * While we must be careful to not use "work" after this, the trace + * point will only record its address. + */ + trace_workqueue_execute_end(work); lock_map_release(&lockdep_map); lock_map_release(&cwq->wq->lockdep_map); -- cgit v1.2.3-59-g8ed1b From 2eebf582c9b3106abb9c33f4fc0a347fb9391037 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:50 -0400 Subject: fanotify: flush outstanding perm requests on group destroy When an fanotify listener is closing it may cause a deadlock between the listener and the original task doing an fs operation. If the original task is waiting for a permissions response it will be holding the srcu lock. The listener cannot clean up and exit until after that srcu lock is synchronized. Thus deadlock. The fix introduced here is to stop accepting new permissions events when a listener is shutting down and to grant permission for all outstanding events. Thus the original task will eventually release the srcu lock and the listener can complete shutdown.
Reported-by: Andreas Gruenbacher Cc: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 27 +++++++++++++++++++++++++++ include/linux/fanotify.h | 7 ------- include/linux/fsnotify_backend.h | 1 + 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 032b837fcd11..b966b7230f47 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group, re->fd = fd; mutex_lock(&group->fanotify_data.access_mutex); + + if (group->fanotify_data.bypass_perm) { + mutex_unlock(&group->fanotify_data.access_mutex); + kmem_cache_free(fanotify_response_event_cache, re); + event->response = FAN_ALLOW; + return 0; + } + list_add_tail(&re->list, &group->fanotify_data.access_list); mutex_unlock(&group->fanotify_data.access_mutex); @@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct fanotify_response_event *re, *lre; pr_debug("%s: file=%p group=%p\n", __func__, file, group); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + mutex_lock(&group->fanotify_data.access_mutex); + + group->fanotify_data.bypass_perm = true; + + list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { + pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, + re, re->event); + + list_del_init(&re->list); + re->event->response = FAN_ALLOW; + + kmem_cache_free(fanotify_response_event_cache, re); + } + mutex_unlock(&group->fanotify_data.access_mutex); + + wake_up(&group->fanotify_data.access_waitq); +#endif /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_put_group(group); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 
f0949a57ca9d..985435622ecd 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -95,11 +95,4 @@ struct fanotify_response { (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ (long)(meta)->event_len <= (long)(len)) -#ifdef __KERNEL__ - -struct fanotify_wait { - struct fsnotify_event *event; - __s32 fd; -}; -#endif /* __KERNEL__ */ #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ed36fb57c426..e40190d16878 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -156,6 +156,7 @@ struct fsnotify_group { struct mutex access_mutex; struct list_head access_list; wait_queue_head_t access_waitq; + bool bypass_perm; /* protected by access_mutex */ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; } fanotify_data; -- cgit v1.2.3-59-g8ed1b From 09cd2b99c6cdd1e14e84c1febca2fb91e9f4e5ba Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 17:25:05 +0000 Subject: header: fix broken headers for user space __packed is only defined in kernel space, so we should use __attribute__((packed)) for the code shared between kernel and user space. Two __attribute() annotations are replaced with __attribute__() too. Signed-off-by: Changli Gao Signed-off-by: David S. 
Miller --- include/linux/if_ether.h | 2 +- include/linux/if_fddi.h | 8 ++++---- include/linux/if_hippi.h | 8 ++++---- include/linux/if_pppox.h | 10 +++++----- include/linux/ipv6.h | 4 ++-- include/linux/nbd.h | 2 +- include/linux/ncp.h | 10 +++++----- include/linux/netfilter/xt_IDLETIMER.h | 2 +- include/linux/phonet.h | 4 ++-- include/linux/rfkill.h | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index c831467774d0..bed7a4682b90 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -119,7 +119,7 @@ struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ -} __packed; +} __attribute__((packed)); #ifdef __KERNEL__ #include diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index 9947c39e62f6..e6dc11e7f9a5 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h @@ -67,7 +67,7 @@ struct fddi_8022_1_hdr { __u8 dsap; /* destination service access point */ __u8 ssap; /* source service access point */ __u8 ctrl; /* control byte #1 */ -} __packed; +} __attribute__((packed)); /* Define 802.2 Type 2 header */ struct fddi_8022_2_hdr { @@ -75,7 +75,7 @@ struct fddi_8022_2_hdr { __u8 ssap; /* source service access point */ __u8 ctrl_1; /* control byte #1 */ __u8 ctrl_2; /* control byte #2 */ -} __packed; +} __attribute__((packed)); /* Define 802.2 SNAP header */ #define FDDI_K_OUI_LEN 3 @@ -85,7 +85,7 @@ struct fddi_snap_hdr { __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ __be16 ethertype; /* packet type ID field */ -} __packed; +} __attribute__((packed)); /* Define FDDI LLC frame header */ struct fddihdr { @@ -98,7 +98,7 @@ struct fddihdr { struct fddi_8022_2_hdr llc_8022_2; struct fddi_snap_hdr llc_snap; } hdr; -} __packed; +} __attribute__((packed)); #ifdef __KERNEL__ #include 
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index 5fe5f307c6f5..cdc049f1829a 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -104,7 +104,7 @@ struct hippi_fp_hdr { __be32 fixed; #endif __be32 d2_size; -} __packed; +} __attribute__((packed)); struct hippi_le_hdr { #if defined (__BIG_ENDIAN_BITFIELD) @@ -129,7 +129,7 @@ struct hippi_le_hdr { __u8 daddr[HIPPI_ALEN]; __u16 locally_administered; __u8 saddr[HIPPI_ALEN]; -} __packed; +} __attribute__((packed)); #define HIPPI_OUI_LEN 3 /* @@ -142,12 +142,12 @@ struct hippi_snap_hdr { __u8 ctrl; /* always 0x03 */ __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ __be16 ethertype; /* packet type ID field */ -} __packed; +} __attribute__((packed)); struct hippi_hdr { struct hippi_fp_hdr fp; struct hippi_le_hdr le; struct hippi_snap_hdr snap; -} __packed; +} __attribute__((packed)); #endif /* _LINUX_IF_HIPPI_H */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f162..27741e05446f 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -59,7 +59,7 @@ struct sockaddr_pppox { union{ struct pppoe_addr pppoe; }sa_addr; -} __packed; +} __attribute__((packed)); /* The use of the above union isn't viable because the size of this * struct must stay fixed over time -- applications use sizeof(struct @@ -70,7 +70,7 @@ struct sockaddr_pppol2tp { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ struct pppol2tp_addr pppol2tp; -} __packed; +} __attribute__((packed)); /* The L2TPv3 protocol changes tunnel and session ids from 16 to 32 * bits. So we need a different sockaddr structure. 
@@ -79,7 +79,7 @@ struct sockaddr_pppol2tpv3 { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ struct pppol2tpv3_addr pppol2tp; -} __packed; +} __attribute__((packed)); /********************************************************************* * @@ -101,7 +101,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __attribute__ ((packed)); /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -129,7 +129,7 @@ struct pppoe_hdr { __be16 sid; __be16 length; struct pppoe_tag tag[0]; -} __packed; +} __attribute__((packed)); /* Length of entire PPPoE + PPP header */ #define PPPOE_SES_HLEN 8 diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ab9e9e89e407..e62683ba88e6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -58,7 +58,7 @@ struct ipv6_opt_hdr { /* * TLV encoded option data follows. */ -} __packed; /* required for some archs */ +} __attribute__((packed)); /* required for some archs */ #define ipv6_destopt_hdr ipv6_opt_hdr #define ipv6_hopopt_hdr ipv6_opt_hdr @@ -99,7 +99,7 @@ struct ipv6_destopt_hao { __u8 type; __u8 length; struct in6_addr addr; -} __packed; +} __attribute__((packed)); /* * IPv6 fixed header diff --git a/include/linux/nbd.h b/include/linux/nbd.h index bb58854a8061..d146ca10c0f5 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -88,7 +88,7 @@ struct nbd_request { char handle[8]; __be64 from; __be32 len; -} __packed; +} __attribute__((packed)); /* * This is the reply packet that nbd-server sends back to the client after diff --git a/include/linux/ncp.h b/include/linux/ncp.h index 3ace8370e61e..99f0adeeb3f3 100644 --- a/include/linux/ncp.h +++ b/include/linux/ncp.h @@ -27,7 +27,7 @@ struct ncp_request_header { __u8 conn_high; __u8 function; __u8 data[0]; -} __packed; +} __attribute__((packed)); #define NCP_REPLY (0x3333) #define NCP_WATCHDOG (0x3E3E) @@ -42,7 +42,7 @@ struct ncp_reply_header { __u8 
completion_code; __u8 connection_state; __u8 data[0]; -} __packed; +} __attribute__((packed)); #define NCP_VOLNAME_LEN (16) #define NCP_NUMBER_OF_VOLUMES (256) @@ -158,7 +158,7 @@ struct nw_info_struct { #ifdef __KERNEL__ struct nw_nfs_info nfs; #endif -} __packed; +} __attribute__((packed)); /* modify mask - use with MODIFY_DOS_INFO structure */ #define DM_ATTRIBUTES (cpu_to_le32(0x02)) @@ -190,12 +190,12 @@ struct nw_modify_dos_info { __u16 inheritanceGrantMask; __u16 inheritanceRevokeMask; __u32 maximumSpace; -} __packed; +} __attribute__((packed)); struct nw_search_sequence { __u8 volNumber; __u32 dirBase; __u32 sequence; -} __packed; +} __attribute__((packed)); #endif /* _LINUX_NCP_H */ diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h index 3e1aa1be942e..208ae9387331 100644 --- a/include/linux/netfilter/xt_IDLETIMER.h +++ b/include/linux/netfilter/xt_IDLETIMER.h @@ -39,7 +39,7 @@ struct idletimer_tg_info { char label[MAX_IDLETIMER_LABEL_SIZE]; /* for kernel module internal use only */ - struct idletimer_tg *timer __attribute((aligned(8))); + struct idletimer_tg *timer __attribute__((aligned(8))); }; #endif diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 24426c3d6b5a..76edadf046d3 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -56,7 +56,7 @@ struct phonethdr { __be16 pn_length; __u8 pn_robj; __u8 pn_sobj; -} __packed; +} __attribute__((packed)); /* Common Phonet payload header */ struct phonetmsg { @@ -98,7 +98,7 @@ struct sockaddr_pn { __u8 spn_dev; __u8 spn_resource; __u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3]; -} __packed; +} __attribute__((packed)); /* Well known address */ #define PN_DEV_PC 0x10 diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 4f82326eb294..08c32e4f261a 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -81,7 +81,7 @@ struct rfkill_event { __u8 type; __u8 op; __u8 soft, hard; -} __packed; +} 
__attribute__((packed)); /* * We are planning to be backward and forward compatible with changes -- cgit v1.2.3-59-g8ed1b From c93a4dfb31f2c023da3ad1238c352452f2cc0e05 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:28 +0100 Subject: xen: pvhvm: allow user to request no emulated device unplug this allows the user to disable pvhvm and revert to emulated devices in case of a system misconfiguration (e.g. initramfs with only emulated drivers in it). Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 1 + arch/x86/xen/platform-pci-unplug.c | 5 +++++ include/xen/platform_pci.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2c85c0692b01..8bbe83b9d0b2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2631,6 +2631,7 @@ and is between 256 and 4096 characters. It is defined in the file all -- unplug all emulated devices (NICs and IDE disks) ignore -- continue loading the Xen platform PCI driver even if the version check failed + never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] Format: diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 554c002a1e1a..070dfa0654bd 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -72,6 +72,9 @@ void __init xen_unplug_emulated_devices(void) { int r; + /* user explicitly requested no unplug */ + if (xen_emul_unplug & XEN_UNPLUG_NEVER) + return; /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver. 
@@ -127,6 +130,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; else if (!strncmp(p, "ignore", l)) xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "never", l)) + xen_emul_unplug |= XEN_UNPLUG_NEVER; else printk(KERN_WARNING "unrecognised option '%s' " "in parameter 'xen_emul_unplug'\n", p); diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index ce9d671c636c..123b7752fa6a 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -21,6 +21,7 @@ #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 #define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3-59-g8ed1b From 1dc7ce99b091a11cce0f34456c1ffcb928f17edd Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:29 +0100 Subject: xen: pvhvm: rename xen_emul_unplug=ignore to =unnecessary It is not immediately clear what this option causes to become ignored. The actual meaning is that it is not necessary to unplug the emulated devices to safely use the PV ones, even if the platform does not support the unplug protocol. (presumably the user will only add this option if they have ensured that their domain configuration is safe). I think xen_emul_unplug=unnecessary better captures this. Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 5 +++-- arch/x86/xen/platform-pci-unplug.c | 13 +++++++------ drivers/block/xen-blkfront.c | 2 +- include/xen/platform_pci.h | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8bbe83b9d0b2..f084af0cb8e0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2629,8 +2629,9 @@ and is between 256 and 4096 characters.
It is defined in the file aux-ide-disks -- unplug non-primary-master IDE devices nics -- unplug network devices all -- unplug all emulated devices (NICs and IDE disks) - ignore -- continue loading the Xen platform PCI driver even - if the version check failed + unnecessary -- unplugging emulated devices is + unnecessary even if the host did not respond to + the unplug protocol never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 070dfa0654bd..0f456386cce5 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -78,10 +78,11 @@ void __init xen_unplug_emulated_devices(void) /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver. - * Also enable the Xen platform PCI driver if the version is really old - * and the user told us to ignore it. */ + * Also enable the Xen platform PCI driver if the host does + * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC) + * but the user told us that unplugging is unnecessary. 
*/ if (r && !(r == XEN_PLATFORM_ERR_MAGIC && - (xen_emul_unplug & XEN_UNPLUG_IGNORE))) + (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))) return; /* Set the default value of xen_emul_unplug depending on whether or * not the Xen PV frontends and the Xen platform PCI driver have @@ -102,7 +103,7 @@ void __init xen_unplug_emulated_devices(void) } } /* Now unplug the emulated devices */ - if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE)) + if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)) outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); xen_platform_pci_unplug = xen_emul_unplug; } @@ -128,8 +129,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; else if (!strncmp(p, "nics", l)) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; - else if (!strncmp(p, "ignore", l)) - xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "unnecessary", l)) + xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY; else if (!strncmp(p, "never", l)) xen_emul_unplug |= XEN_UNPLUG_NEVER; else diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..ab735a605cf3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -834,7 +834,7 @@ static int blkfront_probe(struct xenbus_device *dev, char *type; int len; /* no unplug has been done: do not hook devices != xen vbds */ - if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { + if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { int major; if (!VDEV_IS_EXTENDED(vdevice)) diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 123b7752fa6a..590ccfd82645 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -20,7 +20,7 @@ #define XEN_UNPLUG_ALL_NICS 2 #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_UNNECESSARY 8 #define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { -- cgit v1.2.3-59-g8ed1b From 9c35e90c6fcf7f5baf27a63d9565e9f47633f299 Mon Sep 17 
00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 12:01:35 +0100 Subject: xen: pvhvm: make it clearer that XEN_UNPLUG_* define bits in a bitfield by defining in terms of (1<<N). Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- include/xen/platform_pci.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 590ccfd82645..a785a3b0c8c7 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -16,12 +16,15 @@ #define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */ #define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */ -#define XEN_UNPLUG_ALL_IDE_DISKS 1 -#define XEN_UNPLUG_ALL_NICS 2 -#define XEN_UNPLUG_AUX_IDE_DISKS 4 -#define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_UNNECESSARY 8 -#define XEN_UNPLUG_NEVER 16 +#define XEN_UNPLUG_ALL_IDE_DISKS (1<<0) +#define XEN_UNPLUG_ALL_NICS (1<<1) +#define XEN_UNPLUG_AUX_IDE_DISKS (1<<2) +#define XEN_UNPLUG_ALL (XEN_UNPLUG_ALL_IDE_DISKS|\ + XEN_UNPLUG_ALL_NICS|\ + XEN_UNPLUG_AUX_IDE_DISKS) + +#define XEN_UNPLUG_UNNECESSARY (1<<16) +#define XEN_UNPLUG_NEVER (1<<17) static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3-59-g8ed1b From 5dd531a03ad721b41911ddb32e6e0481404e7aaf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Aug 2010 13:52:19 +0200 Subject: block: add function call to switch the IO scheduler from a driver Currently drivers must do an elevator_exit() + elevator_init() to switch IO schedulers. There are a few problems with this: - Since commit 1abec4fdbb142e3ccb6ce99832fae42129134a96, elevator_init() requires a zeroed out q->elevator pointer. The two existing in-kernel users don't do that. - It will only work at initialization time, since using the above two-staged construct does not properly quiesce the queue.
So add elevator_change() which takes care of this, and convert the elv_iosched_store() sysfs interface to use this helper as well. Reported-by: Peter Oberparleiter Reported-by: Kevin Vigor Signed-off-by: Jens Axboe --- block/elevator.c | 44 +++++++++++++++++++++++++++++++------------- include/linux/elevator.h | 1 + 2 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/elevator.c b/block/elevator.c index ec585c9554d3..205b09a5bd9e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1009,18 +1009,19 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { struct elevator_queue *old_elevator, *e; void *data; + int err; /* * Allocate new elevator */ e = elevator_alloc(q, new_e); if (!e) - return 0; + return -ENOMEM; data = elevator_init_queue(q, e); if (!data) { kobject_put(&e->kobj); - return 0; + return -ENOMEM; } /* @@ -1043,7 +1044,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) __elv_unregister_queue(old_elevator); - if (elv_register_queue(q)) + err = elv_register_queue(q); + if (err) goto fail_register; /* @@ -1056,7 +1058,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); - return 1; + return 0; fail_register: /* @@ -1071,17 +1073,19 @@ fail_register: queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); - return 0; + return err; } -ssize_t elv_iosched_store(struct request_queue *q, const char *name, - size_t count) +/* + * Switch this queue to the given IO scheduler. 
+ */ +int elevator_change(struct request_queue *q, const char *name) { char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; if (!q->elevator) - return count; + return -ENXIO; strlcpy(elevator_name, name, sizeof(elevator_name)); e = elevator_get(strstrip(elevator_name)); @@ -1092,13 +1096,27 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { elevator_put(e); - return count; + return 0; } - if (!elevator_switch(q, e)) - printk(KERN_ERR "elevator: switch to %s failed\n", - elevator_name); - return count; + return elevator_switch(q, e); +} +EXPORT_SYMBOL(elevator_change); + +ssize_t elv_iosched_store(struct request_queue *q, const char *name, + size_t count) +{ + int ret; + + if (!q->elevator) + return count; + + ret = elevator_change(q, name); + if (!ret) + return count; + + printk(KERN_ERR "elevator: switch to %s failed\n", name); + return ret; } ssize_t elv_iosched_show(struct request_queue *q, char *name) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c958f4fce1e..926b50322a46 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -136,6 +136,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); +extern int elevator_change(struct request_queue *, const char *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* -- cgit v1.2.3-59-g8ed1b From 8488a38f4d2f43bd55a3e0db4cd57a5bef3af6d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Aug 2010 15:01:02 +0100 Subject: kobject: Break the kobject namespace defs into their own header Break the kobject namespace defs into their own header to avoid a header file inclusion ordering problem between linux/sysfs.h and linux/kobject.h. This fixes the build breakage on older versions of gcc. 
Signed-off-by: David Howells Cc: Eric Biederman Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 35 +---------------------------- include/linux/kobject_ns.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 1 + 3 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 include/linux/kobject_ns.h (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index cf343a852534..7950a37a7146 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -22,6 +22,7 @@ #include #include #include +#include <linux/kobject_ns.h> #include #include #include @@ -136,42 +137,8 @@ struct kobj_attribute { extern const struct sysfs_ops kobj_sysfs_ops; -/* - * Namespace types which are used to tag kobjects and sysfs entries. - * Network namespace will likely be the first. - */ -enum kobj_ns_type { - KOBJ_NS_TYPE_NONE = 0, - KOBJ_NS_TYPE_NET, - KOBJ_NS_TYPES -}; - struct sock; -/* - * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace - * @netlink_ns: return namespace to which a sock belongs (right?) - * @initial_ns: return the initial namespace (i.e. init_net_ns) - */ -struct kobj_ns_type_operations { - enum kobj_ns_type type; - const void *(*current_ns)(void); - const void *(*netlink_ns)(struct sock *sk); - const void *(*initial_ns)(void); -}; - -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); -int kobj_ns_type_registered(enum kobj_ns_type type); -const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); -const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); - -const void *kobj_ns_current(enum kobj_ns_type type); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); -const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); - - /** * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem.
* diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h new file mode 100644 index 000000000000..82cb5bf461fb --- /dev/null +++ b/include/linux/kobject_ns.h @@ -0,0 +1,56 @@ +/* Kernel object name space definitions + * + * Copyright (c) 2002-2003 Patrick Mochel + * Copyright (c) 2002-2003 Open Source Development Labs + * Copyright (c) 2006-2008 Greg Kroah-Hartman + * Copyright (c) 2006-2008 Novell Inc. + * + * Split from kobject.h by David Howells (dhowells@redhat.com) + * + * This file is released under the GPLv2. + * + * Please read Documentation/kobject.txt before using the kobject + * interface, ESPECIALLY the parts about reference counts and object + * destructors. + */ + +#ifndef _LINUX_KOBJECT_NS_H +#define _LINUX_KOBJECT_NS_H + +struct sock; +struct kobject; + +/* + * Namespace types which are used to tag kobjects and sysfs entries. + * Network namespace will likely be the first. + */ +enum kobj_ns_type { + KOBJ_NS_TYPE_NONE = 0, + KOBJ_NS_TYPE_NET, + KOBJ_NS_TYPES +}; + +/* + * Callbacks so sysfs can determine namespaces + * @current_ns: return calling task's namespace + * @netlink_ns: return namespace to which a sock belongs (right?) + * @initial_ns: return the initial namespace (i.e. 
init_net_ns) + */ +struct kobj_ns_type_operations { + enum kobj_ns_type type; + const void *(*current_ns)(void); + const void *(*netlink_ns)(struct sock *sk); + const void *(*initial_ns)(void); +}; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); +int kobj_ns_type_registered(enum kobj_ns_type type); +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); + +const void *kobj_ns_current(enum kobj_ns_type type); +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); +const void *kobj_ns_initial(enum kobj_ns_type type); +void kobj_ns_exit(enum kobj_ns_type type, const void *ns); + +#endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 3c92121ba9af..96eb576d82fd 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -16,6 +16,7 @@ #include #include #include +#include <linux/kobject_ns.h> #include struct kobject; -- cgit v1.2.3-59-g8ed1b From d187abb9a83e6c6b6e9f2ca17962bdeafb4bc903 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Aug 2010 12:07:13 -0700 Subject: USB: gadget: fix composite kernel-doc warnings Warning(include/linux/usb/composite.h:284): No description found for parameter 'disconnect' Warning(drivers/usb/gadget/composite.c:744): No description found for parameter 'c' Warning(drivers/usb/gadget/composite.c:744): Excess function parameter 'cdev' description in 'usb_string_ids_n' Signed-off-by: Randy Dunlap Cc: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 4 ++-- include/linux/usb/composite.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index e483f80822d2..1160c55de7f2 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -723,12 +723,12 @@ int usb_string_ids_tab(struct usb_composite_dev *cdev, struct
usb_string *str) /** * usb_string_ids_n() - allocate unused string IDs in batch - * @cdev: the device whose string descriptor IDs are being allocated + * @c: the device whose string descriptor IDs are being allocated * @n: number of string IDs to allocate * Context: single threaded during gadget setup * * Returns the first requested ID. This ID and next @n-1 IDs are now - * valid IDs. At least providind that @n is non zore because if it + * valid IDs. At least provided that @n is non-zero because if it * is, returns last requested ID which is now very useful information. * * @usb_string_ids_n() is called from bind() callbacks to allocate diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 890bc1472190..617068134ae8 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -247,6 +247,7 @@ int usb_add_config(struct usb_composite_dev *, * value; it should return zero on successful initialization. * @unbind: Reverses @bind(); called as a side effect of unregistering * this driver. + * @disconnect: optional driver disconnect method * @suspend: Notifies when the host stops sending USB traffic, * after function notifications * @resume: Notifies configuration when the host restarts USB traffic, -- cgit v1.2.3-59-g8ed1b From e41e704bc4f49057fc68b643108366e6e6781aa3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Aug 2010 14:22:47 +0200 Subject: workqueue: improve destroy_workqueue() debuggability Now that the worklist is global, having works pending after wq destruction can easily lead to oops and destroy_workqueue() have several BUG_ON()s to catch these cases. Unfortunately, BUG_ON() doesn't tell much about how the work became pending after the final flush_workqueue(). This patch adds WQ_DYING which is set before the final flush begins. If a work is requested to be queued on a dying workqueue, WARN_ON_ONCE() is triggered and the request is ignored. 
This clearly indicates which caller is trying to queue a work on a dying workqueue and keeps the system working in most cases. Locking rule comment is updated such that the 'I' rule includes modifying the field from destruction path. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f9d277bcd9a..c959666eafca 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -241,6 +241,8 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cc3456f96c56..362b50d092e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -87,7 +87,8 @@ enum { /* * Structure fields follow one of the following exclusion rules. * - * I: Set during initialization and read-only afterwards. + * I: Modifiable by initialization/destruction paths and read-only for + * everyone else. * * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. 
@@ -944,6 +945,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); + if (WARN_ON_ONCE(wq->flags & WQ_DYING)) + return; + /* determine gcwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *last_gcwq; @@ -2828,6 +2832,7 @@ void destroy_workqueue(struct workqueue_struct *wq) { unsigned int cpu; + wq->flags |= WQ_DYING; flush_workqueue(wq); /* -- cgit v1.2.3-59-g8ed1b From 8ca3eb08097f6839b2206e2242db4179aee3cfb3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 24 Aug 2010 11:44:18 -0700 Subject: guard page for stacks that grow upwards pa-risc and ia64 have stacks that grow upwards. Check that they do not run into other mappings. By making VM_GROWSUP 0x0 on architectures that do not ever use it, we can avoid some unpleasant #ifdefs in check_stack_guard_page(). Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 +++++++- mm/memory.c | 15 +++++++++++---- mm/mmap.c | 3 --- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 709f6728fc90..831c693416b2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -78,7 +78,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_MAYSHARE 0x00000080 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) #define VM_GROWSUP 0x00000200 +#else +#define VM_GROWSUP 0x00000000 +#endif #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. 
*/ @@ -1330,8 +1334,10 @@ unsigned long ra_submit(struct file_ra_state *ra, /* Do stack extension */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -#ifdef CONFIG_IA64 +#if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); +#else + #define expand_upwards(vma, address) do { } while (0) #endif extern int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address); diff --git a/mm/memory.c b/mm/memory.c index 2ed2267439df..6b2ab1051851 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2760,11 +2760,9 @@ out_release: } /* - * This is like a special single-page "expand_downwards()", - * except we must first make sure that 'address-PAGE_SIZE' + * This is like a special single-page "expand_{down|up}wards()", + * except we must first make sure that 'address{-|+}PAGE_SIZE' * doesn't hit another vma. - * - * The "find_vma()" will do the right thing even if we wrap */ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) { @@ -2783,6 +2781,15 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo expand_stack(vma, address - PAGE_SIZE); } + if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { + struct vm_area_struct *next = vma->vm_next; + + /* As VM_GROWSDOWN but s/below/above/ */ + if (next && next->vm_start == address + PAGE_SIZE) + return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; + + expand_upwards(vma, address + PAGE_SIZE); + } return 0; } diff --git a/mm/mmap.c b/mm/mmap.c index 331e51af38c9..6128dc8e5ede 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1716,9 +1716,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. 
*/ -#ifndef CONFIG_IA64 -static -#endif int expand_upwards(struct vm_area_struct *vma, unsigned long address) { int error; -- cgit v1.2.3-59-g8ed1b From 2b8fd9186d9275b07aef43e5bb4e98cd571f9a7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Aug 2010 23:55:59 +0200 Subject: ACPI/PCI: Do not preserve _OSC control bits returned by a query There is the assumption in acpi_pci_osc_control_set() that it is always sufficient to compare the mask of _OSC control bits to be requested with the result of an _OSC query where all of the known control bits have been checked. However, in general, that need not be the case. For example, if an _OSC feature A depends on an _OSC feature B and control of A, B plus another _OSC feature C is requested simultaneously, the BIOS may return A, B, C, while it would only return C if A and C were requested without B. That may result in passing a wrong mask of _OSC control bits to an _OSC control request, in which case the BIOS may only grant control of a subset of the requested features. Moreover, acpi_pci_run_osc() will return error code if that happens and the caller of acpi_pci_osc_control_set() will not know that it's been granted control of some _OSC features. Consequently, the system will generally not work as expected. Apart from this acpi_pci_osc_control_set() always uses the mask of _OSC control bits returned by the very first invocation of acpi_pci_query_osc(), but that is done with the second argument equal to OSC_PCI_SEGMENT_GROUPS_SUPPORT which generally happens to affect the returned _OSC control bits. For these reasons, make acpi_pci_osc_control_set() always check if control of the requested _OSC features will be granted before making the final control request. As a result, the osc_control_qry and osc_queried members of struct acpi_pci_root are not necessary any more, so drop them and remove the remaining code referring to them. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 20 +++++++------------- include/acpi/acpi_bus.h | 3 --- 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d2ae816df0f5..77cd19697b1e 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -249,12 +249,8 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, status = acpi_pci_run_osc(root->device->handle, capbuf, &result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - if (control) { + if (control) *control = result; - } else { - root->osc_control_qry = result; - root->osc_queried = 1; - } } return status; } @@ -409,14 +405,12 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) goto out; /* Need to query controls first before requesting them */ - if (!root->osc_queried) { - status = acpi_pci_query_osc(root, root->osc_support_set, NULL); - if (ACPI_FAILURE(status)) - goto out; - } - if ((root->osc_control_qry & control_req) != control_req) { - printk(KERN_DEBUG - "Firmware did not grant requested _OSC control\n"); + flags = control_req; + status = acpi_pci_query_osc(root, root->osc_support_set, &flags); + if (ACPI_FAILURE(status)) + goto out; + + if (flags != control_req) { status = AE_SUPPORT; goto out; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index baacd98e7cc6..4de84ce3a927 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -377,9 +377,6 @@ struct acpi_pci_root { u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ - u32 osc_control_qry; /* the latest _OSC query result */ - - u32 osc_queried:1; /* has _OSC control been queried? */ }; /* helper */ -- cgit v1.2.3-59-g8ed1b From 75fb60f26befb59dbfa05cb122972642b7bdd219 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 23 Aug 2010 23:53:11 +0200 Subject: ACPI/PCI: Negotiate _OSC control bits before requesting them It is possible that the BIOS will not grant control of all _OSC features requested via acpi_pci_osc_control_set(), so it is recommended to negotiate the final set of _OSC features with the query flag set before calling _OSC to request control of these features. To implement it, rework acpi_pci_osc_control_set() so that the caller can specify the mask of _OSC control bits to negotiate and the mask of _OSC control bits that are absolutely necessary to it. Then, acpi_pci_osc_control_set() will run _OSC queries in a loop until the mask of _OSC control bits returned by the BIOS is equal to the mask passed to it. Also, before running the _OSC request acpi_pci_osc_control_set() will check if the caller's required control bits are present in the final mask. Using this mechanism we will be able to avoid situations in which the BIOS doesn't grant control of certain _OSC features, because they depend on some other _OSC features that have not been requested. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 59 +++++++++++++++++++++++------------- drivers/pci/hotplug/acpi_pcihp.c | 2 +- drivers/pci/pcie/aer/aerdrv_acpi.c | 6 ++-- drivers/pci/pcie/pme/pcie_pme_acpi.c | 8 +++-- include/linux/acpi.h | 4 +-- 5 files changed, 49 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 77cd19697b1e..c34713112520 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -374,21 +374,32 @@ out: EXPORT_SYMBOL_GPL(acpi_get_pci_dev); /** - * acpi_pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits + * acpi_pci_osc_control_set - Request control of PCI root _OSC features. + * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). 
+ * @mask: Mask of _OSC bits to request control of, place to store control mask. + * @req: Mask of _OSC bits the control of is essential to the caller. * - * Attempt to take control from Firmware on requested control bits. + * Run _OSC query for @mask and if that is successful, compare the returned + * mask of control bits with @req. If all of the @req bits are set in the + * returned mask, run _OSC request for it. + * + * The variable at the @mask address may be modified regardless of whether or + * not the function returns success. On success it will contain the mask of + * _OSC bits the BIOS has granted control of, but its contents are meaningless + * on failure. **/ -acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) +acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req) { + struct acpi_pci_root *root; acpi_status status; - u32 control_req, result, capbuf[3]; + u32 ctrl, capbuf[3]; acpi_handle tmp; - struct acpi_pci_root *root; - control_req = (flags & OSC_PCI_CONTROL_MASKS); - if (!control_req) + if (!mask) + return AE_BAD_PARAMETER; + + ctrl = *mask & OSC_PCI_CONTROL_MASKS; + if ((ctrl & req) != req) return AE_TYPE; root = acpi_pci_find_root(handle); @@ -400,27 +411,33 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) return status; mutex_lock(&osc_lock); + + *mask = ctrl | root->osc_control_set; /* No need to evaluate _OSC if the control was already granted. */ - if ((root->osc_control_set & control_req) == control_req) + if ((root->osc_control_set & ctrl) == ctrl) goto out; - /* Need to query controls first before requesting them */ - flags = control_req; - status = acpi_pci_query_osc(root, root->osc_support_set, &flags); - if (ACPI_FAILURE(status)) - goto out; + /* Need to check the available controls bits before requesting them. 
*/ + while (*mask) { + status = acpi_pci_query_osc(root, root->osc_support_set, mask); + if (ACPI_FAILURE(status)) + goto out; + if (ctrl == *mask) + break; + ctrl = *mask; + } - if (flags != control_req) { + if ((ctrl & req) != req) { status = AE_SUPPORT; goto out; } capbuf[OSC_QUERY_TYPE] = 0; capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set; - capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req; - status = acpi_pci_run_osc(handle, capbuf, &result); + capbuf[OSC_CONTROL_TYPE] = ctrl; + status = acpi_pci_run_osc(handle, capbuf, mask); if (ACPI_SUCCESS(status)) - root->osc_control_set = result; + root->osc_control_set = *mask; out: mutex_unlock(&osc_lock); return status; @@ -551,8 +568,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (flags != base_flags) acpi_pci_osc_support(root, flags); - status = acpi_pci_osc_control_set(root->device->handle, - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + status = acpi_pci_osc_control_set(root->device->handle, &flags, flags); if (ACPI_FAILURE(status)) { printk(KERN_INFO "Unable to assume PCIe control: Disabling ASPM\n"); diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 45fcc1e96df9..3d93d529a7bd 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -360,7 +360,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = acpi_pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, &flags, flags); if (ACPI_SUCCESS(status)) goto got_one; if (status == AE_SUPPORT) diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index f278d7b0d95d..3a276a0cea93 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -39,9 +39,9 @@ int 
aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_AER_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + u32 flags = OSC_PCI_EXPRESS_AER_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + status = acpi_pci_osc_control_set(handle, &flags, flags); } if (ACPI_FAILURE(status)) { diff --git a/drivers/pci/pcie/pme/pcie_pme_acpi.c b/drivers/pci/pcie/pme/pcie_pme_acpi.c index 83ab2287ae3f..be20222b12d3 100644 --- a/drivers/pci/pcie/pme/pcie_pme_acpi.c +++ b/drivers/pci/pcie/pme/pcie_pme_acpi.c @@ -28,6 +28,7 @@ int pcie_pme_acpi_setup(struct pcie_device *srv) acpi_status status = AE_NOT_FOUND; struct pci_dev *port = srv->port; acpi_handle handle; + u32 flags; int error = 0; if (acpi_pci_disabled) @@ -39,9 +40,10 @@ int pcie_pme_acpi_setup(struct pcie_device *srv) if (!handle) return -EINVAL; - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_PME_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + flags = OSC_PCI_EXPRESS_PME_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + + status = acpi_pci_osc_control_set(handle, &flags, flags); if (ACPI_FAILURE(status)) { dev_info(&port->dev, "Failed to receive control of PCIe PME service: %s\n", diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ccf94dc5acdf..c227757feb06 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -304,8 +304,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); OSC_PCI_EXPRESS_PME_CONTROL | \ OSC_PCI_EXPRESS_AER_CONTROL | \ OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - -extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, + u32 *mask, u32 req); extern void acpi_early_init(void); #else /* !CONFIG_ACPI */ -- cgit v1.2.3-59-g8ed1b From 8a2e8e5dec7e29c56a46ba176c664ab6a3d04118 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Aug 2010 
10:33:56 +0200 Subject: workqueue: fix cwq->nr_active underflow cwq->nr_active is used to keep track of how many work items are active for the cpu workqueue, where 'active' is defined as either pending on global worklist or executing. This is used to implement the max_active limit and workqueue freezing. If a work item is queued after nr_active has already reached max_active, the work item doesn't increment nr_active and is put on the delayed queue and gets activated later as previous active work items retire. try_to_grab_pending() which is used in the cancellation path unconditionally decremented nr_active whether the work item being cancelled is currently active or delayed, so cancelling a delayed work item makes nr_active underflow. This breaks max_active enforcement and triggers BUG_ON() in destroy_workqueue() later on. This patch fixes this bug by adding a flag WORK_STRUCT_DELAYED, which is set while a work item is on the delayed list and making try_to_grab_pending() decrement nr_active iff the work item is currently active. The addition of the flag enlarges cwq alignment to 256 bytes which is getting a bit too large. It's scheduled to be reduced back to 128 bytes by merging WORK_STRUCT_PENDING and WORK_STRUCT_CWQ in the next devel cycle.
Signed-off-by: Tejun Heo Reported-by: Johannes Berg --- include/linux/workqueue.h | 16 +++++++++------- kernel/workqueue.c | 30 ++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c959666eafca..f11100f96482 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,18 +25,20 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */ - WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */ + WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ + WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ + WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ #else - WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ #endif WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -59,8 +61,8 @@ enum { /* * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 128 bytes which isn't too - * excessive while allowing 15 workqueue flush colors. + * off. This makes cwqs aligned to 256 bytes and allows 15 + * workqueue flush colors. 
*/ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 362b50d092e2..a2dccfca03ba 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -941,6 +941,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; struct list_head *worklist; + unsigned int work_flags; unsigned long flags; debug_work_activate(work); @@ -990,14 +991,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, BUG_ON(!list_empty(&work->entry)); cwq->nr_in_flight[cwq->work_color]++; + work_flags = work_color_to_flags(cwq->work_color); if (likely(cwq->nr_active < cwq->max_active)) { cwq->nr_active++; worklist = gcwq_determine_ins_pos(gcwq, cwq); - } else + } else { + work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; + } - insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); + insert_work(cwq, work, worklist, work_flags); spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -1666,6 +1670,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); move_linked_works(work, pos, NULL); + __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); cwq->nr_active++; } @@ -1673,6 +1678,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest * @color: color of work which left the queue + * @delayed: for a delayed work * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its cwq and handle workqueue flushing. @@ -1680,19 +1686,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * CONTEXT: * spin_lock_irq(gcwq->lock). 
*/ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, + bool delayed) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; cwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { - /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (!delayed) { + cwq->nr_active--; + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + } } /* is flush in progress and are we at the flushing tip? */ @@ -1823,7 +1832,7 @@ __acquires(&gcwq->lock) hlist_del_init(&worker->hentry); worker->current_work = NULL; worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color); + cwq_dec_nr_in_flight(cwq, work_color, false); } /** @@ -2388,7 +2397,8 @@ static int try_to_grab_pending(struct work_struct *work) debug_work_deactivate(work); list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work)); + get_work_color(work), + *work_data_bits(work) & WORK_STRUCT_DELAYED); ret = 1; } } -- cgit v1.2.3-59-g8ed1b From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 Aug 2010 02:27:49 -0700 Subject: tcp: Combat per-cpu skew in orphan tests. As reported by Anton Blanchard when we use percpu_counter_read_positive() to make our orphan socket limit checks, the check can be off by up to num_cpus_online() * batch (which is 32 by default) which on a 128 cpu machine can be as large as the default orphan limit itself. Fix this by doing the full expensive sum check if the optimized check triggers. Reported-by: Anton Blanchard Signed-off-by: David S. 
Miller Acked-by: Eric Dumazet --- include/net/tcp.h | 18 ++++++++++++++---- net/ipv4/tcp.c | 5 +---- net/ipv4/tcp_timer.c | 8 ++++---- 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..eaa9582779d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } -static inline int tcp_too_many_orphans(struct sock *sk, int num) +static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { - return (num > sysctl_tcp_max_orphans) || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); + struct percpu_counter *ocp = sk->sk_prot->orphan_count; + int orphans = percpu_counter_read_positive(ocp); + + if (orphans << shift > sysctl_tcp_max_orphans) { + orphans = percpu_counter_sum_positive(ocp); + if (orphans << shift > sysctl_tcp_max_orphans) + return true; + } + + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + return true; + return false; } /* syncookies: remember time of last synqueue overflow */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..197b9b77fa3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2011,11 +2011,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..c35b469e851c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = 
tcp_sk(sk); - int orphans = percpu_counter_read_positive(&tcp_orphan_count); + int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; + shift++; /* If some dubious ICMP arrived, penalize even more. */ if (sk->sk_err_soft) - orphans <<= 1; + shift++; - if (tcp_too_many_orphans(sk, orphans)) { + if (tcp_too_many_orphans(sk, shift)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); -- cgit v1.2.3-59-g8ed1b From 04cbe1de6fbda9649a6f25666194e6955d3e717e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Aug 2010 21:29:43 +0100 Subject: vgaarb: Wrap vga_(get|put) in CONFIG_VGA_ARB Fix link failure without the vga arbitrator. Signed-off-by: Chris Wilson Cc: Dave Airlie Cc: Jesse Barnes Signed-off-by: Dave Airlie --- include/linux/vgaarb.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 6228b5b77d35..e9e1524b582c 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -93,8 +93,11 @@ extern void vga_set_legacy_decoding(struct pci_dev *pdev, * Nested calls are supported (a per-resource counter is maintained) */ -extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, - int interruptible); +#if defined(CONFIG_VGA_ARB) +extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible); +#else +static inline int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { return 0; } +#endif /** * vga_get_interruptible @@ -131,7 +134,11 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, * are already locked by another card. 
It can be called in any context */ +#if defined(CONFIG_VGA_ARB) extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); +#else +static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; } +#endif /** * vga_put - release lock on legacy VGA resources @@ -146,7 +153,11 @@ extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); * released if the counter reaches 0. */ +#if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); +#else +#define vga_put(pdev, rsrc) +#endif /** -- cgit v1.2.3-59-g8ed1b From 0fb85621df4f9f7c663c6c77c302e821a832c95e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 20 Aug 2010 10:02:15 +0100 Subject: fanotify: resize pid and reorder structure resize pid and reorder the fanotify_event_metadata so it is naturally aligned and we can work towards dropping the packed attribute Signed-off-by: Tvrtko Ursulin Cc: Andreas Dilger Signed-off-by: Eric Paris --- include/linux/fanotify.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 985435622ecd..63531a6b4d2a 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -65,14 +65,14 @@ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -#define FANOTIFY_METADATA_VERSION 1 +#define FANOTIFY_METADATA_VERSION 2 struct fanotify_event_metadata { __u32 event_len; __u32 vers; - __s32 fd; __u64 mask; - __s64 pid; + __s32 fd; + __s32 pid; } __attribute__ ((packed)); struct fanotify_response { -- cgit v1.2.3-59-g8ed1b From bad849b3dc0fae1297c8d47f846f8d202a6145ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Aug 2010 16:00:34 +0100 Subject: NOMMU: Stub out vm_get_page_prot() if there's no MMU Stub out vm_get_page_prot() if there's no MMU.
This was added by commit 804af2cf6e7a ("[AGPGART] remove private page protection map") and is used in commit c07fbfd17e61 ("fbmem: VM_IO set, but not propagated") in the fbmem video driver, but the function doesn't exist on NOMMU, resulting in an undefined symbol at link time. Signed-off-by: David Howells Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 831c693416b2..e6b1210772ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1363,7 +1363,15 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +#ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); +#else +static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + return __pgprot(0); +} +#endif + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); -- cgit v1.2.3-59-g8ed1b From a28dec2f26013aad89446b1f708f948617bc28a2 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sun, 8 Aug 2010 18:03:33 +0400 Subject: powerpc/85xx: Add P1021 PCI IDs and quirks This is needed for proper PCI-E support on P1021 SoCs. 
Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 209384b6e039..4ae933225251 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -399,6 +399,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..10d33309e9a6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2300,6 +2300,8 @@ #define PCI_DEVICE_ID_P2010 0x0079 #define PCI_DEVICE_ID_P1020E 0x0100 #define PCI_DEVICE_ID_P1020 0x0101 +#define PCI_DEVICE_ID_P1021E 0x0102 +#define PCI_DEVICE_ID_P1021 0x0103 #define PCI_DEVICE_ID_P1011E 0x0108 #define PCI_DEVICE_ID_P1011 0x0109 #define PCI_DEVICE_ID_P1022E 0x0110 -- cgit v1.2.3-59-g8ed1b From 4e4438b86527e8bf1f49503a30d487e401e64f9c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 1 Sep 2010 08:55:24 -0600 Subject: gpiolib: Add 'struct gpio_chip' forward declaration for !GPIOLIB case With CONFIG_GPIOLIB=n, the 'struct gpio_chip' is not declared, so the following pops up on PowerPC: cc1: warnings being treated as errors In file included from 
arch/powerpc/platforms/52xx/mpc52xx_common.c:19: include/linux/of_gpio.h:74: warning: 'struct gpio_chip' declared inside parameter list include/linux/of_gpio.h:74: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/of_gpio.h:75: warning: 'struct gpio_chip' declared inside parameter list make[2]: *** [arch/powerpc/platforms/52xx/mpc52xx_common.o] Error 1 This patch fixes the issue by providing the proper forward declaration. Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 03f616b78cfa..e41f7dd1ae67 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -13,6 +13,7 @@ #include struct device; +struct gpio_chip; /* * Some platforms don't support the GPIO programming interface. -- cgit v1.2.3-59-g8ed1b From ef5dc121d5a0bb1fa477c5395277259f07d318a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 2 Sep 2010 15:48:16 -0700 Subject: mutex: Fix annotations to include it in kernel-locking docbook Fix kernel-doc notation in linux/mutex.h and kernel/mutex.c, then add these 2 files to the kernel-locking docbook as the Mutex API reference chapter. Add one API function to mutex-design.txt and correct a typo in that file. 
Signed-off-by: Randy Dunlap Cc: Rusty Russell LKML-Reference: <20100902154816.6cc2f9ad.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-locking.tmpl | 6 ++++++ Documentation/mutex-design.txt | 3 ++- include/linux/mutex.h | 8 ++++++++ kernel/mutex.c | 23 +++++++---------------- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 0b1a3f97f285..a0d479d1e1dd 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1961,6 +1961,12 @@ machines due to caching. + + Mutex API reference +!Iinclude/linux/mutex.h +!Ekernel/mutex.c + + Further reading diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index c91ccc0720fa..38c10fd7f411 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -9,7 +9,7 @@ firstly, there's nothing wrong with semaphores. But if the simpler mutex semantics are sufficient for your code, then there are a couple of advantages of mutexes: - - 'struct mutex' is smaller on most architectures: .e.g on x86, + - 'struct mutex' is smaller on most architectures: E.g. on x86, 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes. A smaller structure size means less RAM footprint, and better CPU-cache utilization. 
@@ -136,3 +136,4 @@ the APIs of 'struct mutex' have been streamlined: void mutex_lock_nested(struct mutex *lock, unsigned int subclass); int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 878cab4f5fcc..f363bc8fdc74 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -78,6 +78,14 @@ struct mutex_waiter { # include #else # define __DEBUG_MUTEX_INITIALIZER(lockname) +/** + * mutex_init - initialize the mutex + * @mutex: the mutex to be initialized + * + * Initialize the mutex to unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ # define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ diff --git a/kernel/mutex.c b/kernel/mutex.c index 4c0b7b3e6d2e..200407c1502f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -36,15 +36,6 @@ # include #endif -/*** - * mutex_init - initialize the mutex - * @lock: the mutex to be initialized - * @key: the lock_class_key for the class; used by mutex lock debugging - * - * Initialize the mutex to unlocked state. - * - * It is not allowed to initialize an already locked mutex. 
- */ void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { @@ -68,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init); static __used noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_lock - acquire the mutex * @lock: the mutex to be acquired * @@ -105,7 +96,7 @@ EXPORT_SYMBOL(mutex_lock); static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_unlock - release the mutex * @lock: the mutex to be released * @@ -364,8 +355,8 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count); static noinline int __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count); -/*** - * mutex_lock_interruptible - acquire the mutex, interruptable +/** + * mutex_lock_interruptible - acquire the mutex, interruptible * @lock: the mutex to be acquired * * Lock the mutex like mutex_lock(), and return 0 if the mutex has @@ -456,15 +447,15 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) return prev == 1; } -/*** - * mutex_trylock - try acquire the mutex, without waiting +/** + * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired * * Try to acquire the mutex atomically. Returns 1 if the mutex * has been acquired successfully, and 0 on contention. * * NOTE: this function follows the spin_trylock() convention, so - * it is negated to the down_trylock() return values! Be careful + * it is negated from the down_trylock() return values! Be careful * about this when converting semaphore users to mutexes. * * This function must not be used in interrupt context. The -- cgit v1.2.3-59-g8ed1b From 3fb5a991916091a908d53608a5899240039fb51e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 2 Sep 2010 15:42:43 +0000 Subject: cls_cgroup: Fix rcu lockdep warning Dave reported an rcu lockdep warning on 2.6.35.4 kernel task->cgroups and task->cgroups->subsys[i] are protected by RCU. So we avoid accessing invalid pointers here. 
This might happen, for example, when you are deref-ing those pointers while someone moves @task from one cgroup to another. Reported-by: Dave Jones Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- include/net/cls_cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..ef6c24a529e1 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -27,11 +27,17 @@ struct cgroup_cls_state #ifdef CONFIG_NET_CLS_CGROUP static inline u32 task_cls_classid(struct task_struct *p) { + int classid; + if (in_interrupt()) return 0; - return container_of(task_subsys_state(p, net_cls_subsys_id), - struct cgroup_cls_state, css)->classid; + rcu_read_lock(); + classid = container_of(task_subsys_state(p, net_cls_subsys_id), + struct cgroup_cls_state, css)->classid; + rcu_read_unlock(); + + return classid; } #else extern int net_cls_subsys_id; -- cgit v1.2.3-59-g8ed1b From 71cad0554956de87c3fc413b1eac9313887eb14f Mon Sep 17 00:00:00 2001 From: Philippe Langlais Date: Tue, 31 Aug 2010 14:19:09 +0200 Subject: serial: fix port type conflict between NS16550A & U6_16550A Bug seen by Dr.
David Alan Gilbert with sparse Signed-off-by: Philippe Langlais Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 3 +-- include/linux/serial_core.h | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/serial.h b/include/linux/serial.h index 1ebc694a6d52..ef914061511e 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -77,8 +77,7 @@ struct serial_struct { #define PORT_16654 11 #define PORT_16850 12 #define PORT_RSA 13 /* RSA-DV II/S card */ -#define PORT_U6_16550A 14 -#define PORT_MAX 14 +#define PORT_MAX 13 #define SERIAL_IO_PORT 0 #define SERIAL_IO_HUB6 1 diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 64458a9a8938..563e23400913 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -44,7 +44,8 @@ #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ #define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ -#define PORT_MAX_8250 18 /* max port ID */ +#define PORT_U6_16550A 19 /* ST-Ericsson U6xxx internal UART */ +#define PORT_MAX_8250 19 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3-59-g8ed1b From 29bc17ecb856ffb2b47c7009a71971c6f9334205 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 4 Sep 2010 22:56:44 +0200 Subject: io-mapping: Fix the address space annotations Fixes a bunch of sparse warnings in io-mapping.h because of the inconsistent __iomem usage. 
Signed-off-by: Francisco Jerez LKML-Reference: <1283633804-11749-2-git-send-email-currojerez@riseup.net> Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0a6b3d5c490c..7fb592793738 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -79,7 +79,7 @@ io_mapping_free(struct io_mapping *mapping) } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) @@ -94,12 +94,12 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { iounmap_atomic(vaddr, slot); } -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { resource_size_t phys_addr; @@ -111,7 +111,7 @@ io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { iounmap(vaddr); } @@ -125,38 +125,38 @@ struct io_mapping; static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping __force *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); + iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) 
+io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { } /* Non-atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { } -- cgit v1.2.3-59-g8ed1b From 73457f0f836956747e0394320be2163c050e96ef Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 6 Aug 2010 01:59:14 +0300 Subject: cgroups: fix API thinko The cgroup_attach_task_current_cg API that we have upstream is backwards: we really need an API to attach to the cgroups from another process A to the current one. In our case (vhost), a privileged user wants to attach its task to cgroups from a less privileged one; the API makes us run it in the other task's context, and this fails. So let's make the API generic and just pass in 'from' and 'to' tasks. Add an inline wrapper for cgroup_attach_task_current_cg to avoid breaking bisect. Signed-off-by: Michael S.
Tsirkin Acked-by: Li Zefan Acked-by: Paul Menage --- include/linux/cgroup.h | 11 ++++++++++- kernel/cgroup.c | 9 +++++---- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..5a53d8f039a2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,11 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +640,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a8ce09954404..9d90c08f3bde 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1789,10 +1789,11 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; struct cgroup *cur_cg; @@ -1800,7 +1801,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) cgroup_lock(); for_each_active_root(root) { - 
cur_cg = task_cgroup_from_root(current, root); + cur_cg = task_cgroup_from_root(from, root); retval = cgroup_attach_task(cur_cg, tsk); if (retval) break; @@ -1809,7 +1810,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3-59-g8ed1b From 831853c87fb7234a8650484d30993242ea9ad6d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 6 Sep 2010 16:08:56 +0100 Subject: ALSA: Add more jack button slots Some devices have more flexible microphone detection and can detect a wider range of buttons. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/sound/jack.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/jack.h b/include/sound/jack.h index d90b9fa32707..c140fc7cbd3f 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -47,6 +47,9 @@ enum snd_jack_types { SND_JACK_BTN_0 = 0x4000, SND_JACK_BTN_1 = 0x2000, SND_JACK_BTN_2 = 0x1000, + SND_JACK_BTN_3 = 0x0800, + SND_JACK_BTN_4 = 0x0400, + SND_JACK_BTN_5 = 0x0200, }; struct snd_jack { @@ -55,7 +58,7 @@ struct snd_jack { int type; const char *id; char name[100]; - unsigned int key[3]; /* Keep in sync with definitions above */ + unsigned int key[6]; /* Keep in sync with definitions above */ void *private_data; void (*private_free)(struct snd_jack *); }; -- cgit v1.2.3-59-g8ed1b From f8f235e5bbf4e61f3e0886a44afb1dc4cfe8f337 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 27 Aug 2010 11:08:57 +0800 Subject: agp/intel: Fix cache control for Sandybridge Sandybridge GTT has new cache control bits in PTE, which controls graphics page cache in LLC or LLC/MLC, so we need to extend the mask function to respect the new bits. And set cache control to always LLC only by default on Gen6. 
Signed-off-by: Zhenyu Wang Cc: stable@kernel.org Signed-off-by: Chris Wilson --- drivers/char/agp/intel-agp.c | 1 + drivers/char/agp/intel-gtt.c | 50 ++++++++++++++++++++++++++++++++--------- drivers/gpu/drm/i915/i915_gem.c | 1 + include/linux/intel-gtt.h | 20 +++++++++++++++++ 4 files changed, 62 insertions(+), 10 deletions(-) create mode 100644 include/linux/intel-gtt.h (limited to 'include') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 710af89b176d..74461d177baf 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -12,6 +12,7 @@ #include #include "agp.h" #include "intel-agp.h" +#include #include "intel-gtt.c" diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 7f35854d33a3..64b10551a3f8 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -49,6 +49,26 @@ static struct gatt_mask intel_i810_masks[] = .type = INTEL_AGP_CACHED_MEMORY} }; +#define INTEL_AGP_UNCACHED_MEMORY 0 +#define INTEL_AGP_CACHED_MEMORY_LLC 1 +#define INTEL_AGP_CACHED_MEMORY_LLC_GFDT 2 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC 3 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT 4 + +static struct gatt_mask intel_gen6_masks[] = +{ + {.mask = I810_PTE_VALID | GEN6_PTE_UNCACHED, + .type = INTEL_AGP_UNCACHED_MEMORY }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_GFDT }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT }, +}; + static struct _intel_private { struct pci_dev *pcidev; /* device one */ u8 __iomem *registers; @@ -178,13 +198,6 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem, off_t pg_start, int mask_type) { int i, j; - u32 cache_bits = 0; - - if (agp_bridge->dev->device == 
PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB || - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB) - { - cache_bits = GEN6_PTE_LLC_MLC; - } for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(agp_bridge->driver->mask_memory(agp_bridge, @@ -317,6 +330,23 @@ static int intel_i830_type_to_mask_type(struct agp_bridge_data *bridge, return 0; } +static int intel_gen6_type_to_mask_type(struct agp_bridge_data *bridge, + int type) +{ + unsigned int type_mask = type & ~AGP_USER_CACHED_MEMORY_GFDT; + unsigned int gfdt = type & AGP_USER_CACHED_MEMORY_GFDT; + + if (type_mask == AGP_USER_UNCACHED_MEMORY) + return INTEL_AGP_UNCACHED_MEMORY; + else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC) + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC_MLC; + else /* set 'normal'/'cached' to LLC by default */ + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC; +} + + static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, int type) { @@ -1163,7 +1193,7 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start, mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type); - if (mask_type != 0 && mask_type != AGP_PHYS_MEMORY && + if (!IS_SNB && mask_type != 0 && mask_type != AGP_PHYS_MEMORY && mask_type != INTEL_AGP_CACHED_MEMORY) goto out_err; @@ -1563,7 +1593,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .fetch_size = intel_i9xx_fetch_size, .cleanup = intel_i915_cleanup, .mask_memory = intel_gen6_mask_memory, - .masks = intel_i810_masks, + .masks = intel_gen6_masks, .agp_enable = intel_i810_agp_enable, .cache_flush = global_cache_flush, .create_gatt_table = intel_i965_create_gatt_table, @@ -1576,7 +1606,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .agp_alloc_pages = agp_generic_alloc_pages, .agp_destroy_page = agp_generic_destroy_page, .agp_destroy_pages = agp_generic_destroy_pages, - .agp_type_to_mask_type = 
intel_i830_type_to_mask_type, + .agp_type_to_mask_type = intel_gen6_type_to_mask_type, .chipset_flush = intel_i915_chipset_flush, #ifdef USE_PCI_DMA_API .agp_map_page = intel_agp_map_page, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 748c26340c35..16fca1d1799a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -34,6 +34,7 @@ #include #include #include +#include static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj); static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h new file mode 100644 index 000000000000..1d19ab2afa39 --- /dev/null +++ b/include/linux/intel-gtt.h @@ -0,0 +1,20 @@ +/* + * Common Intel AGPGART and GTT definitions. + */ +#ifndef _INTEL_GTT_H +#define _INTEL_GTT_H + +#include + +/* This is for Intel only GTT controls. + * + * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only + */ + +#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2) +#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4) + +/* flag for GFDT type */ +#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3) + +#endif -- cgit v1.2.3-59-g8ed1b From febc88c5948f81114f64c3412011d695aecae233 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Sep 2010 14:46:37 +0200 Subject: semaphore: Add DEFINE_SEMAPHORE The full cleanup of init_MUTEX[_LOCKED] and DECLARE_MUTEX has not been done. Some of the users are real semaphores and we should name them as such instead of confusing everyone with "MUTEX". Provide the infrastructure to get finally rid of init_MUTEX[_LOCKED] and DECLARE_MUTEX. 
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Christoph Hellwig LKML-Reference: <20100907125054.795929962@linutronix.de> --- include/linux/semaphore.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 7415839ac890..5310d27abd2a 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -26,6 +26,9 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + #define DECLARE_MUTEX(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) -- cgit v1.2.3-59-g8ed1b From e3e55ff5854655d8723ad8b307f02515aecc3df5 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 7 Sep 2010 15:52:06 +0800 Subject: spi/dw_spi: clean the cs_control code commit 052dc7c45i "spi/dw_spi: conditional transfer mode change" introduced cs_control code, which has a bug by using bit offset for spi mode to set transfer mode in control register. Also it forces devices who don't need cs_control to re-configure the control registers for each spi transfer. 
This patch will fix them Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 17 +++++------------ include/linux/spi/dw_spi.h | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 11fbbf6fb07b..56247853c298 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -181,10 +181,6 @@ static void flush(struct dw_spi *dws) wait_till_not_busy(dws); } -static void null_cs_control(u32 command) -{ -} - static int null_writer(struct dw_spi *dws) { u8 n_bytes = dws->n_bytes; @@ -322,7 +318,7 @@ static void giveback(struct dw_spi *dws) struct spi_transfer, transfer_list); - if (!last_transfer->cs_change) + if (!last_transfer->cs_change && dws->cs_control) dws->cs_control(MRST_SPI_DEASSERT); msg->state = NULL; @@ -549,13 +545,13 @@ static void pump_transfers(unsigned long data) */ if (dws->cs_control) { if (dws->rx && dws->tx) - chip->tmode = 0x00; + chip->tmode = SPI_TMOD_TR; else if (dws->rx) - chip->tmode = 0x02; + chip->tmode = SPI_TMOD_RO; else - chip->tmode = 0x01; + chip->tmode = SPI_TMOD_TO; - cr0 &= ~(0x3 << SPI_MODE_OFFSET); + cr0 &= ~SPI_TMOD_MASK; cr0 |= (chip->tmode << SPI_TMOD_OFFSET); } @@ -704,9 +700,6 @@ static int dw_spi_setup(struct spi_device *spi) chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); if (!chip) return -ENOMEM; - - chip->cs_control = null_cs_control; - chip->enable_dma = 0; } /* diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index cc813f95a2f2..c91302f3a257 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -14,7 +14,9 @@ #define SPI_MODE_OFFSET 6 #define SPI_SCPH_OFFSET 6 #define SPI_SCOL_OFFSET 7 + #define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_MASK (0x3 << SPI_TMOD_OFFSET) #define SPI_TMOD_TR 0x0 /* xmit & recv */ #define SPI_TMOD_TO 0x1 /* xmit only */ #define SPI_TMOD_RO 0x2 /* recv only */ -- cgit v1.2.3-59-g8ed1b From 6523ce1525e88c598c75a1a6b8c4edddfa9defe8 Mon Sep 17 
00:00:00 2001 From: Julian Anastasov Date: Sun, 5 Sep 2010 18:02:29 +0000 Subject: ipvs: fix active FTP - Do not create expectation when forwarding the PORT command to avoid blocking the connection. The problem is that nf_conntrack_ftp.c:help() tries to create the same expectation later in POST_ROUTING and drops the packet with "dropping packet" message after failure in nf_ct_expect_related. - Change ip_vs_update_conntrack to alter the conntrack for related connections from real server. If we do not alter the reply in this direction the next packet from client sent to vport 20 comes as NEW connection. We alter it but maybe some collision happens for both conntracks and the second conntrack gets destroyed immediately. The connection gets stuck too. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 +++ net/netfilter/ipvs/ip_vs_core.c | 1 + net/netfilter/ipvs/ip_vs_ftp.c | 6 ------ net/netfilter/ipvs/ip_vs_xmit.c | 18 ++++++++++++------ 4 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a4747a0f7303..f976885f686f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -955,6 +955,9 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, + int outin); + #endif /* __KERNEL__ */ #endif /* _NET_IP_VS_H */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f8ddba48011..4c2f89df5cce 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); skb->ipvs_property = 1; diff --git
a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 33b329bfc2d2..7e9af5b76d9e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -410,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -497,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } - ct = (struct nf_conn *)skb->nfct; - if (ct && ct != &nf_conntrack_untracked) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, &n_cp->dport, 1); - /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 21e1a5e9b9d3..49df6bea6a2d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -static void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) { struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_tuple new_tuple; @@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) * real-server we will see RIP->DIP. */ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - new_tuple.src.u3 = cp->daddr; + if (outin) + new_tuple.src.u3 = cp->daddr; + else + new_tuple.dst.u3 = cp->vaddr; /* * This will also take care of UDP and other protocols. 
*/ - new_tuple.src.u.tcp.port = cp->dport; + if (outin) + new_tuple.src.u.tcp.port = cp->dport; + else + new_tuple.dst.u.tcp.port = cp->vport; nf_conntrack_alter_reply(ct, &new_tuple); } @@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still -- cgit v1.2.3-59-g8ed1b From 719f835853a92f6090258114a72ffe41f09155cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 05:08:44 +0000 Subject: udp: add rehash on connect() commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation) added a secondary hash on UDP, hashed on (local addr, local port). Problem is that following sequence : fd = socket(...) connect(fd, &remote, ...) not only selects remote end point (address and port), but also sets local address, while UDP stack stored in secondary hash table the socket while its local address was INADDR_ANY (or ipv6 equivalent) Sequence is : - autobind() : choose a random local port, insert socket in hash tables [while local address is INADDR_ANY] - connect() : set remote address and port, change local address to IP given by a route lookup. When an incoming UDP frame comes, if more than 10 sockets are found in primary hash table, we switch to secondary table, and fail to find socket because its local address changed. One solution to this problem is to rehash datagram socket if needed. 
We add a new rehash(struct socket *) method in "struct proto", and implement this method for UDP v4 & v6, using a common helper. This rehashing only takes care of secondary hash table, since primary hash (based on local port only) is not changed. Reported-by: Krzysztof Piotr Oledzki Signed-off-by: Eric Dumazet Tested-by: Krzysztof Piotr Oledzki Signed-off-by: David S. Miller --- include/net/sock.h | 1 + include/net/udp.h | 1 + net/ipv4/datagram.c | 5 ++++- net/ipv4/udp.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/datagram.c | 7 ++++++- net/ipv6/udp.c | 10 ++++++++++ 6 files changed, 66 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..adab9dc58183 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -752,6 +752,7 @@ struct proto { /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); /* Keeping track of sockets in use */ diff --git a/include/net/udp.h b/include/net/udp.h index 7abdf305da50..a184d3496b13 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct sock *sk) } extern void udp_lib_unhash(struct sock *sk); +extern void udp_lib_rehash(struct sock *sk, u16 new_hash); static inline void udp_lib_close(struct sock *sk, long timeout) { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f0550941df7b..721a8a37b45c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!inet->inet_saddr) inet->inet_saddr = rt->rt_src; /* Update source address */ - if (!inet->inet_rcv_saddr) + if (!inet->inet_rcv_saddr) { inet->inet_rcv_saddr = rt->rt_src; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } inet->inet_daddr = 
rt->rt_dst; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..fb23c2e63b52 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) } EXPORT_SYMBOL(udp_lib_unhash); +/* + * inet_rcv_saddr was changed, we must rehash secondary hash + */ +void udp_lib_rehash(struct sock *sk, u16 newhash) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_hslot *hslot, *hslot2, *nhslot2; + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + nhslot2 = udp_hashslot2(udptable, newhash); + udp_sk(sk)->udp_portaddr_hash = newhash; + if (hslot2 != nhslot2) { + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + /* we must lock primary chain too */ + spin_lock_bh(&hslot->lock); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + + spin_unlock_bh(&hslot->lock); + } + } +} +EXPORT_SYMBOL(udp_lib_rehash); + +static void udp_v4_rehash(struct sock *sk) +{ + u16 new_hash = udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + udp_lib_rehash(sk, new_hash); +} + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1843,6 +1886,7 @@ struct proto udp_prot = { .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7d929a22cbc2..ef371aa01ac5 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -105,9 +105,12 @@ ipv4_connected: if 
(ipv6_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } goto out; } @@ -181,6 +184,8 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1dd1affdead2..5acb3560ff15 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } +static void udp_v6_rehash(struct sock *sk) +{ + u16 new_hash = udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->inet_num); + + udp_lib_rehash(sk, new_hash); +} + static inline int compute_score(struct sock *sk, struct net *net, unsigned short hnum, struct in6_addr *saddr, __be16 sport, @@ -1447,6 +1456,7 @@ struct proto udpv6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, -- cgit v1.2.3-59-g8ed1b From d530148ae8bffe1b33f50d1776d185a6e85dc774 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 20 Aug 2010 16:49:43 +0800 Subject: dquot: do full inode dirty in allocating space Alex Shi found a regression when doing ffsb test. The test has several threads, and each thread creates a small file, write to it and then delete it. ffsb reports about 20% regression and Alex bisected it to 43d2932d88e4. The test will call __mark_inode_dirty 3 times. 
Without this commit, we only take inode_lock one time, while with it, we take the lock 3 times with flags (I_DIRTY_SYNC, I_DIRTY_PAGES, I_DIRTY). Perf shows the lock contention increased too much. The proposed patch below fixes it. The fs is allocating blocks, which usually means file writes and the inode will be dirtied soon. We fully dirty the inode to reduce some inode_lock contention in several calls of __mark_inode_dirty. Jan Kara: Added comment. Signed-off-by: Shaohua Li Signed-off-by: Alex Shi Signed-off-by: Jan Kara --- include/linux/quotaops.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d50ba858cfe0..d1a9193960f1 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -274,8 +274,14 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) int ret; ret = dquot_alloc_space_nodirty(inode, nr); - if (!ret) - mark_inode_dirty_sync(inode); + if (!ret) { + /* + * Mark inode fully dirty. Since we are allocating blocks, inode + * would become fully dirty soon anyway and it reportedly + * reduces inode_lock contention. + */ + mark_inode_dirty(inode); + } return ret; } -- cgit v1.2.3-59-g8ed1b From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 31 Aug 2010 15:52:27 +0200 Subject: mm: Move vma_stack_continue into mm.h So it can be used by all that need to check for that.
Signed-off-by: Stefan Bader Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- include/linux/mm.h | 6 ++++++ mm/mlock.c | 6 ------ 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 439fc1f1c1c4..271afc48b9a5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; if (vma->vm_flags & VM_GROWSDOWN) - start += PAGE_SIZE; + if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) + start += PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, diff --git a/include/linux/mm.h b/include/linux/mm.h index e6b1210772ce..74949fbef8c6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -864,6 +864,12 @@ int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/mm/mlock.c b/mm/mlock.c index cbae7c5b9568..b70919ce4f72 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -135,12 +135,6 @@ void munlock_vma_page(struct page *page) } } -/* Is the vma a continuation of the stack vma above it? 
*/ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); -} - static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { return (vma->vm_flags & VM_GROWSDOWN) && -- cgit v1.2.3-59-g8ed1b From a73f8844e1fc54c3762555c1cf1f71774142ca91 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 8 Sep 2010 16:54:54 -0600 Subject: lglock: make lg_lock_global() actually lock globally lg_lock_global() currently only acquires spinlocks for online CPUs, but it's meant to lock all possible CPUs. Lglock-protected resources may be associated with removed CPUs - and, indeed, that could happen with the per-superblock open files lists. At Nick's suggestion, change for_each_online_cpu() to for_each_possible_cpu() to protect accesses to those resources. Cc: Al Viro Acked-by: Nick Piggin Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- include/linux/lglock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index b288cb713b90..f549056fb20b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -150,7 +150,7 @@ int i; \ preempt_disable(); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -161,7 +161,7 @@ void name##_global_unlock(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ -- cgit v1.2.3-59-g8ed1b From f3c65b2870f2481f3646bc410a58a12989ecc704 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 9 Sep 2010 16:37:24 -0700 Subject: mmc: avoid getting CID on SDIO-only cards The introduction of support for SD combo 
cards breaks the initialization of all CSR SDIO chips. The GO_IDLE (CMD0) in mmc_sd_get_cid() causes CSR chips to be reset (this is non-standard behavior). When initializing an SDIO card check for a combo card by using the memory present bit in the R4 response to IO_SEND_OP_COND (CMD5). This avoids the call to mmc_sd_get_cid() on an SDIO-only card. Signed-off-by: David Vrabel Acked-by: Michal Mirolaw Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/core/sdio.c | 5 ++--- include/linux/mmc/sdio.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index bd2755e8d9a3..f332c52968b7 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -362,9 +362,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto err; } - err = mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid); - - if (!err) { + if (ocr & R4_MEMORY_PRESENT + && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) { card->type = MMC_TYPE_SD_COMBO; if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO || diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 329a8faa6e37..245cdacee544 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -38,6 +38,8 @@ * [8:0] Byte/block count */ +#define R4_MEMORY_PRESENT (1 << 27) + /* SDIO status in R5 Type -- cgit v1.2.3-59-g8ed1b From e0bf1024b36be90da241af3c2767311e055b612c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 9 Sep 2010 16:37:26 -0700 Subject: kfifo: add parenthesis for macro parameter reference Some macro parameter references inside typeof() operator are not enclosed with parenthesis. It should be safer to add them. 
Signed-off-by: Huang Ying Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 58 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 4aa95f203f3e..62dbee554f60 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -214,7 +214,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.in = __tmp->kfifo.out = 0; \ }) @@ -228,7 +228,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset_out(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.out = __tmp->kfifo.in; \ }) @@ -238,7 +238,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_len(fifo) \ ({ \ - typeof(fifo + 1) __tmpl = (fifo); \ + typeof((fifo) + 1) __tmpl = (fifo); \ __tmpl->kfifo.in - __tmpl->kfifo.out; \ }) @@ -248,7 +248,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_empty(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ __tmpq->kfifo.in == __tmpq->kfifo.out; \ }) @@ -258,7 +258,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_full(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ kfifo_len(__tmpq) > __tmpq->kfifo.mask; \ }) @@ -269,7 +269,7 @@ __kfifo_must_check_helper(unsigned int val) #define kfifo_avail(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ unsigned int __avail = kfifo_size(__tmpq) - kfifo_len(__tmpq); \ (__recsize) ? ((__avail <= __recsize) ? 
0 : \ @@ -284,7 +284,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_skip(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__recsize) \ @@ -302,7 +302,7 @@ __kfifo_must_check_helper( \ #define kfifo_peek_len(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (!__recsize) ? kfifo_len(__tmp) * sizeof(*__tmp->type) : \ @@ -325,7 +325,7 @@ __kfifo_must_check_helper( \ #define kfifo_alloc(fifo, size, gfp_mask) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? \ __kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \ @@ -339,7 +339,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_free(fifo) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__is_kfifo_ptr(__tmp)) \ __kfifo_free(__kfifo); \ @@ -358,7 +358,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_init(fifo, buffer, size) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? 
\ __kfifo_init(__kfifo, buffer, size, sizeof(*__tmp->type)) : \ @@ -379,8 +379,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_put(fifo, val) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -421,8 +421,8 @@ __kfifo_must_check_helper( \ #define kfifo_get(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -462,8 +462,8 @@ __kfifo_must_check_helper( \ #define kfifo_peek(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -501,8 +501,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_in(fifo, buf, n) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -554,8 +554,8 @@ __kfifo_must_check_helper( \ #define kfifo_out(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -611,7 +611,7 @@ __kfifo_must_check_helper( \ #define 
kfifo_from_user(fifo, from, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -639,7 +639,7 @@ __kfifo_must_check_helper( \ #define kfifo_to_user(fifo, to, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -666,7 +666,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -690,7 +690,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -717,7 +717,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -741,7 +741,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -766,8 +766,8 @@ __kfifo_must_check_helper( \ #define kfifo_out_peek(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ 
unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ -- cgit v1.2.3-59-g8ed1b From 31583bb0cf6cc40f2a468a4d2f3b9cbefd24f891 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Sep 2010 16:37:37 -0700 Subject: cgroups: fix API thinko Add cgroup_attach_task_all() The existing cgroup_attach_task_current_cg() API is called by a thread to attach another thread to all of its cgroups; this is unsuitable for cases where a privileged task wants to attach itself to the cgroups of a less privileged one, since the call must be made from the context of the target task. This patch adds a more generic cgroup_attach_task_all() API that allows both the source task and to-be-moved task to be specified. cgroup_attach_task_current_cg() becomes a specialization of the more generic new function. [menage@google.com: rewrote changelog] [akpm@linux-foundation.org: address reviewer comments] Signed-off-by: Michael S. Tsirkin Tested-by: Alex Williamson Acked-by: Paul Menage Cc: Li Zefan Cc: Ben Blum Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 12 +++++++++++- kernel/cgroup.c | 13 +++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..0c991023ee47 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); + +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID 
is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f9..c9483d8f6140 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1791,19 +1791,20 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; - struct cgroup *cur_cg; int retval = 0; cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); - retval = cgroup_attach_task(cur_cg, tsk); + struct cgroup *from_cg = task_cgroup_from_root(from, root); + + retval = cgroup_attach_task(from_cg, tsk); if (retval) break; } @@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3-59-g8ed1b From 4969c1192d15afa3389e7ae3302096ff684ba655 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Sep 2010 16:37:52 -0700 Subject: mm: fix swapin race condition The pte_same check is reliable only if the swap entry remains pinned (by the page lock on swapcache). We've also to ensure the swapcache isn't removed before we take the lock as try_to_free_swap won't care about the page pin. 
One of the possible impacts of this patch is that a KSM-shared page can point to the anon_vma of another process, which could exit before the page is freed. This can leave a page with a pointer to a recycled anon_vma object, or worse, a pointer to something that is no longer an anon_vma. [riel@redhat.com: changelog help] Signed-off-by: Andrea Arcangeli Acked-by: Hugh Dickins Reviewed-by: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 20 +++++++++----------- mm/ksm.c | 3 --- mm/memory.c | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 74d691ee9121..3319a6967626 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,6 +16,9 @@ struct stable_node; struct mem_cgroup; +struct page *ksm_does_need_to_copy(struct page *page, + struct vm_area_struct *vma, unsigned long address); + #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); @@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page, * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? 
*/ -struct page *ksm_does_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { struct anon_vma *anon_vma = page_anon_vma(page); - if (!anon_vma || - (anon_vma->root == vma->anon_vma->root && - page->index == linear_page_index(vma, address))) - return page; - - return ksm_does_need_to_copy(page, vma, address); + return anon_vma && + (anon_vma->root != vma->anon_vma->root || + page->index != linear_page_index(vma, address)); } int page_referenced_ksm(struct page *page, @@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { - return page; + return 0; } static inline int page_referenced_ksm(struct page *page, diff --git a/mm/ksm.c b/mm/ksm.c index e2ae00458320..b1873cf03ed9 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page, { struct page *new_page; - unlock_page(page); /* any racers will COW it, not modify it */ - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (new_page) { copy_user_highpage(new_page, page, address, vma); @@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page, add_page_to_unevictable_list(new_page); } - page_cache_release(page); return new_page; } diff --git a/mm/memory.c b/mm/memory.c index 6b2ab1051851..71b161b73bb5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int flags, pte_t orig_pte) { spinlock_t *ptl; - struct page *page; + struct page *page, *swapcache = NULL; swp_entry_t entry; pte_t pte; 
struct mem_cgroup *ptr = NULL; @@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - page = ksm_might_need_to_copy(page, vma, address); - if (!page) { - ret = VM_FAULT_OOM; - goto out; + /* + * Make sure try_to_free_swap didn't release the swapcache + * from under us. The page pin isn't enough to prevent that. + */ + if (unlikely(!PageSwapCache(page))) + goto out_page; + + if (ksm_might_need_to_copy(page, vma, address)) { + swapcache = page; + page = ksm_does_need_to_copy(page, vma, address); + + if (unlikely(!page)) { + ret = VM_FAULT_OOM; + page = swapcache; + swapcache = NULL; + goto out_page; + } } if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { @@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); + if (swapcache) { + /* + * Hold the lock to avoid the swap entry to be reused + * until we take the PT lock for the pte_same() check + * (to avoid false positives from pte_same). For + * further safety release the lock after the swap_free + * so that the swap count won't change under a + * parallel locked swapcache. + */ + unlock_page(swapcache); + page_cache_release(swapcache); + } if (flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); @@ -2756,6 +2781,10 @@ out_page: unlock_page(page); out_release: page_cache_release(page); + if (swapcache) { + unlock_page(swapcache); + page_cache_release(swapcache); + } return ret; } -- cgit v1.2.3-59-g8ed1b From 5affb607720d734ca572b8a77c5c7d62d3042b6f Mon Sep 17 00:00:00 2001 From: Gregory Bean Date: Thu, 9 Sep 2010 16:38:02 -0700 Subject: gpio: sx150x: correct and refine reset-on-probe behavior Replace the arbitrary software-reset call from the device-probe method, because: - It is defective. 
To work correctly, it should be two byte writes, not a single word write. As it stands, it does nothing. - Some devices with sx150x expanders installed have their NRESET pins ganged on the same line, so resetting one causes the others to reset - not a nice thing to do arbitrarily! - The probe, usually taking place at boot, implies a recent hard-reset, so a software reset at this point is just a waste of energy anyway. Therefore, make it optional, defaulting to off, as this will match the common case of probing at powerup and also matches the current broken no-op behavior. Signed-off-by: Gregory Bean Reviewed-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/sx150x.c | 26 +++++++++++++++++++++----- include/linux/i2c/sx150x.h | 4 ++++ 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpio/sx150x.c b/drivers/gpio/sx150x.c index b42f42ca70c3..823559ab0e24 100644 --- a/drivers/gpio/sx150x.c +++ b/drivers/gpio/sx150x.c @@ -459,17 +459,33 @@ static int sx150x_init_io(struct sx150x_chip *chip, u8 base, u16 cfg) return err; } -static int sx150x_init_hw(struct sx150x_chip *chip, - struct sx150x_platform_data *pdata) +static int sx150x_reset(struct sx150x_chip *chip) { - int err = 0; + int err; - err = i2c_smbus_write_word_data(chip->client, + err = i2c_smbus_write_byte_data(chip->client, chip->dev_cfg->reg_reset, - 0x3412); + 0x12); if (err < 0) return err; + err = i2c_smbus_write_byte_data(chip->client, + chip->dev_cfg->reg_reset, + 0x34); + return err; +} + +static int sx150x_init_hw(struct sx150x_chip *chip, + struct sx150x_platform_data *pdata) +{ + int err = 0; + + if (pdata->reset_during_probe) { + err = sx150x_reset(chip); + if (err < 0) + return err; + } + err = sx150x_i2c_write(chip->client, chip->dev_cfg->reg_misc, 0x01); diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h index ee3049cb9ba5..52baa79d69a7 100644 --- a/include/linux/i2c/sx150x.h +++ 
b/include/linux/i2c/sx150x.h @@ -63,6 +63,9 @@ * IRQ lines will appear. Similarly to gpio_base, the expander * will create a block of irqs beginning at this number. * This value is ignored if irq_summary is < 0. + * @reset_during_probe: If set to true, the driver will trigger a full + * reset of the chip at the beginning of the probe + * in order to place it in a known state. */ struct sx150x_platform_data { unsigned gpio_base; @@ -73,6 +76,7 @@ struct sx150x_platform_data { u16 io_polarity; int irq_summary; unsigned irq_base; + bool reset_during_probe; }; #endif /* __LINUX_I2C_SX150X_H */ -- cgit v1.2.3-59-g8ed1b From c956126c137d97acb6f4d56fa9572d0bcc84e4ed Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 9 Sep 2010 16:38:03 -0700 Subject: gpio: doc updates There's been some recent confusion about error checking GPIO numbers. Briefly, it should be handled mostly during setup, when gpio_request() is called, and NEVER by expecting gpio_is_valid to report more than never-usable GPIO numbers. [akpm@linux-foundation.org: terminate unterminated comment] Signed-off-by: David Brownell Cc: "Eric Miao" Cc: "Ryan Mallon" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 22 ++++++++++++++-------- include/asm-generic/gpio.h | 14 +++++++++++++- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index d96a6dba5748..9633da01ff46 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -109,17 +109,19 @@ use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. If you want to initialize a structure with an invalid GPIO number, use some negative number (perhaps "-EINVAL"); that will never be valid.
To -test if a number could reference a GPIO, you may use this predicate: +test if such number from such a structure could reference a GPIO, you +may use this predicate: int gpio_is_valid(int number); A number that's not valid will be rejected by calls which may request or free GPIOs (see below). Other numbers may also be rejected; for -example, a number might be valid but unused on a given board. - -Whether a platform supports multiple GPIO controllers is currently a -platform-specific implementation issue. +example, a number might be valid but temporarily unused on a given board. +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are whether that support can leave "holes" in the space +of GPIO numbers, and whether new controllers can be added at runtime. Such issues +can affect things including whether adjacent GPIO numbers are both valid. Using GPIOs ----------- @@ -480,12 +482,16 @@ To support this framework, a platform's Kconfig will "select" either ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB and arrange that its includes and defines three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). -They may also want to provide a custom value for ARCH_NR_GPIOS. -ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled +It may also provide a custom value for ARCH_NR_GPIOS, so that it better +reflects the number of GPIOs in actual use on that platform, without +wasting static table space. (It should count both built-in/SoC GPIOs and +also ones on GPIO expanders. + +ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled into the kernel on that architecture. -ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user +ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user can enable it and build it into the kernel optionally. 
If neither of these options are selected, the platform does not support diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c7376bf80b06..8ca18e26d7e3 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -16,15 +16,27 @@ * While the GPIO programming interface defines valid GPIO numbers * to be in the range 0..MAX_INT, this library restricts them to the * smaller range 0..ARCH_NR_GPIOS-1. + * + * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of + * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is + * actually an estimate of a board-specific value. */ #ifndef ARCH_NR_GPIOS #define ARCH_NR_GPIOS 256 #endif +/* + * "valid" GPIO numbers are nonnegative and may be passed to + * setup routines like gpio_request(). only some valid numbers + * can successfully be requested and used. + * + * Invalid GPIO numbers are useful for indicating no-such-GPIO in + * platform data and other tables. + */ + static inline int gpio_is_valid(int number) { - /* only some non-negative numbers are valid */ return ((unsigned)number) < ARCH_NR_GPIOS; } -- cgit v1.2.3-59-g8ed1b From 910321ea817a202ff70fac666e37e2c8e2f88823 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:07 -0700 Subject: swap: revert special hibernation allocation Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf "hibernation: freeze swap at hibernation". It complicated matters by adding a second swap allocation path, just for hibernation; without in any way fixing the issue that it was intended to address - page reclaim after fixing the hibernation image might free swap from a page already imaged as swapcache, letting its swap be reallocated to store a different page of the image: resulting in data corruption if the imaged page were freed as clean then swapped back in. Pages freed to si->swap_map were still in danger of being reallocated by the alternative allocation path. 
I guess it inadvertently fixed slow SSD swap allocation for hibernation, as reported by Nigel Cunningham: by missing out the discards that occur on the usual swap allocation path; but that was unintentional, and needs a separate fix. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: "Rafael J. Wysocki" Cc: Ondrej Zary Cc: Andrea Gelmini Cc: Balbir Singh Cc: Andrea Arcangeli Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 +---- kernel/power/hibernate.c | 1 - kernel/power/snapshot.c | 1 - kernel/power/swap.c | 6 ++-- mm/swapfile.c | 94 ++++++++++++------------------------------------ 5 files changed, 26 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fee51a11b73..bf4eb62506db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -315,6 +315,7 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -331,13 +332,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_HIBERNATION -void hibernation_freeze_swap(void); -void hibernation_thaw_swap(void); -swp_entry_t get_swap_for_hibernation(int type); -void swap_free_for_hibernation(swp_entry_t val); -#endif - /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c77963938bca..8dc31e02ae12 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); - 
hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5e7edfb05e66..f6cd6faf84fd 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,7 +1086,6 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; - hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 5d0059eed3e4..e6a5bdf61a37 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_for_hibernation(swap)); + offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); kfree(ext); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 1f3f9c59a73a..f08d165871b3 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,8 +47,6 @@ long nr_swap_pages; long total_swap_pages; static int least_priority; -static bool swap_for_hibernation; - static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -453,8 +451,6 @@ swp_entry_t get_swap_page(void) spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; - if (swap_for_hibernation) - goto noswap; nr_swap_pages--; for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { @@ -487,6 +483,28 @@ noswap: return 
(swp_entry_t) {0}; } +/* The only caller of this function is now the suspend routine */ +swp_entry_t get_swap_page_of_type(int type) +{ + struct swap_info_struct *si; + pgoff_t offset; + + spin_lock(&swap_lock); + si = swap_info[type]; + if (si && (si->flags & SWP_WRITEOK)) { + nr_swap_pages--; + /* This is called for allocating swap entry, not cache */ + offset = scan_swap_map(si, 1); + if (offset) { + spin_unlock(&swap_lock); + return swp_entry(type, offset); + } + nr_swap_pages++; + } + spin_unlock(&swap_lock); + return (swp_entry_t) {0}; +} + static struct swap_info_struct *swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; @@ -746,74 +764,6 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) #endif #ifdef CONFIG_HIBERNATION - -static pgoff_t hibernation_offset[MAX_SWAPFILES]; -/* - * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise, - * saved swap_map[] image to the disk will be an incomplete because it's - * changing without synchronization with hibernation snap shot. - * At resume, we just make swap_for_hibernation=false. We can forget - * used maps easily. - */ -void hibernation_freeze_swap(void) -{ - int i; - - spin_lock(&swap_lock); - - printk(KERN_INFO "PM: Freeze Swap\n"); - swap_for_hibernation = true; - for (i = 0; i < MAX_SWAPFILES; i++) - hibernation_offset[i] = 1; - spin_unlock(&swap_lock); -} - -void hibernation_thaw_swap(void) -{ - spin_lock(&swap_lock); - if (swap_for_hibernation) { - printk(KERN_INFO "PM: Thaw Swap\n"); - swap_for_hibernation = false; - } - spin_unlock(&swap_lock); -} - -/* - * Because updateing swap_map[] can make not-saved-status-change, - * we use our own easy allocator. - * Please see kernel/power/swap.c, Used swaps are recorded into - * RB-tree.
- */ -swp_entry_t get_swap_for_hibernation(int type) -{ - pgoff_t off; - swp_entry_t val = {0}; - struct swap_info_struct *si; - - spin_lock(&swap_lock); - - si = swap_info[type]; - if (!si || !(si->flags & SWP_WRITEOK)) - goto done; - - for (off = hibernation_offset[type]; off < si->max; ++off) { - if (!si->swap_map[off]) - break; - } - if (off < si->max) { - val = swp_entry(type, off); - hibernation_offset[type] = off + 1; - } -done: - spin_unlock(&swap_lock); - return val; -} - -void swap_free_for_hibernation(swp_entry_t ent) -{ - /* Nothing to do */ -} - /* * Find the swap type that corresponds to given device (if any). * -- cgit v1.2.3-59-g8ed1b From 3399446632739fcd05fd8b272b476a69c6e6d14a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:11 -0700 Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs show a shift since I last tested in December: in part because of firmware updates, in part because of the necessary move from barriers to awaiting completion at the block layer. While discard at swapon still shows as slightly beneficial on both, discarding 1MB swap cluster when allocating is now disadvanteous: adds 25% overhead on Intel, adds 230% on OCZ (YMMV). Surrender: discard as presently implemented is more hindrance than help for swap; but might prove useful on other devices, or with improvements. So continue to do the discard at swapon, but make discard while swapping conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which has been using only the lower 16 bits of int flags). We can add a --discard or -d to swapon(8), and a "discard" to swap in /etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2, nilfs2. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Cc: Nigel Cunningham Cc: Tejun Heo Cc: Jens Axboe Cc: James Bottomley Cc: "Martin K. 
Petersen" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- mm/swapfile.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bf4eb62506db..7cdd63366f88 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -19,6 +19,7 @@ struct bio; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 +#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ static inline int current_is_kswapd(void) { @@ -142,7 +143,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ - SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 1894dead0b58..7c703ff2f36f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2047,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags |= SWP_SOLIDSTATE; p->cluster_next = 1 + (random32() % p->highest_bit); } - if (discard_swap(p) == 0) + if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD)) p->flags |= SWP_DISCARDABLE; } -- cgit v1.2.3-59-g8ed1b From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 9 Sep 2010 16:38:17 -0700 Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is cheaper than scanning a number of lists. 
To avoid synchronization overhead, counter deltas are maintained on a per-cpu basis and drained both periodically and when the delta is above a threshold. On large CPU systems, the difference between the estimated and real value of NR_FREE_PAGES can be very high. If NR_FREE_PAGES is much higher than the number of real free pages in the buddy allocator, the VM can allocate pages below the min watermark, at worst reducing the real number of free pages to zero. Even if the OOM killer kills some victim to free memory, it may not free any memory if the exit path requires a new page, resulting in livelock. This patch introduces a zone_page_state_snapshot() function (courtesy of Christoph) that takes a slightly more accurate view of an arbitrary vmstat counter. It is used to read NR_FREE_PAGES while kswapd is awake to avoid the watermark being accidentally broken. The estimate is not perfect and may result in cache line bounces but is expected to be lighter than the IPI calls necessary to continually drain the per-cpu counters while kswapd is awake.
Signed-off-by: Christoph Lameter Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++++++++ include/linux/vmstat.h | 22 ++++++++++++++++++++++ mm/mmzone.c | 21 +++++++++++++++++++++ mm/page_alloc.c | 4 ++-- mm/vmstat.c | 15 ++++++++++++++- 5 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6e6e62648a4d..3984c4eb41fd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -283,6 +283,13 @@ struct zone { /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several @@ -441,6 +448,12 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } +#ifdef CONFIG_SMP +unsigned long zone_nr_free_pages(struct zone *zone); +#else +#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES) +#endif /* CONFIG_SMP */ + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 7f43ccdc1d38..eaaea37b3b75 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone, return x; } +/* + * More accurate version that also considers the currently pending + * deltas. For that we need to loop over all cpus to find the current + * deltas. 
There is no synchronization so the result cannot be + * exactly accurate either. + */ +static inline unsigned long zone_page_state_snapshot(struct zone *zone, + enum zone_stat_item item) +{ + long x = atomic_long_read(&zone->vm_stat[item]); + +#ifdef CONFIG_SMP + int cpu; + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item]; + + if (x < 0) + x = 0; +#endif + return x; +} + extern unsigned long global_reclaimable_pages(void); extern unsigned long zone_reclaimable_pages(struct zone *zone); diff --git a/mm/mmzone.c b/mm/mmzone.c index f5b7d1760213..e35bfb82c855 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn, return 1; } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + +#ifdef CONFIG_SMP +/* Called when a more accurate view of NR_FREE_PAGES is needed */ +unsigned long zone_nr_free_pages(struct zone *zone) +{ + unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES); + + /* + * While kswapd is awake, it is considered the zone is under some + * memory pressure. Under pressure, there is a risk that + * per-cpu-counter-drift will allow the min watermark to be breached + * potentially causing a live-lock. 
While kswapd is awake and + * free pages are low, get a better estimate for free pages + */ + if (nr_free_pages < zone->percpu_drift_mark && + !waitqueue_active(&zone->zone_pgdat->kswapd_wait)) + return zone_page_state_snapshot(zone, NR_FREE_PAGES); + + return nr_free_pages; +} +#endif /* CONFIG_SMP */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 452e2ba06c7c..b2d21e06d45d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1462,7 +1462,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, { /* free_pages my go negative - that's OK */ long min = mark; - long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1; + long free_pages = zone_nr_free_pages(z) - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -2424,7 +2424,7 @@ void show_free_areas(void) " all_unreclaimable? %s" "\n", zone->name, - K(zone_page_state(zone, NR_FREE_PAGES)), + K(zone_nr_free_pages(zone)), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), diff --git a/mm/vmstat.c b/mm/vmstat.c index a8d6b59e609a..355a9e669aaa 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void) int threshold; for_each_populated_zone(zone) { + unsigned long max_drift, tolerate_drift; + threshold = calculate_threshold(zone); for_each_online_cpu(cpu) per_cpu_ptr(zone->pageset, cpu)->stat_threshold = threshold; + + /* + * Only set percpu_drift_mark if there is a danger that + * NR_FREE_PAGES reports the low watermark is ok when in fact + * the min watermark could be breached by an allocation + */ + tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); + max_drift = num_online_cpus() * threshold; + if (max_drift > tolerate_drift) + zone->percpu_drift_mark = high_wmark_pages(zone) + + max_drift; } } @@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n scanned %lu" "\n spanned %lu" "\n present %lu", - zone_page_state(zone, NR_FREE_PAGES), + 
zone_nr_free_pages(zone), min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), -- cgit v1.2.3-59-g8ed1b From e2f3d75fc0e4a0d03c61872bad39ffa2e74a04ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 7 Sep 2010 14:05:31 +0200 Subject: libata: skip EH autopsy and recovery during suspend For some mysterious reason, certain hardware reacts badly to usual EH actions while the system is going for suspend. As the devices won't be needed until the system is resumed, ask EH to skip usual autopsy and recovery and proceed directly to suspend. Signed-off-by: Tejun Heo Tested-by: Stephan Diestelhorst Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 14 +++++++++++++- drivers/ata/libata-eh.c | 4 ++++ include/linux/libata.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c035b3d041ee..932eaee50245 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5418,6 +5418,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg, */ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) { + unsigned int ehi_flags = ATA_EHI_QUIET; int rc; /* @@ -5426,7 +5427,18 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) */ ata_lpm_enable(host); - rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1); + /* + * On some hardware, device fails to respond after spun down + * for suspend. As the device won't be used before being + * resumed, we don't need to touch the device. Ask EH to skip + * the usual stuff and proceed directly to suspend. 
+ * + * http://thread.gmane.org/gmane.linux.ide/46764 + */ + if (mesg.event == PM_EVENT_SUSPEND) + ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY; + + rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1); if (rc == 0) host->dev->power.power_state = mesg; return rc; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c9ae299b8342..e48302eae55f 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3235,6 +3235,10 @@ static int ata_eh_skip_recovery(struct ata_link *link) if (link->flags & ATA_LFLAG_DISABLED) return 1; + /* skip if explicitly requested */ + if (ehc->i.flags & ATA_EHI_NO_RECOVERY) + return 1; + /* thaw frozen port and recover failed devices */ if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) return 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index f010f18a0f86..7de282d8bedf 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -335,6 +335,7 @@ enum { ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ ATA_EHI_QUIET = (1 << 3), /* be quiet */ + ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ -- cgit v1.2.3-59-g8ed1b From ea3c64506ea7965f86f030155e6fdef381de10e2 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 31 Aug 2010 16:20:36 -0700 Subject: libata-sff: Reenable Port Multiplier after libata-sff remodeling. Keep track of the link on which the current request is in progress. It allows support of links behind a port multiplier. Not all of libata-sff is PMP compliant. Code for native BMDMA controllers does not take PMP into account. Tested on Marvell 7042 and Sil7526.
Signed-off-by: Gwendal Grignou Signed-off-by: Jeff Garzik --- drivers/ata/libata-sff.c | 38 ++++++++++++++++++++++++++++---------- drivers/ata/sata_mv.c | 2 +- include/linux/libata.h | 3 ++- 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index dee3c2c52562..e30c537cce32 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1045,7 +1045,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq) { - struct ata_eh_info *ehi = &ap->link.eh_info; + struct ata_link *link = qc->dev->link; + struct ata_eh_info *ehi = &link->eh_info; unsigned long flags = 0; int poll_next; @@ -1301,8 +1302,14 @@ fsm_start: } EXPORT_SYMBOL_GPL(ata_sff_hsm_move); -void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay) +void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay) { + struct ata_port *ap = link->ap; + + WARN_ON((ap->sff_pio_task_link != NULL) && + (ap->sff_pio_task_link != link)); + ap->sff_pio_task_link = link; + /* may fail if ata_sff_flush_pio_task() in progress */ queue_delayed_work(ata_sff_wq, &ap->sff_pio_task, msecs_to_jiffies(delay)); @@ -1324,14 +1331,18 @@ static void ata_sff_pio_task(struct work_struct *work) { struct ata_port *ap = container_of(work, struct ata_port, sff_pio_task.work); + struct ata_link *link = ap->sff_pio_task_link; struct ata_queued_cmd *qc; u8 status; int poll_next; + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ - qc = ata_qc_from_tag(ap, ap->link.active_tag); - if (!qc) + qc = ata_qc_from_tag(ap, link->active_tag); + if (!qc) { + ap->sff_pio_task_link = NULL; return; + } fsm_start: WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE); @@ -1348,11 +1359,16 @@ fsm_start: msleep(2); status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { - 
ata_sff_queue_pio_task(ap, ATA_SHORT_PAUSE); + ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); return; } } + /* + * hsm_move() may trigger another command to be processed. + * clean the link beforehand. + */ + ap->sff_pio_task_link = NULL; /* move the HSM */ poll_next = ata_sff_hsm_move(ap, qc, status, 1); @@ -1379,6 +1395,7 @@ fsm_start: unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* Use polling pio if the LLD doesn't handle * interrupt driven pio and atapi CDB interrupt. @@ -1399,7 +1416,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST_LAST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; @@ -1412,7 +1429,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_WRITE) { /* PIO data out protocol */ ap->hsm_task_state = HSM_ST_FIRST; - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* always send first data block using the * ata_sff_pio_task() codepath. @@ -1422,7 +1439,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* if polling, ata_sff_pio_task() handles the * rest. 
otherwise, interrupt handler takes @@ -1444,7 +1461,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if ((!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) || (qc->tf.flags & ATA_TFLAG_POLLING)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: @@ -2737,6 +2754,7 @@ EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep); unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* defer PIO handling to sff_qc_issue */ if (!ata_is_dma(qc->tf.protocol)) @@ -2765,7 +2783,7 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 81982594a014..a9fd9709c262 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -2284,7 +2284,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc) } if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 7de282d8bedf..45fb2967b66d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -724,6 +724,7 @@ struct ata_port { struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ u8 ctl; /* cache of ATA control register */ u8 last_ctl; /* Cache last written value */ + struct ata_link* sff_pio_task_link; /* link currently used */ struct delayed_work sff_pio_task; #ifdef CONFIG_ATA_BMDMA struct ata_bmdma_prd *bmdma_prd; /* BMDMA SG list */ @@ -1595,7 +1596,7 @@ extern void ata_sff_irq_on(struct ata_port *ap); extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); -extern void 
ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay); +extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, -- cgit v1.2.3-59-g8ed1b From 006abe887c5e637d059c44310de6c92f36aded3b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Sep 2010 19:55:25 -0400 Subject: SUNRPC: Fix a race in rpc_info_open There is a race between rpc_info_open and rpc_release_client() in that nothing stops a process from opening the file after the clnt->cl_kref goes to zero. Fix this by using atomic_inc_not_zero()... Reported-by: J. Bruce Fields Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/clnt.c | 26 ++++++++++++-------------- net/sunrpc/rpc_pipe.c | 14 ++++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 569dc722a600..85f38a63f098 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,7 +30,7 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { - struct kref cl_kref; /* Number of references */ + atomic_t cl_count; /* Number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 657aac630fc9..3a8f53e7ba07 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -226,7 +226,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_no_principal; } - kref_init(&clnt->cl_kref); + atomic_set(&clnt->cl_count, 1); err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) @@ -390,14 +390,14 @@ rpc_clone_client(struct rpc_clnt *clnt) if
(new->cl_principal == NULL) goto out_no_principal; } - kref_init(&new->cl_kref); + atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); xprt_get(clnt->cl_xprt); - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; @@ -465,10 +465,8 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); * Free an RPC client */ static void -rpc_free_client(struct kref *kref) +rpc_free_client(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_path.dentry)) { @@ -495,12 +493,10 @@ out_free: * Free an RPC client */ static void -rpc_free_auth(struct kref *kref) +rpc_free_auth(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - if (clnt->cl_auth == NULL) { - rpc_free_client(kref); + rpc_free_client(clnt); return; } @@ -509,10 +505,11 @@ rpc_free_auth(struct kref *kref) * release remaining GSS contexts. This mechanism ensures * that it can do so safely. 
*/ - kref_init(kref); + atomic_inc(&clnt->cl_count); rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; - kref_put(kref, rpc_free_client); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); } /* @@ -525,7 +522,8 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - kref_put(&clnt->cl_kref, rpc_free_auth); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_auth(clnt); } /** @@ -588,7 +586,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) if (clnt != NULL) { rpc_task_release_client(task); task->tk_client = clnt; - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; /* Add to the client's list of all tasks */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 41a762f82630..8c8eef2b8f26 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -371,21 +371,23 @@ rpc_show_info(struct seq_file *m, void *v) static int rpc_info_open(struct inode *inode, struct file *file) { - struct rpc_clnt *clnt; + struct rpc_clnt *clnt = NULL; int ret = single_open(file, rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; - mutex_lock(&inode->i_mutex); - clnt = RPC_I(inode)->private; - if (clnt) { - kref_get(&clnt->cl_kref); + + spin_lock(&file->f_path.dentry->d_lock); + if (!d_unhashed(file->f_path.dentry)) + clnt = RPC_I(inode)->private; + if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) { + spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { + spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } - mutex_unlock(&inode->i_mutex); } return ret; } -- cgit v1.2.3-59-g8ed1b From c54fce6eff197d9c57c97afbf6c9722ce434fc8f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 Sep 2010 16:51:36 +0200 Subject: workqueue: add documentation Update copyright notice and add Documentation/workqueue.txt. 
Randy Dunlap, Dave Chinner: misc fixes. Signed-off-by: Tejun Heo Reviewed-By: Florian Mickler Cc: Ingo Molnar Cc: Christoph Lameter Cc: Randy Dunlap Cc: Dave Chinner --- Documentation/workqueue.txt | 380 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/workqueue.h | 4 + kernel/workqueue.c | 27 ++-- 3 files changed, 401 insertions(+), 10 deletions(-) create mode 100644 Documentation/workqueue.txt (limited to 'include') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt new file mode 100644 index 000000000000..e4498a2872c3 --- /dev/null +++ b/Documentation/workqueue.txt @@ -0,0 +1,380 @@ + +Concurrency Managed Workqueue (cmwq) + +September, 2010 Tejun Heo + Florian Mickler + +CONTENTS + +1. Introduction +2. Why cmwq? +3. The Design +4. Application Programming Interface (API) +5. Example Execution Scenarios +6. Guidelines + + +1. Introduction + +There are many cases where an asynchronous process execution context +is needed and the workqueue (wq) API is the most commonly used +mechanism for such cases. + +When such an asynchronous execution context is needed, a work item +describing which function to execute is put on a queue. An +independent thread serves as the asynchronous execution context. The +queue is called workqueue and the thread is called worker. + +While there are work items on the workqueue the worker executes the +functions associated with the work items one after the other. When +there is no work item left on the workqueue the worker becomes idle. +When a new work item gets queued, the worker begins executing again. + + +2. Why cmwq? + +In the original wq implementation, a multi threaded (MT) wq had one +worker thread per CPU and a single threaded (ST) wq had one worker +thread system-wide. A single MT wq needed to keep around the same +number of workers as the number of CPUs. 
The kernel grew a lot of MT +wq users over the years and with the number of CPU cores continuously +rising, some systems saturated the default 32k PID space just booting +up. + +Although MT wq wasted a lot of resource, the level of concurrency +provided was unsatisfactory. The limitation was common to both ST and +MT wq albeit less severe on MT. Each wq maintained its own separate +worker pool. A MT wq could provide only one execution context per CPU +while a ST wq one for the whole system. Work items had to compete for +those very limited execution contexts leading to various problems +including proneness to deadlocks around the single execution context. + +The tension between the provided level of concurrency and resource +usage also forced its users to make unnecessary tradeoffs like libata +choosing to use ST wq for polling PIOs and accepting an unnecessary +limitation that no two polling PIOs can progress at the same time. As +MT wq don't provide much better concurrency, users which require +higher level of concurrency, like async or fscache, had to implement +their own thread pool. + +Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with +focus on the following goals. + +* Maintain compatibility with the original workqueue API. + +* Use per-CPU unified worker pools shared by all wq to provide + flexible level of concurrency on demand without wasting a lot of + resource. + +* Automatically regulate worker pool and level of concurrency so that + the API users don't need to worry about such details. + + +3. The Design + +In order to ease the asynchronous execution of functions a new +abstraction, the work item, is introduced. + +A work item is a simple struct that holds a pointer to the function +that is to be executed asynchronously. Whenever a driver or subsystem +wants a function to be executed asynchronously it has to set up a work +item pointing to that function and queue that work item on a +workqueue. 
+ +Special purpose threads, called worker threads, execute the functions +off of the queue, one after the other. If no work is queued, the +worker threads become idle. These worker threads are managed in so +called thread-pools. + +The cmwq design differentiates between the user-facing workqueues that +subsystems and drivers queue work items on and the backend mechanism +which manages thread-pool and processes the queued work items. + +The backend is called gcwq. There is one gcwq for each possible CPU +and one gcwq to serve work items queued on unbound workqueues. + +Subsystems and drivers can create and queue work items through special +workqueue API functions as they see fit. They can influence some +aspects of the way the work items are executed by setting flags on the +workqueue they are putting the work item on. These flags include +things like CPU locality, reentrancy, concurrency limits and more. To +get a detailed overview refer to the API description of +alloc_workqueue() below. + +When a work item is queued to a workqueue, the target gcwq is +determined according to the queue parameters and workqueue attributes +and appended on the shared worklist of the gcwq. For example, unless +specifically overridden, a work item of a bound workqueue will be +queued on the worklist of exactly that gcwq that is associated to the +CPU the issuer is running on. + +For any worker pool implementation, managing the concurrency level +(how many execution contexts are active) is an important issue. cmwq +tries to keep the concurrency at a minimal but sufficient level. +Minimal to save resources and sufficient in that the system is used at +its full capacity. + +Each gcwq bound to an actual CPU implements concurrency management by +hooking into the scheduler. The gcwq is notified whenever an active +worker wakes up or sleeps and keeps track of the number of the +currently runnable workers. Generally, work items are not expected to +hog a CPU and consume many cycles. 
That means maintaining just enough +concurrency to prevent work processing from stalling should be +optimal. As long as there are one or more runnable workers on the +CPU, the gcwq doesn't start execution of a new work, but, when the +last running worker goes to sleep, it immediately schedules a new +worker so that the CPU doesn't sit idle while there are pending work +items. This allows using a minimal number of workers without losing +execution bandwidth. + +Keeping idle workers around doesn't cost other than the memory space +for kthreads, so cmwq holds onto idle ones for a while before killing +them. + +For an unbound wq, the above concurrency management doesn't apply and +the gcwq for the pseudo unbound CPU tries to start executing all work +items as soon as possible. The responsibility of regulating +concurrency level is on the users. There is also a flag to mark a +bound wq to ignore the concurrency management. Please refer to the +API section for details. + +Forward progress guarantee relies on that workers can be created when +more execution contexts are necessary, which in turn is guaranteed +through the use of rescue workers. All work items which might be used +on code paths that handle memory reclaim are required to be queued on +wq's that have a rescue-worker reserved for execution under memory +pressure. Else it is possible that the thread-pool deadlocks waiting +for execution contexts to free up. + + +4. Application Programming Interface (API) + +alloc_workqueue() allocates a wq. The original create_*workqueue() +functions are deprecated and scheduled for removal. alloc_workqueue() +takes three arguments - @name, @flags and @max_active. @name is the +name of the wq and also used as the name of the rescuer thread if +there is one. + +A wq no longer manages execution resources but serves as a domain for +forward progress guarantee, flush and work item attributes. 
@flags +and @max_active control how work items are assigned execution +resources, scheduled and executed. + +@flags: + + WQ_NON_REENTRANT + + By default, a wq guarantees non-reentrance only on the same + CPU. A work item may not be executed concurrently on the same + CPU by multiple workers but is allowed to be executed + concurrently on multiple CPUs. This flag makes sure + non-reentrance is enforced across all CPUs. Work items queued + to a non-reentrant wq are guaranteed to be executed by at most + one worker system-wide at any given time. + + WQ_UNBOUND + + Work items queued to an unbound wq are served by a special + gcwq which hosts workers which are not bound to any specific + CPU. This makes the wq behave as a simple execution context + provider without concurrency management. The unbound gcwq + tries to start execution of work items as soon as possible. + Unbound wq sacrifices locality but is useful for the following + cases. + + * Wide fluctuation in the concurrency level requirement is + expected and using bound wq may end up creating large number + of mostly unused workers across different CPUs as the issuer + hops through different CPUs. + + * Long running CPU intensive workloads which can be better + managed by the system scheduler. + + WQ_FREEZEABLE + + A freezeable wq participates in the freeze phase of the system + suspend operations. Work items on the wq are drained and no + new work item starts execution until thawed. + + WQ_RESCUER + + All wq which might be used in the memory reclaim paths _MUST_ + have this flag set. This reserves one worker exclusively for + the execution of this wq under memory pressure. + + WQ_HIGHPRI + + Work items of a highpri wq are queued at the head of the + worklist of the target gcwq and start execution regardless of + the current concurrency level. In other words, highpri work + items will always start execution as soon as execution + resource is available. 
+ + Ordering among highpri work items is preserved - a highpri + work item queued after another highpri work item will start + execution after the earlier highpri work item starts. + + Although highpri work items are not held back by other + runnable work items, they still contribute to the concurrency + level. Highpri work items in runnable state will prevent + non-highpri work items from starting execution. + + This flag is meaningless for unbound wq. + + WQ_CPU_INTENSIVE + + Work items of a CPU intensive wq do not contribute to the + concurrency level. In other words, runnable CPU intensive + work items will not prevent other work items from starting + execution. This is useful for bound work items which are + expected to hog CPU cycles so that their execution is + regulated by the system scheduler. + + Although CPU intensive work items don't contribute to the + concurrency level, start of their executions is still + regulated by the concurrency management and runnable + non-CPU-intensive work items can delay execution of CPU + intensive work items. + + This flag is meaningless for unbound wq. + + WQ_HIGHPRI | WQ_CPU_INTENSIVE + + This combination makes the wq avoid interaction with + concurrency management completely and behave as a simple + per-CPU execution context provider. Work items queued on a + highpri CPU-intensive wq start execution as soon as resources + are available and don't affect execution of other work items. + +@max_active: + +@max_active determines the maximum number of execution contexts per +CPU which can be assigned to the work items of a wq. For example, +with @max_active of 16, at most 16 work items of the wq can be +executing at the same time per CPU. + +Currently, for a bound wq, the maximum limit for @max_active is 512 +and the default value used when 0 is specified is 256. For an unbound +wq, the limit is higher of 512 and 4 * num_possible_cpus(). 
These +values are chosen sufficiently high such that they are not the +limiting factor while providing protection in runaway cases. + +The number of active work items of a wq is usually regulated by the +users of the wq, more specifically, by how many work items the users +may queue at the same time. Unless there is a specific need for +throttling the number of active work items, specifying '0' is +recommended. + +Some users depend on the strict execution ordering of ST wq. The +combination of @max_active of 1 and WQ_UNBOUND is used to achieve this +behavior. Work items on such wq are always queued to the unbound gcwq +and only one work item can be active at any given time thus achieving +the same ordering property as ST wq. + + +5. Example Execution Scenarios + +The following example execution scenarios try to illustrate how cmwq +behaves under different configurations. + + Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU. + w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms + again before finishing. w1 and w2 burn CPU for 5ms then sleep for + 10ms. + +Ignoring all other tasks, works and processing overhead, and assuming +simple FIFO scheduling, the following is one highly simplified version +of possible sequences of events with the original wq. 
+ + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 starts and burns CPU + 25 w1 sleeps + 35 w1 wakes up and finishes + 35 w2 starts and burns CPU + 40 w2 sleeps + 50 w2 wakes up and finishes + +And with cmwq with @max_active >= 3, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 10 w2 starts and burns CPU + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + +If @max_active == 2, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 20 w2 starts and burns CPU + 25 w2 sleeps + 35 w2 wakes up and finishes + +Now, let's assume w1 and w2 are queued to a different wq q1 which has +WQ_HIGHPRI set, + + TIME IN MSECS EVENT + 0 w1 and w2 start and burn CPU + 5 w1 sleeps + 10 w2 sleeps + 10 w0 starts and burns CPU + 15 w0 sleeps + 15 w1 wakes up and finishes + 20 w2 wakes up and finishes + 25 w0 wakes up and burns CPU + 30 w0 finishes + +If q1 has WQ_CPU_INTENSIVE set, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 and w2 start and burn CPU + 10 w1 sleeps + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + + +6. Guidelines + +* Do not forget to use WQ_RESCUER if a wq may process work items which + are used during memory reclaim. Each wq with WQ_RESCUER set has one + rescuer thread reserved for it. If there is dependency among + multiple work items used during memory reclaim, they should be + queued to separate wq each with WQ_RESCUER. + +* Unless strict ordering is required, there is no need to use ST wq. + +* Unless there is a specific need, using 0 for @max_active is + recommended. 
In most use cases, concurrency level usually stays + well under the default limit. + +* A wq serves as a domain for forward progress guarantee (WQ_RESCUER), + flush and work item attributes. Work items which are not involved + in memory reclaim and don't need to be flushed as a part of a group + of work items, and don't require any special attribute, can use one + of the system wq. There is no difference in execution + characteristics between using a dedicated wq and a system wq. + +* Unless work items are expected to consume a huge amount of CPU + cycles, using a bound wq is usually beneficial due to the increased + level of locality in wq operations and work item execution. diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f96482..25e02c941bac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -235,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) +/* + * Workqueue flags and constants. For details, please refer to + * Documentation/workqueue.txt. + */ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 727f24e563ae..f77afd939229 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1,19 +1,26 @@ /* - * linux/kernel/workqueue.c + * kernel/workqueue.c - generic async execution with shared worker pool * - * Generic mechanism for defining kernel helper threads for running - * arbitrary tasks in process context. + * Copyright (C) 2002 Ingo Molnar * - * Started by Ingo Molnar, Copyright (C) 2002 + * Derived from the taskqueue/keventd code by: + * David Woodhouse + * Andrew Morton + * Kai Petzke + * Theodore Ts'o * - * Derived from the taskqueue/keventd code by: + * Made to use alloc_percpu by Christoph Lameter. 
* - * David Woodhouse - * Andrew Morton - * Kai Petzke - * Theodore Ts'o + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo * - * Made to use alloc_percpu by Christoph Lameter. + * This is the generic async execution mechanism. Work items are + * executed in process context. The worker pool is shared and + * automatically managed. There is one worker pool for each CPU and + * one extra for works which are better served by workers which are + * not bound to any specific CPU. + * + * Please read Documentation/workqueue.txt for details. */ #include -- cgit v1.2.3-59-g8ed1b From 7b334fcb45b757ffb093696ca3de1b0c8b4a33f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Sep 2010 23:51:02 +0100 Subject: drm: Use a nondestructive mode for output detect when polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Destructive load-detection is very expensive and due to failings elsewhere can trigger system wide stalls of up to 600ms. A simple first step to correcting this is not to invoke such an expensive and destructive load-detection operation automatically. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29536 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16265 Reported-by: Bruno Prémont Tested-by: Sitsofe Wheeler Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 4 ++-- drivers/gpu/drm/drm_sysfs.c | 2 +- drivers/gpu/drm/i915/intel_crt.c | 7 ++++++- drivers/gpu/drm/i915/intel_dp.c | 3 ++- drivers/gpu/drm/i915/intel_dvo.c | 4 +++- drivers/gpu/drm/i915/intel_hdmi.c | 3 ++- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++-- drivers/gpu/drm/i915/intel_sdvo.c | 6 ++++-- drivers/gpu/drm/i915/intel_tv.c | 12 ++++++------ drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++++--- drivers/gpu/drm/radeon/radeon_connectors.c | 20 +++++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 3 ++- include/drm/drm_crtc.h | 3 ++- 13 files changed, 56 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index de152a58967d..fb6b70fc6572 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, if (connector->funcs->force) connector->funcs->force(connector); } else { - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, false); drm_kms_helper_poll_enable(dev); } @@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work) !(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); if (old_status != status) changed = true; } diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 86118a742231..85da4c40694c 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -159,7 +159,7 @@ static ssize_t status_show(struct device *device, struct 
drm_connector *connector = to_drm_connector(device); enum drm_connector_status status; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); return snprintf(buf, PAGE_SIZE, "%s\n", drm_get_connector_status_name(status)); } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 4b7735196cd5..0350e5d711f8 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -400,7 +400,9 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder return status; } -static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_crt_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -419,6 +421,9 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto if (intel_crt_detect_ddc(encoder)) return connector_status_connected; + if (nondestructive) + return connector->status; + /* for pre-945g platforms use load detect */ if (encoder->crtc && encoder->crtc->enabled) { status = intel_crt_load_detect(encoder->crtc, intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 51d142939a26..e1a2a05fb838 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1386,7 +1386,8 @@ ironlake_dp_detect(struct drm_connector *connector) * \return false if DP port is disconnected. 
*/ static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector) +intel_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index a399f4b2c1c5..f0de1addf8a4 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -221,7 +221,9 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder, * * Unimplemented. */ -static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_dvo_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ccd4c97e6524..2ea123d8d22b 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -139,7 +139,8 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector) +intel_hdmi_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 4fbb0165b26f..fb1bed8f4071 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -445,7 +445,9 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * connected and closed means disconnected. We also send hotplug events as * needed, using lid status notification from the input layer. 
*/ -static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; enum drm_connector_status status = connector_status_connected; @@ -540,7 +542,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, * the LID nofication event. */ if (connector) - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, + true); + /* Don't force modeset on machines where it causes a GPU lockup */ if (dmi_check_system(intel_no_modeset_on_lid)) return NOTIFY_OK; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e3b7a7ee39cb..db6b6d4b8fae 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev) if (!analog_connector) return false; - if (analog_connector->funcs->detect(analog_connector) == + if (analog_connector->funcs->detect(analog_connector, true) == connector_status_disconnected) return false; @@ -1486,7 +1486,9 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) return status; } -static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_sdvo_detect(struct drm_connector *connector, + bool nondestructive) { uint16_t response; struct drm_encoder *encoder = intel_attached_encoder(connector); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index c671f60ce80b..d20b550c0f55 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1341,7 +1341,8 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * we have a pipe programmed in order to probe the TV. 
*/ static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector) +intel_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_display_mode mode; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1353,7 +1354,7 @@ intel_tv_detect(struct drm_connector *connector) if (encoder->crtc && encoder->crtc->enabled) { type = intel_tv_detect_type(intel_tv); - } else { + } else if (nondestructive) { struct drm_crtc *crtc; int dpms_mode; @@ -1364,10 +1365,9 @@ intel_tv_detect(struct drm_connector *connector) intel_release_load_detect_pipe(&intel_tv->base, connector, dpms_mode); } else - type = -1; - } - - intel_tv->type = type; + return connector_status_unknown; + } else + return connector->status; if (type < 0) return connector_status_disconnected; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index a1473fff06ac..67d515cb67e0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -168,7 +168,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector, } static enum drm_connector_status -nouveau_connector_detect(struct drm_connector *connector) +nouveau_connector_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); @@ -246,7 +247,8 @@ detect_analog: } static enum drm_connector_status -nouveau_connector_detect_lvds(struct drm_connector *connector) +nouveau_connector_detect_lvds(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -267,7 +269,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector) /* Try retrieving EDID via DDC */ if (!dev_priv->vbios.fp_no_ddc) { - status = nouveau_connector_detect(connector); + status = nouveau_connector_detect(connector, 
nondestructive); if (status == connector_status_connected) goto out; } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index a9dd7847d96e..31d309a8e75b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -481,7 +481,9 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -594,7 +596,9 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_vga_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; @@ -691,7 +695,9 @@ static int radeon_tv_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_tv_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -748,7 +754,9 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) * we have to check if this analog encoder is shared with anyone else (TV) * if its shared we have to set the other connector to disconnected. 
*/ -static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dvi_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; @@ -972,7 +980,9 @@ static int radeon_dp_get_modes(struct drm_connector *connector) return ret; } -static enum drm_connector_status radeon_dp_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 2ff5cf78235f..a527c91c0ba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -335,7 +335,8 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector) } static enum drm_connector_status - vmw_ldu_connector_detect(struct drm_connector *connector) + vmw_ldu_connector_detect(struct drm_connector *connector, + bool nondestructive) { if (vmw_connector_to_ldu(connector)->pref_active) return connector_status_connected; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c9f3cc5949a8..5536223fbac8 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -386,7 +386,8 @@ struct drm_connector_funcs { void (*dpms)(struct drm_connector *connector, int mode); void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); - enum drm_connector_status (*detect)(struct drm_connector *connector); + enum drm_connector_status (*detect)(struct drm_connector *connector, + bool nondestructive); int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct 
drm_property *property, uint64_t val); -- cgit v1.2.3-59-g8ed1b From 930a9e283516a3a3595c0c515113f1b78d07f695 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Sep 2010 11:07:23 +0100 Subject: drm: Use a nondestructive mode for output detect when polling (v2) v2: Julien Cristau pointed out that @nondestructive results in double-negatives and confusion when trying to interpret the parameter, so use @force instead. Much easier to type as well. ;-) And fix the miscompilation of vmgfx reported by Sedat Dilek. Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 4 ++-- drivers/gpu/drm/i915/intel_crt.c | 5 ++--- drivers/gpu/drm/i915/intel_dp.c | 3 +-- drivers/gpu/drm/i915/intel_dvo.c | 3 +-- drivers/gpu/drm/i915/intel_hdmi.c | 3 +-- drivers/gpu/drm/i915/intel_lvds.c | 5 ++--- drivers/gpu/drm/i915/intel_sdvo.c | 5 ++--- drivers/gpu/drm/i915/intel_tv.c | 5 ++--- drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++----- drivers/gpu/drm/radeon/radeon_connectors.c | 15 +++++---------- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 6 +++--- include/drm/drm_crtc.h | 9 ++++++++- 12 files changed, 32 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index fb6b70fc6572..dcbeb98f195a 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, if (connector->funcs->force) connector->funcs->force(connector); } else { - connector->status = connector->funcs->detect(connector, false); + connector->status = connector->funcs->detect(connector, true); drm_kms_helper_poll_enable(dev); } @@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work) !(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; - status = connector->funcs->detect(connector, true); + status = connector->funcs->detect(connector, 
false); if (old_status != status) changed = true; } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 0350e5d711f8..a02a8df73727 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -401,8 +401,7 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder } static enum drm_connector_status -intel_crt_detect(struct drm_connector *connector, - bool nondestructive) +intel_crt_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -421,7 +420,7 @@ intel_crt_detect(struct drm_connector *connector, if (intel_crt_detect_ddc(encoder)) return connector_status_connected; - if (nondestructive) + if (!force) return connector->status; /* for pre-945g platforms use load detect */ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e1a2a05fb838..1a51ee07de3e 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1386,8 +1386,7 @@ ironlake_dp_detect(struct drm_connector *connector) * \return false if DP port is disconnected. */ static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector, - bool nondestructive) +intel_dp_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index f0de1addf8a4..7c9ec1472d46 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -222,8 +222,7 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder, * Unimplemented. 
*/ static enum drm_connector_status -intel_dvo_detect(struct drm_connector *connector, - bool nondestructive) +intel_dvo_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 2ea123d8d22b..926934a482ec 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -139,8 +139,7 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector, - bool nondestructive) +intel_hdmi_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index fb1bed8f4071..6ec39a86ed06 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -446,8 +446,7 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * needed, using lid status notification from the input layer. 
*/ static enum drm_connector_status -intel_lvds_detect(struct drm_connector *connector, - bool nondestructive) +intel_lvds_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; enum drm_connector_status status = connector_status_connected; @@ -543,7 +542,7 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, */ if (connector) connector->status = connector->funcs->detect(connector, - true); + false); /* Don't force modeset on machines where it causes a GPU lockup */ if (dmi_check_system(intel_no_modeset_on_lid)) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index db6b6d4b8fae..e8e902d614ed 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev) if (!analog_connector) return false; - if (analog_connector->funcs->detect(analog_connector, true) == + if (analog_connector->funcs->detect(analog_connector, false) == connector_status_disconnected) return false; @@ -1487,8 +1487,7 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) } static enum drm_connector_status -intel_sdvo_detect(struct drm_connector *connector, - bool nondestructive) +intel_sdvo_detect(struct drm_connector *connector, bool force) { uint16_t response; struct drm_encoder *encoder = intel_attached_encoder(connector); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index d20b550c0f55..4a117e318a73 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1341,8 +1341,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * we have a pipe programmed in order to probe the TV. 
*/ static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector, - bool nondestructive) +intel_tv_detect(struct drm_connector *connector, bool force) { struct drm_display_mode mode; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1354,7 +1353,7 @@ intel_tv_detect(struct drm_connector *connector, if (encoder->crtc && encoder->crtc->enabled) { type = intel_tv_detect_type(intel_tv); - } else if (nondestructive) { + } else if (force) { struct drm_crtc *crtc; int dpms_mode; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 67d515cb67e0..87186a4bbf03 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -168,8 +168,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector, } static enum drm_connector_status -nouveau_connector_detect(struct drm_connector *connector, - bool nondestructive) +nouveau_connector_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); @@ -247,8 +246,7 @@ detect_analog: } static enum drm_connector_status -nouveau_connector_detect_lvds(struct drm_connector *connector, - bool nondestructive) +nouveau_connector_detect_lvds(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -269,7 +267,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector, /* Try retrieving EDID via DDC */ if (!dev_priv->vbios.fp_no_ddc) { - status = nouveau_connector_detect(connector, nondestructive); + status = nouveau_connector_detect(connector, force); if (status == connector_status_connected) goto out; } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 31d309a8e75b..ecc1a8fafbfd 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ 
b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -482,8 +482,7 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector, } static enum drm_connector_status -radeon_lvds_detect(struct drm_connector *connector, - bool nondestructive) +radeon_lvds_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -597,8 +596,7 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, } static enum drm_connector_status -radeon_vga_detect(struct drm_connector *connector, - bool nondestructive) +radeon_vga_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; @@ -696,8 +694,7 @@ static int radeon_tv_mode_valid(struct drm_connector *connector, } static enum drm_connector_status -radeon_tv_detect(struct drm_connector *connector, - bool nondestructive) +radeon_tv_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -755,8 +752,7 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) * if its shared we have to set the other connector to disconnected. 
*/ static enum drm_connector_status -radeon_dvi_detect(struct drm_connector *connector, - bool nondestructive) +radeon_dvi_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; @@ -981,8 +977,7 @@ static int radeon_dp_get_modes(struct drm_connector *connector) } static enum drm_connector_status -radeon_dp_detect(struct drm_connector *connector, - bool nondestructive) +radeon_dp_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index a527c91c0ba6..7083b1a24df3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -336,7 +336,7 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector) static enum drm_connector_status vmw_ldu_connector_detect(struct drm_connector *connector, - bool nondestructive) + bool force) { if (vmw_connector_to_ldu(connector)->pref_active) return connector_status_connected; @@ -517,7 +517,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) drm_connector_init(dev, connector, &vmw_legacy_connector_funcs, DRM_MODE_CONNECTOR_LVDS); - connector->status = vmw_ldu_connector_detect(connector); + connector->status = vmw_ldu_connector_detect(connector, true); drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs, DRM_MODE_ENCODER_LVDS); @@ -611,7 +611,7 @@ int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num, ldu->pref_height = 600; ldu->pref_active = false; } - con->status = vmw_ldu_connector_detect(con); + con->status = vmw_ldu_connector_detect(con, true); } mutex_unlock(&dev->mode_config.mutex); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 5536223fbac8..3e5a51af757c 100644 --- 
a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -386,8 +386,15 @@ struct drm_connector_funcs { void (*dpms)(struct drm_connector *connector, int mode); void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); + + /* Check to see if anything is attached to the connector. + * @force is set to false whilst polling, true when checking the + * connector due to user request. @force can be used by the driver + * to avoid expensive, destructive operations during automated + * probing. + */ enum drm_connector_status (*detect)(struct drm_connector *connector, - bool nondestructive); + bool force); int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); -- cgit v1.2.3-59-g8ed1b From c41d68a513c71e35a14f66d71782d27a79a81ea6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 7 Sep 2010 16:16:18 -0700 Subject: compat: Make compat_alloc_user_space() incorporate the access_ok() compat_alloc_user_space() expects the caller to independently call access_ok() to verify the returned area. A missing call could introduce problems on some architectures. This patch incorporates the access_ok() check into compat_alloc_user_space() and also adds a sanity check on the length. The existing compat_alloc_user_space() implementations are renamed arch_compat_alloc_user_space() and are used as part of the implementation of the new global function. This patch assumes NULL will cause __get_user()/__put_user() to either fail or access userspace on all architectures. This should be followed by checking the return value of compat_access_user_space() for NULL in the callers, at which time the access_ok() in the callers can also be removed. Reported-by: Ben Hawkes Signed-off-by: H. Peter Anvin Acked-by: Benjamin Herrenschmidt Acked-by: Chris Metcalf Acked-by: David S. 
Miller Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: Tony Luck Cc: Andrew Morton Cc: Arnd Bergmann Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Helge Deller Cc: James Bottomley Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: Paul Mackerras Cc: Ralf Baechle Cc: --- arch/ia64/include/asm/compat.h | 2 +- arch/mips/include/asm/compat.h | 2 +- arch/parisc/include/asm/compat.h | 2 +- arch/powerpc/include/asm/compat.h | 2 +- arch/s390/include/asm/compat.h | 2 +- arch/sparc/include/asm/compat.h | 2 +- arch/tile/include/asm/compat.h | 2 +- arch/x86/include/asm/compat.h | 2 +- include/linux/compat.h | 3 +++ kernel/compat.c | 21 +++++++++++++++++++++ 10 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h index f90edc85b509..9301a2821615 100644 --- a/arch/ia64/include/asm/compat.h +++ b/arch/ia64/include/asm/compat.h @@ -199,7 +199,7 @@ ptr_to_compat(void __user *uptr) } static __inline__ void __user * -compat_alloc_user_space (long len) +arch_compat_alloc_user_space (long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len); diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 613f6912dfc1..dbc51065df5b 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -145,7 +145,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = (struct pt_regs *) ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1; diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 02b77baa5da6..efa0b60c63fe 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -147,7 +147,7 @@ static inline 
compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static __inline__ void __user *compat_alloc_user_space(long len) +static __inline__ void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = &current->thread.regs; return (void __user *)regs->gr[30]; diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 396d21a80058..a11d4eac4f97 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -134,7 +134,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current->thread.regs; unsigned long usp = regs->gpr[1]; diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 104f2007f097..a875c2f542e1 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -181,7 +181,7 @@ static inline int is_compat_task(void) #endif -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { unsigned long stack; diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 5016f76ea98a..6f57325bb883 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -167,7 +167,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current_thread_info()->kregs; unsigned long usp = regs->u_regs[UREG_I6]; diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 5a34da6cdd79..345d81ce44bb 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -195,7 +195,7 @@ static inline
unsigned long ptr_to_compat_reg(void __user *uptr) return (long)(int)(long __force)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *)regs->sp - len; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 306160e58b48..1d9cd27c2920 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *)regs->sp - len; diff --git a/include/linux/compat.h b/include/linux/compat.h index 9ddc8780e8db..5778b559d59c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer); + +extern void __user *compat_alloc_user_space(unsigned long len); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/kernel/compat.c b/kernel/compat.c index e167efce8423..c9e2ec0b34a8 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1126,3 +1126,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) return 0; } + +/* + * Allocate user-space memory for the duration of a single system call, + * in order to marshall parameters inside a compat thunk. + */ +void __user *compat_alloc_user_space(unsigned long len) +{ + void __user *ptr; + + /* If len would occupy more than half of the entire compat space... 
*/ + if (unlikely(len > (((compat_uptr_t)~0) >> 1))) + return NULL; + + ptr = arch_compat_alloc_user_space(len); + + if (unlikely(!access_ok(VERIFY_WRITE, ptr, len))) + return NULL; + + return ptr; +} +EXPORT_SYMBOL_GPL(compat_alloc_user_space); -- cgit v1.2.3-59-g8ed1b From 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Wed, 15 Sep 2010 10:27:52 -0700 Subject: tcp: Prevent overzealous packetization by SWS logic. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised window, the SWS logic will packetize to half the MSS unnecessarily. This causes problems with some embedded devices. However for large MSS devices we do want to half-MSS packetize otherwise we never get enough packets into the pipe for things like fast retransmit and recovery to work. Be careful also to handle the case where MSS > window, otherwise we'll never send until the probe timer. Reported-by: ツ Leandro Melo de Sales Signed-off-by: David S. Miller --- include/net/tcp.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index eaa9582779d0..3e4b33e36602 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -475,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) { - if (tp->max_window && pktsize > (tp->max_window >> 1)) - return max(tp->max_window >> 1, 68U - tp->tcp_header_len); + int cutoff; + + /* When peer uses tiny windows, there is no use in packetizing + * to sub-MSS pieces for the sake of SWS or making sure there + * are enough packets in the pipe for fast recovery. + * + * On the other hand, for extremely large MSS devices, handling + * smaller than MSS windows in this way does make sense. 
+ */ + if (tp->max_window >= 512) + cutoff = (tp->max_window >> 1); + else + cutoff = tp->max_window; + + if (cutoff && pktsize > cutoff) + return max_t(int, cutoff, 68U - tp->tcp_header_len); else return pktsize; } -- cgit v1.2.3-59-g8ed1b From f0f9deae9e7c421fa0c1c627beb8e174325e1ba7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 17 Sep 2010 16:55:03 -0700 Subject: netpoll: Disable IRQ around RCU dereference in netpoll_rx We cannot use rcu_dereference_bh safely in netpoll_rx as we may be called with IRQs disabled. We could however simply disable IRQs as that too causes BH to be disabled and is safe in either case. Thanks to John Linville for discovering this bug and providing a patch. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netpoll.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 791d5109f34c..50d8009be86c 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -63,20 +63,20 @@ static inline bool netpoll_rx(struct sk_buff *skb) unsigned long flags; bool ret = false; - rcu_read_lock_bh(); + local_irq_save(flags); npinfo = rcu_dereference_bh(skb->dev->npinfo); if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags)) goto out; - spin_lock_irqsave(&npinfo->rx_lock, flags); + spin_lock(&npinfo->rx_lock); /* check rx_flags again with the lock held */ if (npinfo->rx_flags && __netpoll_rx(skb)) ret = true; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); + spin_unlock(&npinfo->rx_lock); out: - rcu_read_unlock_bh(); + local_irq_restore(flags); return ret; } -- cgit v1.2.3-59-g8ed1b From 8444cf712c5f71845cba9dc30d8f530ff0d5ff83 Mon Sep 17 00:00:00 2001 From: Thomas Egerer Date: Mon, 20 Sep 2010 11:11:38 -0700 Subject: xfrm: Allow different selector family in temporary state The family parameter xfrm_state_find is used to find a state matching a certain policy. 
This value is set to the template's family (encap_family) right before xfrm_state_find is called. The family parameter is however also used to construct a temporary state in xfrm_state_find itself which is wrong for inter-family scenarios because it produces a selector for the wrong family. Since this selector is included in the xfrm_user_acquire structure, user space programs misinterpret IPv6 addresses as IPv4 and vice versa. This patch splits up the original init_tempsel function into a part that initializes the selector respectively the props and id of the temporary state, to allow for differing ip address families within the state. Signed-off-by: Thomas Egerer Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/ipv4/xfrm4_state.c | 33 +++++++++++++++++++-------------- net/ipv6/xfrm6_state.c | 33 +++++++++++++++++++-------------- net/xfrm/xfrm_policy.c | 5 ++--- net/xfrm/xfrm_state.c | 45 +++++++++++++++++++++++++++------------------ 5 files changed, 69 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fc8f36dd0f5c..4f53532d4c2f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,8 +298,8 @@ struct xfrm_state_afinfo { const struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); - void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, + void (*init_tempsel)(struct xfrm_selector *sel, struct flowi *fl); + void (*init_temprop)(struct xfrm_state *x, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1ef1366a0a03..47947624eccc 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -21,21 +21,25 @@
static int xfrm4_init_flags(struct xfrm_state *x) } static void -__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +{ + sel->daddr.a4 = fl->fl4_dst; + sel->saddr.a4 = fl->fl4_src; + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET; + sel->prefixlen_d = 32; + sel->prefixlen_s = 32; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) { - x->sel.daddr.a4 = fl->fl4_dst; - x->sel.saddr.a4 = fl->fl4_src; - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET; - x->sel.prefixlen_d = 32; - x->sel.prefixlen_s = 32; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; x->id = tmpl->id; if (x->id.daddr.a4 == 0) x->id.daddr.a4 = daddr->a4; @@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index f417b77fa0e1..a67575d472a3 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,23 +20,27 @@ #include static void -__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) { /* Initialize temporary selector matching only * to current session. 
*/ - ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET6; - x->sel.prefixlen_d = 128; - x->sel.prefixlen_s = 128; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET6; + sel->prefixlen_d = 128; + sel->prefixlen_s = 128; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) +{ x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); @@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, + .init_temprop = xfrm6_init_temprop, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2b3ed7ad4933..cbab6e1a8c9c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1175,9 +1175,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; - family = tmpl->encap_family; - if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(net, &tmp, remote, family); + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); if (error) goto fail; local = &tmp; diff --git 
a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5208b12fbfb4..eb96ce52f178 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -656,15 +656,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) +xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, + struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + afinfo->init_tempsel(&x->sel, fl); + + if (family != tmpl->encap_family) { + xfrm_state_put_afinfo(afinfo); + afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + if (!afinfo) + return -1; + } + afinfo->init_temprop(x, tmpl, daddr, saddr); xfrm_state_put_afinfo(afinfo); return 0; } @@ -790,37 +798,38 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int error = 0; struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; + unsigned short encap_family = tmpl->encap_family; to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, 
saddr, &best, &acquire_in_progress, &error); } if (best) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } @@ -829,7 +838,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, - tmpl->id.proto, family)) != NULL) { + tmpl->id.proto, encap_family)) != NULL) { to_put = x0; error = -EEXIST; goto out; @@ -839,9 +848,9 @@ found: error = -ENOMEM; goto out; } - /* Initialize temporary selector matching only + /* Initialize temporary state matching only * to current session. 
*/ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); @@ -856,10 +865,10 @@ found: x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(net, daddr, saddr, family); + h = xfrm_src_hash(net, daddr, saddr, encap_family); hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; -- cgit v1.2.3-59-g8ed1b From 8b15575cae7a93a784c3005c42b069edd9ba64dd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Sep 2010 14:35:37 -0700 Subject: fs: {lock,unlock}_flocks() stubs to prepare for BKL removal The lock structs are currently protected by the BKL, but are accessed by code in fs/locks.c and misc file system and DLM code. These stubs will allow all users to switch to the new interface before the implementation is changed to a spinlock. 
Acked-by: Arnd Bergmann Signed-off-by: Sage Weil Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..63d069bd80b7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1093,6 +1093,10 @@ struct file_lock { #include +/* temporary stubs for BKL removal */ +#define lock_flocks() lock_kernel() +#define unlock_flocks() unlock_kernel() + extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING -- cgit v1.2.3-59-g8ed1b From 56b49f4b8f6728b91d10c556c116175051b77b60 Mon Sep 17 00:00:00 2001 From: Ollie Wild Date: Wed, 22 Sep 2010 05:54:54 +0000 Subject: net: Move "struct net" declaration inside the __KERNEL__ macro guard This patch reduces namespace pollution by moving the "struct net" declaration out of the userspace-facing portion of linux/netlink.h. It has no impact on the kernel. (This came up because we have several C++ applications which use "net" as a namespace name.) Signed-off-by: Ollie Wild Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 59d066936ab9..123566912d73 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -27,8 +27,6 @@ #define MAX_LINKS 32 -struct net; - struct sockaddr_nl { sa_family_t nl_family; /* AF_NETLINK */ unsigned short nl_pad; /* zero */ @@ -151,6 +149,8 @@ struct nlattr { #include #include +struct net; + static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) { return (struct nlmsghdr *)skb->data; -- cgit v1.2.3-59-g8ed1b From d3f3cf859db17cc5f8156c5bfcd032413e44483b Mon Sep 17 00:00:00 2001 From: Mathieu Lacage Date: Sat, 14 Aug 2010 15:02:44 +0200 Subject: missing inline keyword for static function in linux/dmaengine.h Add a missing inline keyword for static function in linux/dmaengine.h to avoid duplicate symbol definitions. Signed-off-by: Mathieu Lacage Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c61d4ca27bcc..e2106495cc11 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma) return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; } -static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) { return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; } -- cgit v1.2.3-59-g8ed1b From 710224fa2750cf449c02dd115548acebfdd2c86a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 22 Sep 2010 13:04:55 -0700 Subject: arm: fix "arm: fix pci_set_consistent_dma_mask for dmabounce devices" This fixes the regression caused by the commit 6fee48cd330c68 ("dma-mapping: arm: use generic pci_set_dma_mask and pci_set_consistent_dma_mask"). 
ARM needs to clip the dma coherent mask for dmabounce devices. This restores the old trick. Note that strictly speaking, the DMA API doesn't allow architectures to do such but I'm not sure it's worth adding the new API to set the dma mask that allows architectures to clip it. Reported-by: Krzysztof Halasa Signed-off-by: FUJITA Tomonori Acked-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/common/it8152.c | 8 ++++++++ arch/arm/mach-ixp4xx/common-pci.c | 8 ++++++++ arch/arm/mach-ixp4xx/include/mach/hardware.h | 2 ++ arch/arm/mach-pxa/include/mach/hardware.h | 2 +- arch/arm/mach-pxa/include/mach/io.h | 2 ++ include/linux/dma-mapping.h | 4 ++++ 6 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 7974baacafce..1bec96e85196 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -271,6 +271,14 @@ int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) ((dma_addr + size - PHYS_OFFSET) >= SZ_64M); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= PHYS_OFFSET + SZ_64M - 1) + return 0; + + return -EIO; +} + int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) { it8152_io.start = IT8152_IO_BASE + 0x12000; diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 61cd4d64b985..24498a932ba6 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -503,6 +503,14 @@ struct pci_bus * __devinit ixp4xx_scan_bus(int nr, struct pci_sys_data *sys) return pci_scan_bus(sys->busnr, &ixp4xx_ops, sys); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= SZ_64M - 1) + return 0; + + return -EIO; +} + EXPORT_SYMBOL(ixp4xx_pci_read); EXPORT_SYMBOL(ixp4xx_pci_write); diff --git a/arch/arm/mach-ixp4xx/include/mach/hardware.h b/arch/arm/mach-ixp4xx/include/mach/hardware.h index f91ca6d4fbe8..8138371c406e 100644 
--- a/arch/arm/mach-ixp4xx/include/mach/hardware.h +++ b/arch/arm/mach-ixp4xx/include/mach/hardware.h @@ -26,6 +26,8 @@ #define PCIBIOS_MAX_MEM 0x4BFFFFFF #endif +#define ARCH_HAS_DMA_SET_COHERENT_MASK + #define pcibios_assign_all_busses() 1 /* Register locations and bits */ diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h index 7f64d24cd564..428cc7bda9a4 100644 --- a/arch/arm/mach-pxa/include/mach/hardware.h +++ b/arch/arm/mach-pxa/include/mach/hardware.h @@ -309,7 +309,7 @@ extern unsigned long get_clock_tick_rate(void); #define PCIBIOS_MIN_IO 0 #define PCIBIOS_MIN_MEM 0 #define pcibios_assign_all_busses() 1 +#define ARCH_HAS_DMA_SET_COHERENT_MASK #endif - #endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mach-pxa/include/mach/io.h b/arch/arm/mach-pxa/include/mach/io.h index 262691fb97d8..fdca3be47d9b 100644 --- a/arch/arm/mach-pxa/include/mach/io.h +++ b/arch/arm/mach-pxa/include/mach/io.h @@ -6,6 +6,8 @@ #ifndef __ASM_ARM_ARCH_IO_H #define __ASM_ARM_ARCH_IO_H +#include + #define IO_SPACE_LIMIT 0xffffffff /* diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ce29b8151198..ba8319ae5fcc 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -102,6 +102,9 @@ static inline u64 dma_get_mask(struct device *dev) return DMA_BIT_MASK(32); } +#ifdef ARCH_HAS_DMA_SET_COHERENT_MASK +int dma_set_coherent_mask(struct device *dev, u64 mask); +#else static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { if (!dma_supported(dev, mask)) @@ -109,6 +112,7 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) dev->coherent_dma_mask = mask; return 0; } +#endif extern u64 dma_get_required_mask(struct device *dev); -- cgit v1.2.3-59-g8ed1b From 4c894f47bb49284008073d351c0ddaac8860864e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 23 Sep 2010 15:15:19 +0200 Subject: x86/amd-iommu: Work around S3 BIOS bug This patch adds a workaround for an 
IOMMU BIOS problem to the AMD IOMMU driver. The result of the bug is that the IOMMU does not execute commands anymore when the system comes out of the S3 state resulting in system failure. The bug in the BIOS is that it does not restore certain hardware specific registers correctly. This workaround reads out the contents of these registers at boot time and restores them on resume from S3. The workaround is limited to the specific IOMMU chipset where this problem occurs. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 6 ++++++ arch/x86/include/asm/amd_iommu_types.h | 9 +++++++++ arch/x86/kernel/amd_iommu_init.c | 18 ++++++++++++++++++ include/linux/pci_ids.h | 3 +++ 4 files changed, 36 insertions(+) (limited to 'include') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index d2544f1d705d..cb030374b90a 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { } #endif /* !CONFIG_AMD_IOMMU_STATS */ +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ef2d5cd7d7e7..08616180deaf 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -414,6 +414,15 @@ struct amd_iommu { /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; + + /* + * This array is required to work around a potential BIOS bug. + * The BIOS may miss to restore parts of the PCI configuration + * space when the system resumes from S3. The result is that the + * IOMMU does not execute commands anymore which leads to system + * failure.
+ */ + u32 cache_cfg[4]; }; /* diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 85e9817ead43..5a170cbbbed8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + + if (is_rd890_iommu(iommu->dev)) { + pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); + pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); + pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); + pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); + } } /* @@ -1120,6 +1127,16 @@ static void iommu_init_flags(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_COHERENT_EN); } +static void iommu_apply_quirks(struct amd_iommu *iommu) +{ + if (is_rd890_iommu(iommu->dev)) { + pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); + pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); + pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); + pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); + } +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1130,6 +1147,7 @@ static void enable_iommus(void) for_each_iommu(iommu) { iommu_disable(iommu); + iommu_apply_quirks(iommu); iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 10d33309e9a6..570fddeb0388 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -393,6 +393,9 @@ #define PCI_DEVICE_ID_VLSI_82C147 0x0105 #define PCI_DEVICE_ID_VLSI_VAS96011 0x0702 +/* AMD RD890 Chipset */ +#define PCI_DEVICE_ID_RD890_IOMMU 0x5a23 + #define PCI_VENDOR_ID_ADL 0x1005 #define PCI_DEVICE_ID_ADL_2301 0x2301 -- cgit v1.2.3-59-g8ed1b From 
b3a084b9b684622b149e8dcf03855bf0d5fb588b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 08:38:44 +0200 Subject: rcu: rcu_read_lock_bh_held(): disabling irqs also disables bh rcu_dereference_bh() doesnt know yet about hard irq being disabled, so lockdep can trigger in netpoll_rx() after commit f0f9deae9e7c4 (netpoll: Disable IRQ around RCU dereference in netpoll_rx) Reported-by: Miles Lane Signed-off-by: Eric Dumazet Tested-by: Miles Lane Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585d..83af1f8d8b74 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -454,7 +454,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) + rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled()) /** * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched -- cgit v1.2.3-59-g8ed1b From f459ffbdfd04edb4a8ce6eea33170eb057a5e695 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 25 Sep 2010 17:45:50 +1000 Subject: drm/radeon: fix PCI ID 5657 to be an RV410 fixes https://bugzilla.kernel.org/show_bug.cgi?id=19012 cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 3a9940ef728b..883c1d439899 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -85,7 +85,6 @@ {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ - {0x1002, 
0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ @@ -103,6 +102,7 @@ {0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \ {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \ {0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \ -- cgit v1.2.3-59-g8ed1b From 693019e90ca45d881109d32c0c6d29adf03f6447 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 23 Sep 2010 11:19:54 +0000 Subject: net: reset skb queue mapping when rx'ing over tunnel Reset queue mapping when an skb is reentering the stack via a tunnel. On second pass, the queue mapping from the original device is no longer valid. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 81d1413a8701..02386505033d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -242,6 +242,7 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); skb_dst_drop(skb); nf_reset(skb); } -- cgit v1.2.3-59-g8ed1b From 2cc6d2bf3d6195fabcf0febc192c01f99519a8f3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 24 Sep 2010 09:55:52 +0000 Subject: ipv6: add a missing unregister_pernet_subsys call Clean up a missing exit path in the ipv6 module init routines. In addrconf_init we call ipv6_addr_label_init which calls register_pernet_subsys for the ipv6_addr_label_ops structure. But if module loading fails, or if the ipv6 module is removed, there is no corresponding unregister_pernet_subsys call, which leaves a now-bogus address on the pernet_list, leading to oopses in subsequent registrations. This patch cleans up both the failed load path and the unload path. Tested by myself with good results. Signed-off-by: Neil Horman include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 11 ++++++++--- net/ipv6/addrlabel.c | 5 +++++ 3 files changed, 14 insertions(+), 3 deletions(-) Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 11 ++++++++--- net/ipv6/addrlabel.c | 5 +++++ 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 45375b41a2a0..4d40c4d0230b 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -121,6 +121,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout) * IPv6 Address Label subsystem (addrlabel.c) */ extern int ipv6_addr_label_init(void); +extern void ipv6_addr_label_cleanup(void); extern void ipv6_addr_label_rtnl_register(void); extern u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab70a3fbcafa..324fac3b6c16 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4637,10 +4637,12 @@ int __init addrconf_init(void) if (err < 0) { printk(KERN_CRIT "IPv6 Addrconf:" " cannot initialize default policy table: %d.\n", err); - return err; + goto out; } - register_pernet_subsys(&addrconf_ops); + err = register_pernet_subsys(&addrconf_ops); + if (err < 0) + goto out_addrlabel; /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup @@ -4692,7 +4694,9 @@ errout: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); - +out_addrlabel: + ipv6_addr_label_cleanup(); +out: return err; } @@ -4703,6 +4707,7 @@ void addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); + ipv6_addr_label_cleanup(); rtnl_lock(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f0e774cea386..8175f802651b 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -393,6 +393,11 @@ int __init ipv6_addr_label_init(void) return register_pernet_subsys(&ipv6_addr_label_ops); } +void ipv6_addr_label_cleanup(void) +{ + unregister_pernet_subsys(&ipv6_addr_label_ops); +} + static const 
struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, [IFAL_LABEL] = { .len = sizeof(u32), }, -- cgit v1.2.3-59-g8ed1b From fb0c5f0bc8b69b40549449ee7fc65f3706f12062 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 27 Sep 2010 03:31:00 +0000 Subject: tproxy: check for transparent flag in ip_route_newports as done in ip_route_connect() Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller --- include/net/route.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index bd732d62e1c3..7e5e73bfa4de 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -199,6 +199,8 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.fl_ip_sport = sport; fl.fl_ip_dport = dport; fl.proto = protocol; + if (inet_sk(sk)->transparent) + fl.flags |= FLOWI_FLAG_ANYSRC; ip_rt_put(*rp); *rp = NULL; security_sk_classify_flow(sk, &fl); -- cgit v1.2.3-59-g8ed1b From 31dfbc93923c0aaa0440b809f80ff2830c6a531a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Sep 2010 21:28:30 +0100 Subject: drm: Prune GEM vma entries Hook the GEM vm open/close ops into the generic drm vm open/close so that the private vma entries are created and destroy appropriately. Fixes the leak of the drm_vma_entries during the lifetime of the filp. 
Reported-by: Matt Mackall Cc: Jesse Barnes Signed-off-by: Chris Wilson Acked-by: Jesse Barnes Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 9 ++++++++- drivers/gpu/drm/drm_vm.c | 28 ++++++++++++++++++---------- include/drm/drmP.h | 1 + 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index bf92d07510df..6fe2cd298c12 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -528,6 +528,10 @@ void drm_gem_vm_open(struct vm_area_struct *vma) struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_reference(obj); + + mutex_lock(&obj->dev->struct_mutex); + drm_vm_open_locked(vma); + mutex_unlock(&obj->dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_open); @@ -535,7 +539,10 @@ void drm_gem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; - drm_gem_object_unreference_unlocked(obj); + mutex_lock(&obj->dev->struct_mutex); + drm_vm_close_locked(vma); + drm_gem_object_unreference(obj); + mutex_unlock(&obj->dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_close); diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index fda67468e603..5df450683aab 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -433,15 +433,7 @@ static void drm_vm_open(struct vm_area_struct *vma) mutex_unlock(&dev->struct_mutex); } -/** - * \c close method for all virtual memory types. - * - * \param vma virtual memory area. - * - * Search the \p vma private data entry in drm_device::vmalist, unlink it, and - * free it. 
- */ -static void drm_vm_close(struct vm_area_struct *vma) +void drm_vm_close_locked(struct vm_area_struct *vma) { struct drm_file *priv = vma->vm_file->private_data; struct drm_device *dev = priv->minor->dev; @@ -451,7 +443,6 @@ static void drm_vm_close(struct vm_area_struct *vma) vma->vm_start, vma->vm_end - vma->vm_start); atomic_dec(&dev->vma_count); - mutex_lock(&dev->struct_mutex); list_for_each_entry_safe(pt, temp, &dev->vmalist, head) { if (pt->vma == vma) { list_del(&pt->head); @@ -459,6 +450,23 @@ static void drm_vm_close(struct vm_area_struct *vma) break; } } +} + +/** + * \c close method for all virtual memory types. + * + * \param vma virtual memory area. + * + * Search the \p vma private data entry in drm_device::vmalist, unlink it, and + * free it. + */ +static void drm_vm_close(struct vm_area_struct *vma) +{ + struct drm_file *priv = vma->vm_file->private_data; + struct drm_device *dev = priv->minor->dev; + + mutex_lock(&dev->struct_mutex); + drm_vm_close_locked(vma); mutex_unlock(&dev->struct_mutex); } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230adee..774e1d49509b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1175,6 +1175,7 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); +extern void drm_vm_close_locked(struct vm_area_struct *vma); extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); -- cgit v1.2.3-59-g8ed1b From 2fc11536cf5c0b8eb4eb7e01a2a672a189e9280f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 7 Sep 2010 06:10:45 -0300 Subject: V4L/DVB: videobuf-dma-sg: set correct size in last sg element This fixes a nasty 
memory corruption bug when using userptr I/O. The function videobuf_pages_to_sg() sets up the scatter-gather list for the DMA transfer to the userspace pages. The first transfer is setup correctly (the size is set to PAGE_SIZE - offset), but all other transfers have size PAGE_SIZE. This is wrong for the last transfer which may be less than PAGE_SIZE. Most, if not all, drivers will program the boards DMA engine correctly, i.e. even though the size in the last sg element is wrong, they will do their own size calculations and make sure the right amount is DMA-ed, and so seemingly prevent memory corruption. However, behind the scenes the dynamic DMA mapping support (in lib/swiotlb.c) may create bounce buffers if the memory pages are not in DMA-able memory. This happens for example on a 64-bit linux with a board that only supports 32-bit DMA. These bounce buffers DO use the information in the sg list to determine the size. So while the DMA engine transfers the correct amount of data, when the data is 'bounced' back too much is copied, causing buffer overwrites. The fix is simple: calculate and set the correct size for the last sg list element. Signed-off-by: Hans Verkuil Cc: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/videobuf-dma-sg.c | 11 +++++++---- include/media/videobuf-dma-sg.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c index 06f9a9c2a39a..2ad0bc252b0e 100644 --- a/drivers/media/video/videobuf-dma-sg.c +++ b/drivers/media/video/videobuf-dma-sg.c @@ -94,7 +94,7 @@ err: * must free the memory. 
*/ static struct scatterlist *videobuf_pages_to_sg(struct page **pages, - int nr_pages, int offset) + int nr_pages, int offset, size_t size) { struct scatterlist *sglist; int i; @@ -110,12 +110,14 @@ static struct scatterlist *videobuf_pages_to_sg(struct page **pages, /* DMA to highmem pages might not work */ goto highmem; sg_set_page(&sglist[0], pages[0], PAGE_SIZE - offset, offset); + size -= PAGE_SIZE - offset; for (i = 1; i < nr_pages; i++) { if (NULL == pages[i]) goto nopage; if (PageHighMem(pages[i])) goto highmem; - sg_set_page(&sglist[i], pages[i], PAGE_SIZE, 0); + sg_set_page(&sglist[i], pages[i], min(PAGE_SIZE, size), 0); + size -= min(PAGE_SIZE, size); } return sglist; @@ -170,7 +172,8 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, first = (data & PAGE_MASK) >> PAGE_SHIFT; last = ((data+size-1) & PAGE_MASK) >> PAGE_SHIFT; - dma->offset = data & ~PAGE_MASK; + dma->offset = data & ~PAGE_MASK; + dma->size = size; dma->nr_pages = last-first+1; dma->pages = kmalloc(dma->nr_pages * sizeof(struct page *), GFP_KERNEL); if (NULL == dma->pages) @@ -252,7 +255,7 @@ int videobuf_dma_map(struct device *dev, struct videobuf_dmabuf *dma) if (dma->pages) { dma->sglist = videobuf_pages_to_sg(dma->pages, dma->nr_pages, - dma->offset); + dma->offset, dma->size); } if (dma->vaddr) { dma->sglist = videobuf_vmalloc_to_sg(dma->vaddr, diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h index 97e07f46a0fa..aa4ebb42a565 100644 --- a/include/media/videobuf-dma-sg.h +++ b/include/media/videobuf-dma-sg.h @@ -48,6 +48,7 @@ struct videobuf_dmabuf { /* for userland buffer */ int offset; + size_t size; struct page **pages; /* for kernel buffers */ -- cgit v1.2.3-59-g8ed1b From 01db403cf99f739f86903314a489fb420e0e254f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Sep 2010 20:24:54 -0700 Subject: tcp: Fix >4GB writes on 64-bit. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes kernel bugzilla #16603 tcp_sendmsg() truncates iov_len to an 'int' which causes a 4GB write to write zero bytes, for example. There is also the problem higher up of how verify_iovec() works. It wants to prevent the total length from looking like an error return value. However it does this using 'int', but syscalls return 'long' (and thus signed 64-bit on 64-bit machines). So it could trigger false-positives on 64-bit as written. So fix it to use 'long'. Reported-by: Olaf Bonorden Reported-by: Daniel Büse Reported-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/socket.h | 2 +- net/core/iovec.c | 5 +++-- net/ipv4/tcp.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index a2fada9becb6..a8f56e1ec760 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, int offset, unsigned int len, __wsum *csump); -extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); +extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); diff --git a/net/core/iovec.c b/net/core/iovec.c index 1cd98df412df..e6b133b77ccb 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -35,9 +35,10 @@ * in any case.
*/ -int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) +long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { - int size, err, ct; + int size, ct; + long err; if (m->msg_namelen) { if (mode == VERIFY_READ) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 95d75d443927..f115ea68a4ef 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -943,7 +943,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = sk->sk_route_caps & NETIF_F_SG; while (--iovlen >= 0) { - int seglen = iov->iov_len; + size_t seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; iov++; -- cgit v1.2.3-59-g8ed1b From 58f87ed0d45141a90167f34c0959d607160a26df Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 7 Sep 2010 12:49:45 -0400 Subject: ACPI: Fix typos Signed-off-by: Len Brown --- drivers/acpi/acpica/exutils.c | 2 +- drivers/acpi/acpica/rsutils.c | 2 +- drivers/acpi/apei/Kconfig | 2 +- drivers/acpi/apei/erst-dbg.c | 2 +- drivers/acpi/apei/erst.c | 2 +- drivers/acpi/bus.c | 4 ++-- drivers/acpi/processor_perflib.c | 4 ++-- include/acpi/acpixf.h | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 74c24d517f81..4093522eed45 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -109,7 +109,7 @@ void acpi_ex_enter_interpreter(void) * * DESCRIPTION: Reacquire the interpreter execution region from within the * interpreter code. Failure to enter the interpreter region is a - * fatal system error. Used in conjuction with + * fatal system error. 
Used in conjunction with * relinquish_interpreter * ******************************************************************************/ diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c index 22cfcfbd9fff..491191e6cf69 100644 --- a/drivers/acpi/acpica/rsutils.c +++ b/drivers/acpi/acpica/rsutils.c @@ -149,7 +149,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) /* * 16-, 32-, and 64-bit cases must use the move macros that perform - * endian conversion and/or accomodate hardware that cannot perform + * endian conversion and/or accommodate hardware that cannot perform * misaligned memory transfers */ case ACPI_RSC_MOVE16: diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 907e350f1c7d..fca34ccfd294 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -34,6 +34,6 @@ config ACPI_APEI_ERST_DEBUG depends on ACPI_APEI help ERST is a way provided by APEI to save and retrieve hardware - error infomation to and from a persistent store. Enable this + error information to and from a persistent store. Enable this if you want to debugging and testing the ERST kernel support and firmware implementation. diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index 5281ddda2777..98ffa2991ebc 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -2,7 +2,7 @@ * APEI Error Record Serialization Table debug support * * ERST is a way provided by APEI to save and retrieve hardware error - * infomation to and from a persistent store. This file provide the + * information to and from a persistent store. This file provide the * debugging/testing support for ERST kernel support and firmware * implementation. 
* diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 18645f4e83cd..a4904f1680cf 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -2,7 +2,7 @@ * APEI Error Record Serialization Table support * * ERST is a way provided by APEI to save and retrieve hardware error - * infomation to and from a persistent store. + * information to and from a persistent store. * * For more information about ERST, please refer to ACPI Specification * version 4.0, section 17.4. diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 5c221ab535d5..cc17b352d1c5 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -55,7 +55,7 @@ EXPORT_SYMBOL(acpi_root_dir); static int set_power_nocheck(const struct dmi_system_id *id) { printk(KERN_NOTICE PREFIX "%s detected - " - "disable power check in power transistion\n", id->ident); + "disable power check in power transition\n", id->ident); acpi_power_nocheck = 1; return 0; } @@ -1027,7 +1027,7 @@ static int __init acpi_init(void) /* * If the laptop falls into the DMI check table, the power state check - * will be disabled in the course of device power transistion. + * will be disabled in the course of device power transition. */ dmi_check_system(power_nocheck_dmi_table); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index ba1bd263d903..3a73a93596e8 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -447,8 +447,8 @@ int acpi_processor_notify_smm(struct module *calling_module) if (!try_module_get(calling_module)) return -EINVAL; - /* is_done is set to negative if an error occured, - * and to postitive if _no_ error occured, but SMM + /* is_done is set to negative if an error occurred, + * and to postitive if _no_ error occurred, but SMM * was already notified. This avoids double notification * which might lead to unexpected results... 
*/ diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c0786d446a00..984cdc62e30b 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -55,7 +55,7 @@ extern u8 acpi_gbl_permanent_mmap; /* - * Globals that are publically available, allowing for + * Globals that are publicly available, allowing for * run time configuration */ extern u32 acpi_dbg_level; -- cgit v1.2.3-59-g8ed1b From e454c844644683571617896ab2a4ce0109c1943e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 21 Sep 2010 16:31:11 -0300 Subject: Bluetooth: Fix deadlock in the ERTM logic The Enhanced Retransmission Mode (ERTM) is a reliable mode of operation of the Bluetooth L2CAP layer. Think of it as a simplified version of TCP. The problem we were facing here was a deadlock. ERTM uses a backlog queue to queue incoming packets while the user is holding the lock. At some moment the sk_sndbuf can be exceeded and we can't alloc new skbs, so the code sleeps with the lock held to wait for memory. That stalls the ERTM connection, since we can't read the acknowledgement packets in the backlog queue to free memory and make the allocation of the outgoing skb successful. This patch actually affects all users of bt_skb_send_alloc(), i.e., all L2CAP modes and SCO. We are safe against socket state changes or channel deletion while we are sleeping waiting for memory. Checking for the sk->sk_err and sk->sk_shutdown makes the code safe, since any action that can leave the socket or the channel in an unusable state sets at least one of the struct members. Then we can check both of them when getting the lock again and return with the proper error if something unexpected happens. Signed-off-by: Gustavo F.
Padovan Signed-off-by: Ulisses Furquim --- include/net/bluetooth/bluetooth.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 27a902d9b3a9..30fce0128dd7 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -161,12 +161,30 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l { struct sk_buff *skb; + release_sock(sk); if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } + lock_sock(sk); + + if (!skb && *err) + return NULL; + + *err = sock_error(sk); + if (*err) + goto out; + + if (sk->sk_shutdown) { + *err = -ECONNRESET; + goto out; + } return skb; + +out: + kfree_skb(skb); + return NULL; } int bt_err(__u16 code); -- cgit v1.2.3-59-g8ed1b From 29d08b3efddca628b0360411ab2b85f7b1723f48 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 27 Sep 2010 16:17:17 +1000 Subject: drm/gem: handlecount isn't really a kref so don't make it one. There were lots of places being inconsistent since handle count looked like a kref but it really wasn't. Fix this by just making handle count an atomic on the object, and have it increase the normal object kref. Now i915/radeon/nouveau drivers can drop the normal reference on userspace object creation, and have the handle hold it. This patch fixes a memory leak or corruption on unload, because the driver had no way of knowing if a handle had been actually added for this object, and the fbcon object needed to know this to clean itself up properly.
Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 8 ++------ drivers/gpu/drm/drm_info.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 6 ++---- drivers/gpu/drm/i915/intel_fb.c | 4 +++- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 + drivers/gpu/drm/nouveau/nouveau_gem.c | 6 ++---- drivers/gpu/drm/nouveau/nouveau_notifier.c | 1 + drivers/gpu/drm/radeon/radeon_display.c | 3 ++- drivers/gpu/drm/radeon/radeon_fb.c | 14 ++++---------- drivers/gpu/drm/radeon/radeon_gem.c | 4 ++-- include/drm/drmP.h | 18 +++++++++++++----- 11 files changed, 33 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 6fe2cd298c12..f7e61be8430a 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -148,7 +148,7 @@ int drm_gem_object_init(struct drm_device *dev, return -ENOMEM; kref_init(&obj->refcount); - kref_init(&obj->handlecount); + atomic_set(&obj->handle_count, 0); obj->size = size; atomic_inc(&dev->object_count); @@ -496,12 +496,8 @@ static void drm_gem_object_ref_bug(struct kref *list_kref) * called before drm_gem_object_free or we'll be touching * freed memory */ -void -drm_gem_object_handle_free(struct kref *kref) +void drm_gem_object_handle_free(struct drm_gem_object *obj) { - struct drm_gem_object *obj = container_of(kref, - struct drm_gem_object, - handlecount); struct drm_device *dev = obj->dev; /* Remove any name for this object */ diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index 2ef2c7827243..974e970ce3f8 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -255,7 +255,7 @@ int drm_gem_one_name_info(int id, void *ptr, void *data) seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, - atomic_read(&obj->handlecount.refcount), + atomic_read(&obj->handle_count), atomic_read(&obj->refcount.refcount)); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 
cf4ffbee1c00..4cdf74264ee8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -136,14 +136,12 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOMEM; ret = drm_gem_handle_create(file_priv, obj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(obj); if (ret) { - drm_gem_object_unreference_unlocked(obj); return ret; } - /* Sink the floating reference from kref_init(handlecount) */ - drm_gem_object_handle_unreference_unlocked(obj); - args->handle = handle; return 0; } diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 7bdc96256bf5..56ad9df2ccb5 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -237,8 +237,10 @@ int intel_fbdev_destroy(struct drm_device *dev, drm_fb_helper_fini(&ifbdev->helper); drm_framebuffer_cleanup(&ifb->base); - if (ifb->obj) + if (ifb->obj) { + drm_gem_object_handle_unreference(ifb->obj); drm_gem_object_unreference(ifb->obj); + } return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index dbd30b2e43fd..d2047713dc59 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -352,6 +352,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *nfbdev) if (nouveau_fb->nvbo) { nouveau_bo_unmap(nouveau_fb->nvbo); + drm_gem_object_handle_unreference_unlocked(nouveau_fb->nvbo->gem); drm_gem_object_unreference_unlocked(nouveau_fb->nvbo->gem); nouveau_fb->nvbo = NULL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index ead7b8fc53fc..19620a6709f5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -167,11 +167,9 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data, goto out; ret = drm_gem_handle_create(file_priv, nvbo->gem, &req->info.handle); + /* drop reference from 
allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(nvbo->gem); out: - drm_gem_object_handle_unreference_unlocked(nvbo->gem); - - if (ret) - drm_gem_object_unreference_unlocked(nvbo->gem); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c index 3ec181ff50ce..3c9964a8fbad 100644 --- a/drivers/gpu/drm/nouveau/nouveau_notifier.c +++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c @@ -79,6 +79,7 @@ nouveau_notifier_takedown_channel(struct nouveau_channel *chan) mutex_lock(&dev->struct_mutex); nouveau_bo_unpin(chan->notifier_bo); mutex_unlock(&dev->struct_mutex); + drm_gem_object_handle_unreference_unlocked(chan->notifier_bo->gem); drm_gem_object_unreference_unlocked(chan->notifier_bo->gem); drm_mm_takedown(&chan->notifier_heap); } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 7422f274615a..b92d2f2fcbed 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -843,8 +843,9 @@ static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); - if (radeon_fb->obj) + if (radeon_fb->obj) { drm_gem_object_unreference_unlocked(radeon_fb->obj); + } drm_framebuffer_cleanup(fb); kfree(radeon_fb); } diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index c74a8b20d941..9cdf6a35bc2c 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -94,8 +94,10 @@ static void radeonfb_destroy_pinned_object(struct drm_gem_object *gobj) ret = radeon_bo_reserve(rbo, false); if (likely(ret == 0)) { radeon_bo_kunmap(rbo); + radeon_bo_unpin(rbo); radeon_bo_unreserve(rbo); } + drm_gem_object_handle_unreference(gobj); drm_gem_object_unreference_unlocked(gobj); } @@ -325,8 +327,6 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb { struct fb_info 
*info; struct radeon_framebuffer *rfb = &rfbdev->rfb; - struct radeon_bo *rbo; - int r; if (rfbdev->helper.fbdev) { info = rfbdev->helper.fbdev; @@ -338,14 +338,8 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb } if (rfb->obj) { - rbo = rfb->obj->driver_private; - r = radeon_bo_reserve(rbo, false); - if (likely(r == 0)) { - radeon_bo_kunmap(rbo); - radeon_bo_unpin(rbo); - radeon_bo_unreserve(rbo); - } - drm_gem_object_unreference_unlocked(rfb->obj); + radeonfb_destroy_pinned_object(rfb->obj); + rfb->obj = NULL; } drm_fb_helper_fini(&rfbdev->helper); drm_framebuffer_cleanup(&rfb->base); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index c578f265b24c..d1e595d91723 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -201,11 +201,11 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, return r; } r = drm_gem_handle_create(filp, gobj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(gobj); if (r) { - drm_gem_object_unreference_unlocked(gobj); return r; } - drm_gem_object_handle_unreference_unlocked(gobj); args->handle = handle; return 0; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 774e1d49509b..07e4726a4ee0 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -612,7 +612,7 @@ struct drm_gem_object { struct kref refcount; /** Handle count of this object. 
Each handle also holds a reference */ - struct kref handlecount; + atomic_t handle_count; /* number of handles on this object */ /** Related drm device */ struct drm_device *dev; @@ -1461,7 +1461,7 @@ struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); -void drm_gem_object_handle_free(struct kref *kref); +void drm_gem_object_handle_free(struct drm_gem_object *obj); void drm_gem_vm_open(struct vm_area_struct *vma); void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); @@ -1496,7 +1496,7 @@ static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) { drm_gem_object_reference(obj); - kref_get(&obj->handlecount); + atomic_inc(&obj->handle_count); } static inline void @@ -1505,12 +1505,15 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference(obj); } @@ -1520,12 +1523,17 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; + /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference_unlocked(obj); } -- cgit v1.2.3-59-g8ed1b From 6110a1f43c27b516e16d5ce8860fca50748c2a87 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 30 Sep 2010 
21:19:07 -0400 Subject: intel_idle: Voluntary leave_mm before entering deeper Avoid TLB flush IPIs for the cores in deeper c-states by voluntary leave_mm() before entering into that state. CPUs tend to flush TLB in those c-states anyways. acpi_idle does this with C3-type states, but it was not carried over when intel_idle was introduced. intel_idle can apply it to C-states in addition to those that ACPI might export as C3... Signed-off-by: Suresh Siddha Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 18 ++++++++++++++---- include/linux/cpuidle.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 96bf38097996..0906fc5b69b9 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -108,7 +108,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "NHM-C3", .desc = "MWAIT 0x10", .driver_data = (void *) 0x10, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .power_usage = 500, .target_residency = 80, @@ -117,7 +117,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "NHM-C6", .desc = "MWAIT 0x20", .driver_data = (void *) 0x20, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .power_usage = 350, .target_residency = 800, @@ -149,7 +149,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "ATM-C4", .desc = "MWAIT 0x30", .driver_data = (void *) 0x30, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .power_usage = 250, .target_residency = 400, @@ -159,7 +159,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "ATM-C6", .desc = "MWAIT 0x40", .driver_data = (void *) 0x40, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags =
CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .power_usage = 150, .target_residency = 800, @@ -185,6 +185,16 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) local_irq_disable(); + /* + * If the state flag indicates that the TLB will be flushed or if this + * is the deepest c-state supported, do a voluntary leave mm to avoid + * costly and mostly unnecessary wakeups for flushing the user TLB's + * associated with the active mm. + */ + if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED || + (&dev->states[dev->state_count - 1] == state)) + leave_mm(cpu); + if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 36ca9721a0c2..1be416bbbb82 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -53,6 +53,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ #define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ +#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) -- cgit v1.2.3-59-g8ed1b From 39b4d07aa3583ceefe73622841303a0a3e942ca1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Sep 2010 09:10:26 +0100 Subject: drm: Hold the mutex when dropping the last GEM reference (v2) In order to be fully threadsafe we need to check that the drm_gem_object refcount is still 0 after acquiring the mutex in order to call the free function. Otherwise, we may encounter scenarios like: Thread A: Thread B: drm_gem_close unreference_unlocked kref_put mutex_lock ... i915_gem_evict ... kref_get -> BUG ... i915_gem_unbind ... kref_put ... i915_gem_object_free ... 
mutex_unlock mutex_lock i915_gem_object_free -> BUG i915_gem_object_unbind kfree mutex_unlock Note that no driver is currently using the free_unlocked vfunc and it is scheduled for removal, hasten that process. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=30454 Reported-and-Tested-by: Magnus Kessler Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 22 ---------------------- include/drm/drmP.h | 10 ++++++---- 2 files changed, 6 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index f7e61be8430a..5663d2719063 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -462,28 +462,6 @@ drm_gem_object_free(struct kref *kref) } EXPORT_SYMBOL(drm_gem_object_free); -/** - * Called after the last reference to the object has been lost. - * Must be called without holding struct_mutex - * - * Frees the object - */ -void -drm_gem_object_free_unlocked(struct kref *kref) -{ - struct drm_gem_object *obj = (struct drm_gem_object *) kref; - struct drm_device *dev = obj->dev; - - if (dev->driver->gem_free_object_unlocked != NULL) - dev->driver->gem_free_object_unlocked(obj); - else if (dev->driver->gem_free_object != NULL) { - mutex_lock(&dev->struct_mutex); - dev->driver->gem_free_object(obj); - mutex_unlock(&dev->struct_mutex); - } -} -EXPORT_SYMBOL(drm_gem_object_free_unlocked); - static void drm_gem_object_ref_bug(struct kref *list_kref) { BUG(); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 07e4726a4ee0..4c9461a4f9e6 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -808,7 +808,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); @@ -1456,7 +1455,6 @@ int drm_gem_init(struct 
drm_device *dev); void drm_gem_destroy(struct drm_device *dev); void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); -void drm_gem_object_free_unlocked(struct kref *kref); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, @@ -1484,8 +1482,12 @@ drm_gem_object_unreference(struct drm_gem_object *obj) static inline void drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) { - if (obj != NULL) - kref_put(&obj->refcount, drm_gem_object_free_unlocked); + if (obj != NULL) { + struct drm_device *dev = obj->dev; + mutex_lock(&dev->struct_mutex); + kref_put(&obj->refcount, drm_gem_object_free); + mutex_unlock(&dev->struct_mutex); + } } int drm_gem_handle_create(struct drm_file *file_priv, -- cgit v1.2.3-59-g8ed1b From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bug.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe.
Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/avr32/kernel/module.c | 3 +-- arch/h8300/kernel/module.c | 3 +-- arch/mn10300/kernel/module.c | 3 +-- arch/parisc/kernel/module.c | 3 +-- arch/powerpc/kernel/module.c | 5 ----- arch/s390/kernel/module.c | 3 +-- arch/sh/kernel/module.c | 2 -- arch/x86/kernel/module.c | 3 +-- include/linux/module.h | 5 ++--- kernel/module.c | 4 ++++ lib/bug.c | 6 ++---- 11 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 98f94d041d9c..a727f54d64d6 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, vfree(module->arch.syminfo); module->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, module); + return 0; } void module_arch_cleanup(struct module *module) { - module_bug_cleanup(module); } diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index 0865e291c20d..db4953dc4e1b 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6aea7fd76993..196a111e2e29 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct 
module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } /* @@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr, */ void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 159a2b81e90c..6e81bb596e5b 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr, nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; DEBUGP("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { deregister_unwind_table(mod); - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e0140..4ef93ae2235f 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -65,10 +65,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sect; int err; - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; - /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); if (sect != NULL) @@ -101,5 +97,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd634c355..f7167ee4604c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index 43adddfe4c04..ae0be697a89e 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr, int ret = 0; 
ret |= module_dwarf_finalize(hdr, sechdrs, me); - ret |= module_bug_finalize(hdr, sechdrs, me); return ret; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); module_dwarf_cleanup(mod); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d7501..1c355c550960 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ffa..aace066bad8f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -686,17 +686,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b4..ccd641991842 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod) { struct module *mod = _mod; list_del(&mod->list); + module_bug_cleanup(mod); return 0; } @@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod, if (err < 0) goto ddebug; + module_bug_finalize(info.hdr, info.sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* 
Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + module_bug_cleanup(mod); + ddebug: if (!mod->taints) dynamic_debug_remove(info.debug); diff --git a/lib/bug.c b/lib/bug.c index 7cdfad88128f..19552096d16b 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) return NULL; } -int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *mod) +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { char *secstrings; unsigned int i; @@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * could potentially lead to deadlock and thus be counter-productive. */ list_add(&mod->bug_list, &module_bug_list); - - return 0; } void module_bug_cleanup(struct module *mod) -- cgit v1.2.3-59-g8ed1b From 231d0aefd88e94129cb8fb84794f9bb788c6366e Mon Sep 17 00:00:00 2001 From: Evgeny Kuznetsov Date: Tue, 5 Oct 2010 12:47:57 +0400 Subject: wait: using uninitialized member of wait queue The "flags" member of "struct wait_queue_t" is used in several places in the kernel code without being initialized by init_wait(). "flags" is used in bitwise operations. If "flags" is not initialized then unexpected behaviour may take place. Incorrect flags might be used later in code. Added initialization of "wait_queue_t.flags" with zero value into "init_wait".
Signed-off-by: Evgeny Kuznetsov [ The bit we care about does end up being initialized by both prepare_to_wait() and add_to_wait_queue(), so this doesn't seem to cause actual bugs, but is definitely the right thing to do -Linus ] Signed-off-by: Linus Torvalds --- include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 0836ccc57121..3efc9f3f43a0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -614,6 +614,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ INIT_LIST_HEAD(&(wait)->task_list); \ + (wait)->flags = 0; \ } while (0) /** -- cgit v1.2.3-59-g8ed1b From 1df6a2ebd75067aefbdf07482bf8e3d0584e04ee Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Sep 2010 12:36:45 +0200 Subject: drm/ttm: Fix two race conditions + fix busy codepaths This fixes a race pointed out by Dave Airlie where we don't take a buffer object about to be destroyed off the LRU lists properly. It also fixes a rare case where a buffer object could be destroyed in the middle of an accelerated eviction. The patch also adds a utility function that can be used to prematurely release GPU memory space usage of an object waiting to be destroyed. For example during eviction or swapout. The above mentioned commit didn't queue the buffer on the delayed destroy list under some rare circumstances. It also didn't completely honor the remove_all parameter. 
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=615505 http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=591061 Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 83 +++++++++++++++++++++++++++++++++++++------- include/drm/ttm/ttm_bo_api.h | 4 ++- 2 files changed, 74 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index cb4cf7ef4d1e..db809e034cc4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -441,6 +441,43 @@ out_err: return ret; } +/** + * Call bo::reserved and with the lru lock held. + * Will release GPU memory type usage on destruction. + * This is the place to put in driver specific hooks. + * Will release the bo::reserved lock and the + * lru lock on exit. + */ + +static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) +{ + struct ttm_bo_global *glob = bo->glob; + + if (bo->ttm) { + + /** + * Release the lru_lock, since we don't want to have + * an atomic requirement on ttm_tt[unbind|destroy]. + */ + + spin_unlock(&glob->lru_lock); + ttm_tt_unbind(bo->ttm); + ttm_tt_destroy(bo->ttm); + bo->ttm = NULL; + spin_lock(&glob->lru_lock); + } + + if (bo->mem.mm_node) { + drm_mm_put_block(bo->mem.mm_node); + bo->mem.mm_node = NULL; + } + + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); + spin_unlock(&glob->lru_lock); +} + + /** * If bo idle, remove from delayed- and lru lists, and unref. * If not idle, and already on delayed list, do nothing. 
@@ -456,6 +493,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) int ret; spin_lock(&bo->lock); +retry: (void) ttm_bo_wait(bo, false, false, !remove_all); if (!bo->sync_obj) { @@ -464,31 +502,52 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) spin_unlock(&bo->lock); spin_lock(&glob->lru_lock); - put_count = ttm_bo_del_from_lru(bo); + ret = ttm_bo_reserve_locked(bo, false, !remove_all, false, 0); + + /** + * Someone else has the object reserved. Bail and retry. + */ - ret = ttm_bo_reserve_locked(bo, false, false, false, 0); - BUG_ON(ret); - if (bo->ttm) - ttm_tt_unbind(bo->ttm); + if (unlikely(ret == -EBUSY)) { + spin_unlock(&glob->lru_lock); + spin_lock(&bo->lock); + goto requeue; + } + + /** + * We can re-check for sync object without taking + * the bo::lock since setting the sync object requires + * also bo::reserved. A busy object at this point may + * be caused by another thread starting an accelerated + * eviction. + */ + + if (unlikely(bo->sync_obj)) { + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); + spin_unlock(&glob->lru_lock); + spin_lock(&bo->lock); + if (remove_all) + goto retry; + else + goto requeue; + } + + put_count = ttm_bo_del_from_lru(bo); if (!list_empty(&bo->ddestroy)) { list_del_init(&bo->ddestroy); ++put_count; } - if (bo->mem.mm_node) { - drm_mm_put_block(bo->mem.mm_node); - bo->mem.mm_node = NULL; - } - spin_unlock(&glob->lru_lock); - atomic_set(&bo->reserved, 0); + ttm_bo_cleanup_memtype_use(bo); while (put_count--) kref_put(&bo->list_kref, ttm_bo_ref_bug); return 0; } - +requeue: spin_lock(&glob->lru_lock); if (list_empty(&bo->ddestroy)) { void *sync_obj = bo->sync_obj; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 267a86c74e2e..2040e6c4f172 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -246,9 +246,11 @@ struct ttm_buffer_object { atomic_t reserved; - /** * Members protected by the bo::lock 
+ * In addition, setting sync_obj to anything else + * than NULL requires bo::reserved to be held. This allows for + * checking NULL while reserved but not holding bo::lock. */ void *sync_obj_arg; -- cgit v1.2.3-59-g8ed1b From 430c62fb2948d964cf8dc7f3e2f69623c04ef62f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Oct 2010 09:35:16 +0200 Subject: elevator: fix oops on early call to elevator_change() 2.6.36 introduces an API for drivers to switch the IO scheduler instead of manually calling the elevator exit and init functions. This API was added since q->elevator must be cleared in between those two calls. And since we already have this functionality directly from use by the sysfs interface to switch schedulers online, it was prudent to reuse it internally too. But this API needs the queue to be in a fully initialized state before it is called, or it will attempt to unregister elevator kobjects before they have been added. This results in an oops like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000051 IP: [] sysfs_create_dir+0x2e/0xc0 PGD 47ddfc067 PUD 47c6a1067 PMD 0 Oops: 0000 [#1] PREEMPT SMP last sysfs file: /sys/devices/pci0000:00/0000:00:02.0/0000:04:00.1/irq CPU 2 Modules linked in: t(+) loop hid_apple usbhid ahci ehci_hcd uhci_hcd libahci usbcore nls_base igb Pid: 7319, comm: modprobe Not tainted 2.6.36-rc6+ #132 QSSC-S4R/QSSC-S4R RIP: 0010:[] [] sysfs_create_dir+0x2e/0xc0 RSP: 0018:ffff88027da25d08 EFLAGS: 00010246 RAX: ffff88047c68c528 RBX: 00000000fffffffe RCX: 0000000000000000 RDX: 000000000000002f RSI: 000000000000002f RDI: ffff88047e196c88 RBP: ffff88027da25d38 R08: 0000000000000000 R09: d84156c5635688c0 R10: d84156c5635688c0 R11: 0000000000000000 R12: ffff88047e196c88 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88047c68c528 FS: 00007fcb0b26f6e0(0000) GS:ffff880287400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000051 CR3: 000000047e76e000 CR4: 
00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 7319, threadinfo ffff88027da24000, task ffff88027d377090) Stack: ffff88027da25d58 ffff88047c68c528 00000000fffffffe ffff88047e196c88 <0> ffff88047c68c528 ffff88047e05bd90 ffff88027da25d78 ffffffff8123fb77 <0> ffff88047e05bd90 0000000000000000 ffff88047e196c88 ffff88047c68c528 Call Trace: [] kobject_add_internal+0xe7/0x1f0 [] kobject_add_varg+0x38/0x60 [] kobject_add+0x69/0x90 [] ? sysfs_remove_dir+0x20/0xa0 [] ? sub_preempt_count+0x9d/0xe0 [] ? _raw_spin_unlock+0x30/0x50 [] ? sysfs_remove_dir+0x20/0xa0 [] ? sysfs_remove_dir+0x34/0xa0 [] elv_register_queue+0x34/0xa0 [] elevator_change+0xfd/0x250 [] ? t_init+0x0/0x361 [t] [] ? t_init+0x0/0x361 [t] [] t_init+0xa8/0x361 [t] [] do_one_initcall+0x3e/0x170 [] sys_init_module+0xbd/0x220 [] system_call_fastpath+0x16/0x1b Code: e5 41 56 41 55 41 54 49 89 fc 53 48 83 ec 10 48 85 ff 74 52 48 8b 47 18 49 c7 c5 00 46 61 81 48 85 c0 74 04 4c 8b 68 30 45 31 f6 <41> 80 7d 51 00 74 0e 49 8b 44 24 28 4c 89 e7 ff 50 20 49 89 c6 RIP [] sysfs_create_dir+0x2e/0xc0 RSP CR2: 0000000000000051 ---[ end trace a6541d3bf07945df ]--- Fix this by adding a registered bit to the elevator queue, which is set when the sysfs kobjects have been registered. 
Signed-off-by: Jens Axboe --- block/elevator.c | 12 ++++++++---- include/linux/elevator.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/block/elevator.c b/block/elevator.c index 205b09a5bd9e..4e11559aa2b0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -938,6 +938,7 @@ int elv_register_queue(struct request_queue *q) } } kobject_uevent(&e->kobj, KOBJ_ADD); + e->registered = 1; } return error; } @@ -947,6 +948,7 @@ static void __elv_unregister_queue(struct elevator_queue *e) { kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); + e->registered = 0; } void elv_unregister_queue(struct request_queue *q) @@ -1042,11 +1044,13 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) spin_unlock_irq(q->queue_lock); - __elv_unregister_queue(old_elevator); + if (old_elevator->registered) { + __elv_unregister_queue(old_elevator); - err = elv_register_queue(q); - if (err) - goto fail_register; + err = elv_register_queue(q); + if (err) + goto fail_register; + } /* * finally exit old elevator and turn off BYPASS. diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 926b50322a46..4fd978e7eb83 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -93,6 +93,7 @@ struct elevator_queue struct elevator_type *elevator_type; struct mutex sysfs_lock; struct hlist_head *hash; + unsigned int registered:1; }; /* -- cgit v1.2.3-59-g8ed1b From 7c5347733dcc4ba0bac0baf86d99fae0561f33b7 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 11 Oct 2010 18:13:31 -0400 Subject: fanotify: disable fanotify syscalls This patch disables the fanotify syscalls by just not building them and letting the cond_syscall() statements in kernel/sys_ni.c redirect them to sys_ni_syscall(). It was pointed out by Tvrtko Ursulin that the fanotify interface did not include an explicit prioritization between groups. 
This is necessary for fanotify to be usable for hierarchical storage management software, as they must get first access to the file, before inotify-like notifiers see the file. This feature can be added in an ABI compatible way in the next release (by using a number of bits in the flags field to carry the info) but it was suggested by Alan that maybe we should just hold off and do it in the next cycle, likely with a (new) explicit argument to the syscall. I don't like this approach best as I know people are already starting to use the current interface, but Alan is all wise and no one on list backed me up with just using what we have. I feel this is needlessly ripping the rug out from under people at the last minute, but if others think it needs to be a new argument it might be the best way forward. Three choices: Go with what we got (and implement the new feature next cycle). Add a new field right now (and implement the new feature next cycle). Wait till next cycle to release the ABI (and implement the new feature next cycle). This is number 3.
Signed-off-by: Eric Paris Signed-off-by: Linus Torvalds --- fs/notify/Kconfig | 2 +- include/linux/Kbuild | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 22c629eedd82..b388443c3a09 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -3,4 +3,4 @@ config FSNOTIFY source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" -source "fs/notify/fanotify/Kconfig" +#source "fs/notify/fanotify/Kconfig" diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429ff..4e8ea8c8ec1e 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -118,7 +118,6 @@ header-y += eventpoll.h header-y += ext2_fs.h header-y += fadvise.h header-y += falloc.h -header-y += fanotify.h header-y += fb.h header-y += fcntl.h header-y += fd.h -- cgit v1.2.3-59-g8ed1b From 0eead9ab41da33644ae2c97c57ad03da636a0422 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Oct 2010 10:57:40 -0700 Subject: Don't dump task struct in a.out core-dumps akiphie points out that a.out core-dumps have that odd task struct dumping that was never used and was never really a good idea (it goes back into the mists of history, probably the original core-dumping code). Just remove it. Also do the access_ok() check on dump_write(). It probably doesn't matter (since normal filesystems all seem to do it anyway), but he points out that it's normally done by the VFS layer, so ... [ I suspect that we should possibly do "vfs_write()" instead of calling ->write directly. That also does the whole fsnotify and write statistics thing, which may or may not be a good idea. 
] And just to be anal, do this all for the x86-64 32-bit a.out emulation code too, even though it's not enabled (and won't currently even compile) Reported-by: akiphie Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 22 +++++----------------- fs/binfmt_aout.c | 4 ---- include/linux/coredump.h | 2 +- 3 files changed, 6 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 0350311906ae..2d93bdbc9ac0 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -34,7 +34,7 @@ #include #undef WARN_OLD -#undef CORE_DUMP /* probably broken */ +#undef CORE_DUMP /* definitely broken */ static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); static int load_aout_library(struct file *); @@ -131,21 +131,15 @@ static void set_brk(unsigned long start, unsigned long end) * macros to write out all the necessary info. */ -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} +#include #define DUMP_WRITE(addr, nr) \ if (!dump_write(file, (void *)(addr), (nr))) \ goto end_coredump; -#define DUMP_SEEK(offset) \ - if (file->f_op->llseek) { \ - if (file->f_op->llseek(file, (offset), 0) != (offset)) \ - goto end_coredump; \ - } else \ - file->f_pos = (offset) +#define DUMP_SEEK(offset) \ + if (!dump_seek(file, offset)) \ + goto end_coredump; #define START_DATA() (u.u_tsize << PAGE_SHIFT) #define START_STACK(u) (u.start_stack) @@ -217,12 +211,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, dump_size = dump.u_ssize << PAGE_SHIFT; DUMP_WRITE(dump_start, dump_size); } - /* - * Finally dump the task struct. 
Not be used by gdb, but - * could be useful - */ - set_fs(KERNEL_DS); - DUMP_WRITE(current, sizeof(*current)); end_coredump: set_fs(fs); return has_dumped; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f96eff04e11a..a6395bdb26ae 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -134,10 +134,6 @@ static int aout_core_dump(struct coredump_params *cprm) if (!dump_write(file, dump_start, dump_size)) goto end_coredump; } -/* Finally dump the task struct. Not be used by gdb, but could be useful */ - set_fs(KERNEL_DS); - if (!dump_write(file, current, sizeof(*current))) - goto end_coredump; end_coredump: set_fs(fs); return has_dumped; diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 8ba66a9d9022..59579cfee6a0 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -11,7 +11,7 @@ */ static inline int dump_write(struct file *file, const void *addr, int nr) { - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; + return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; } static inline int dump_seek(struct file *file, loff_t off) -- cgit v1.2.3-59-g8ed1b From 3aa0ce825ade0cf5506e32ccf51d01fc8d22a9cf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Oct 2010 14:32:06 -0700 Subject: Un-inline the core-dump helper functions Tony Luck reports that the addition of the access_ok() check in commit 0eead9ab41da ("Don't dump task struct in a.out core-dumps") broke the ia64 compile due to missing the necessary header file includes. Rather than add yet another include (<asm/uaccess.h>) to make everything happy, just uninline the silly core dump helper functions and move the bodies to fs/exec.c where they make a lot more sense. dump_seek() in particular was too big to be an inline function anyway, and none of them are in any way performance-critical. And we really don't need to mess up our include file headers more than they already are. 
Reported-and-tested-by: Tony Luck Signed-off-by: Linus Torvalds --- fs/exec.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/coredump.h | 34 ++-------------------------------- 2 files changed, 40 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 828dd2461d6b..03278c984ba0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2014,3 +2014,41 @@ fail_creds: fail: return; } + +/* + * Core dumping helper functions. These are the only things you should + * do on a core-file: use only these functions to write out all the + * necessary info. + */ +int dump_write(struct file *file, const void *addr, int nr) +{ + return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} + +int dump_seek(struct file *file, loff_t off) +{ + int ret = 1; + + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + return 0; + } else { + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) { + ret = 0; + break; + } + off -= n; + } + free_page((unsigned long)buf); + } + return ret; +} diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 59579cfee6a0..ba4b85a6d9b8 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -9,37 +9,7 @@ * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. 
*/ -static inline int dump_write(struct file *file, const void *addr, int nr) -{ - return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static inline int dump_seek(struct file *file, loff_t off) -{ - int ret = 1; - - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) { - ret = 0; - break; - } - off -= n; - } - free_page((unsigned long)buf); - } - return ret; -} +extern int dump_write(struct file *file, const void *addr, int nr); +extern int dump_seek(struct file *file, loff_t off); #endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3-59-g8ed1b From 79b5dc0c64d88cda3da23b2e22a5cec0964372ac Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 15 Oct 2010 14:34:14 -0700 Subject: types.h: define __aligned_u64 and expose to userspace We currently have a kernel internal type called aligned_u64 which aligns __u64's on 8 bytes boundaries even on systems which would normally align them on 4 byte boundaries. This patch creates a new type __aligned_u64 which does the same thing but which is exposed to userspace rather than being kernel internal. [akpm: merge early as both the net and audit trees want this] [akpm@linux-foundation.org: enhance the comment describing the reasons for using aligned_u64. Via Andreas and Andi.] 
Based-on-patch-by: Andreas Gruenbacher Signed-off-by: Eric Paris Cc: Jan Engelhardt Cc: David Miller Cc: Andi Kleen Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/types.h b/include/linux/types.h index 01a082f56ef4..357dbc19606f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -121,7 +121,15 @@ typedef __u64 u_int64_t; typedef __s64 int64_t; #endif -/* this is a special 64bit data type that is 8-byte aligned */ +/* + * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid + * common 32/64-bit compat problems. + * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other + * architectures) and to 8-byte boundaries on 64-bit architetures. The new + * aligned_64 type enforces 8-byte alignment so that structs containing + * aligned_64 values have the same alignment on 32-bit and 64-bit architectures. + * No conversions are necessary between 32-bit user-space and a 64-bit kernel. 
+ */ #define aligned_u64 __u64 __attribute__((aligned(8))) #define aligned_be64 __be64 __attribute__((aligned(8))) #define aligned_le64 __le64 __attribute__((aligned(8))) @@ -178,6 +186,11 @@ typedef __u64 __bitwise __be64; typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; +/* this is a special 64bit data type that is 8-byte aligned */ +#define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_be64 __be64 __attribute__((aligned(8))) +#define __aligned_le64 __le64 __attribute__((aligned(8))) + #ifdef __KERNEL__ typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; -- cgit v1.2.3-59-g8ed1b From 363129ea90e0835b8552b797714cd200f674e287 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sat, 16 Oct 2010 13:24:52 -0400 Subject: ALSA: fix unused warnings with snd_power_get_state If we compile the ASoC code with PM disabled, we hit stuff like: sound/soc/soc-dapm.c: In function 'snd_soc_dapm_suspend_check': sound/soc/soc-dapm.c:440: warning: unused variable 'codec' So tweak the stub macro to avoid these issues. 
Signed-off-by: Mike Frysinger Signed-off-by: Takashi Iwai --- include/sound/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 89e0ac17f44a..c129f0813bae 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -179,7 +179,7 @@ int snd_power_wait(struct snd_card *card, unsigned int power_state); #define snd_power_lock(card) do { (void)(card); } while (0) #define snd_power_unlock(card) do { (void)(card); } while (0) static inline int snd_power_wait(struct snd_card *card, unsigned int state) { return 0; } -#define snd_power_get_state(card) SNDRV_CTL_POWER_D0 +#define snd_power_get_state(card) ({ (void)(card); SNDRV_CTL_POWER_D0; }) #define snd_power_change_state(card, state) do { (void)(card); } while (0) #endif /* CONFIG_PM */ -- cgit v1.2.3-59-g8ed1b From c08d91695b2a3349254a62b60f03f7971bd90fa0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 Oct 2010 10:40:53 +0200 Subject: ALSA: tlv - Define numbers in sound/tlv.h Signed-off-by: Takashi Iwai --- include/sound/tlv.h | 4 +++- sound/pci/hda/hda_codec.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/tlv.h b/include/sound/tlv.h index 9fd5b19ccf5c..7067e2dfb0b9 100644 --- a/include/sound/tlv.h +++ b/include/sound/tlv.h @@ -38,9 +38,11 @@ #define SNDRV_CTL_TLVT_DB_MINMAX 4 /* dB scale with min/max */ #define SNDRV_CTL_TLVT_DB_MINMAX_MUTE 5 /* dB scale with min/max with mute */ +#define TLV_DB_SCALE_MASK 0xffff +#define TLV_DB_SCALE_MUTE 0x10000 #define TLV_DB_SCALE_ITEM(min, step, mute) \ SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int), \ - (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) + (min), ((step) & TLV_DB_SCALE_MASK) | ((mute) ? 
TLV_DB_SCALE_MUTE : 0) #define DECLARE_TLV_DB_SCALE(name, min, step, mute) \ unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 8c933c8006f4..ee134a25092c 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1843,7 +1843,7 @@ int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, val1 += ofs; val1 = ((int)val1) * ((int)val2); if (min_mute) - val2 |= 0x10000; + val2 |= TLV_DB_SCALE_MUTE; if (put_user(SNDRV_CTL_TLVT_DB_SCALE, _tlv)) return -EFAULT; if (put_user(2 * sizeof(unsigned int), _tlv + 1)) -- cgit v1.2.3-59-g8ed1b From 83fc3bc09518d42e8f5073e2a65884701dfadf19 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Mon, 18 Oct 2010 12:20:39 +0900 Subject: ALSA: emu10k1: Fix warning: "CCR" redefined CCR is defined in emu10k1, but SuperH is defined too. If user use this driver with SuperH, it becomes a double definition. Signed-off-by: Nobuhiro Iwamatsu Cc: Paul Mundt Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 7dc97d12253c..4f865df42f0f 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -438,6 +438,8 @@ #define CCCA_CURRADDR_MASK 0x00ffffff /* Current address of the selected channel */ #define CCCA_CURRADDR 0x18000008 +/* undefine CCR to avoid conflict with the definition for SH */ +#undef CCR #define CCR 0x09 /* Cache control register */ #define CCR_CACHEINVALIDSIZE 0x07190009 #define CCR_CACHEINVALIDSIZE_MASK 0xfe000000 /* Number of invalid samples cache for this channel */ -- cgit v1.2.3-59-g8ed1b