From dd20908a8a06b22c171f6c3fcdbdbd65bed07505 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:56:20 -0400 Subject: don't bother with {get,put}_write_access() on non-regular files it's pointless and actually leads to wrong behaviour in at least one moderately convoluted case (pipe(), close one end, try to get to another via /proc/*/fd and run into ETXTBUSY). Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/file_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index 5b24008ea4f6..79ecae62209a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -209,10 +209,10 @@ static void drop_file_write_access(struct file *file) struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - put_write_access(inode); - if (special_file(inode->i_mode)) return; + + put_write_access(inode); if (file_check_writeable(file) != 0) return; __mnt_drop_write(mnt); -- cgit v1.2.3-59-g8ed1b From 4597e695b8baa3e2620da89c7593be70cf20566b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:06:32 -0400 Subject: get rid of DEBUG_WRITECOUNT it only makes control flow in __fput() and friends more convoluted. Signed-off-by: Al Viro --- arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/s390/configs/default_defconfig | 1 - arch/sh/configs/rsk7203_defconfig | 1 - arch/xtensa/configs/iss_defconfig | 1 - arch/xtensa/configs/s6105_defconfig | 1 - fs/file_table.c | 5 ---- fs/open.c | 8 ------ include/linux/fs.h | 49 ----------------------------------- lib/Kconfig.debug | 10 ------- 10 files changed, 78 deletions(-) (limited to 'fs/file_table.c') diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c2353bf059fd..175a8b99c196 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1244,7 +1244,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_HIGHMEM=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 139a8308070c..fdee37fab81c 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -174,7 +174,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index e0af2ee58751..3f538468a86e 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -550,7 +550,6 @@ CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index 4e5229b0c5bb..47236573db83 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -128,7 +128,6 @@ CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_FRAME_POINTER=y diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 4f233204faf9..711f8aa14743 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -627,7 +627,6 @@ CONFIG_SCHED_DEBUG=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index d929f77a0360..78318a76fa16 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -569,7 +569,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set CONFIG_DEBUG_NOMMU_REGIONS=y -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/fs/file_table.c b/fs/file_table.c index 79ecae62209a..ee20658a0647 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { percpu_counter_dec(&nr_files); - file_check_state(f); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -186,7 +185,6 @@ struct file *alloc_file(struct path *path, fmode_t mode, * that we can do debugging checks at __fput() */ if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - file_take_write(file); WARN_ON(mnt_clone_write(path->mnt)); } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) @@ -213,10 +211,7 @@ static void drop_file_write_access(struct file *file) return; put_write_access(inode); - if (file_check_writeable(file) != 0) - return; __mnt_drop_write(mnt); - file_release_write(file); } /* the real guts of fput() - releasing the last reference to file diff --git a/fs/open.c b/fs/open.c index 2ed7325f713e..8d0b6adfe7b8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -683,7 +683,6 @@ static int do_dentry_open(struct file *f, error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; - file_take_write(f); } f->f_mapping = inode->i_mapping; @@ -731,14 +730,7 @@ cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) { if (!special_file(inode->i_mode)) { - /* - * We don't consider this a real - * mnt_want/drop_write() pair - * because it all happenend right - * here, so just reset the state. - */ put_write_access(inode); - file_reset_write(f); __mnt_drop_write(f->f_path.mnt); } } diff --git a/include/linux/fs.h b/include/linux/fs.h index 23b2a35d712e..e80659ed78fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,9 +769,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -#define FILE_MNT_WRITE_TAKEN 1 -#define FILE_MNT_WRITE_RELEASED 2 - struct file { union { struct llist_node fu_llist; @@ -809,9 +806,6 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -#ifdef CONFIG_DEBUG_WRITECOUNT - unsigned long f_mnt_write_state; -#endif } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { @@ -829,49 +823,6 @@ static inline struct file *get_file(struct file *f) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) -#ifdef CONFIG_DEBUG_WRITECOUNT -static inline void file_take_write(struct file *f) -{ - WARN_ON(f->f_mnt_write_state != 0); - f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; -} -static inline void file_release_write(struct file *f) -{ - f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; -} -static inline void file_reset_write(struct file *f) -{ - f->f_mnt_write_state = 0; -} -static inline void file_check_state(struct file *f) -{ - /* - * At this point, either both or neither of these bits - * should be set. - */ - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); -} -static inline int file_check_writeable(struct file *f) -{ - if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) - return 0; - printk(KERN_WARNING "writeable file with no " - "mnt_want_write()\n"); - WARN_ON(1); - return -EINVAL; -} -#else /* !CONFIG_DEBUG_WRITECOUNT */ -static inline void file_take_write(struct file *filp) {} -static inline void file_release_write(struct file *filp) {} -static inline void file_reset_write(struct file *filp) {} -static inline void file_check_state(struct file *filp) {} -static inline int file_check_writeable(struct file *filp) -{ - return 0; -} -#endif /* CONFIG_DEBUG_WRITECOUNT */ - #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a48abeac753f..7a0859314bdf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1030,16 +1030,6 @@ config DEBUG_BUGVERBOSE of the BUG call as well as the EIP and oops trace. This aids debugging but costs about 70-100K of memory. -config DEBUG_WRITECOUNT - bool "Debug filesystem writers count" - depends on DEBUG_KERNEL - help - Enable this to catch wrong use of the writers count in struct - vfsmount. This will increase the size of each file struct by - 32 bits. - - If unsure, say N. - config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL -- cgit v1.2.3-59-g8ed1b From 83f936c75e3689a63253d89c47a4d239c56d7410 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:02:47 -0400 Subject: mark struct file that had write access grabbed by open() new flag in ->f_mode - FMODE_WRITER. Set by do_dentry_open() in case when it has grabbed write access, checked by __fput() to decide whether it wants to drop the sucker. Allows to stop bothering with mnt_clone_write() in alloc_file(), along with fewer special_file() checks. Signed-off-by: Al Viro --- fs/file_table.c | 37 ++++--------------------------------- fs/namespace.c | 4 +--- fs/open.c | 9 ++++----- include/linux/fs.h | 2 ++ 4 files changed, 11 insertions(+), 41 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file_table.c b/fs/file_table.c index ee20658a0647..ce1504fec5a1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -177,43 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_mapping = path->dentry->d_inode->i_mapping; file->f_mode = mode; file->f_op = fop; - - /* - * These mounts don't really matter in practice - * for r/o bind mounts. They aren't userspace- - * visible. We do this for consistency, and so - * that we can do debugging checks at __fput() - */ - if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - WARN_ON(mnt_clone_write(path->mnt)); - } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); return file; } EXPORT_SYMBOL(alloc_file); -/** - * drop_file_write_access - give up ability to write to a file - * @file: the file to which we will stop writing - * - * This is a central place which will give up the ability - * to write to @file, along with access to write through - * its vfsmount. - */ -static void drop_file_write_access(struct file *file) -{ - struct vfsmount *mnt = file->f_path.mnt; - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - - if (special_file(inode->i_mode)) - return; - - put_write_access(inode); - __mnt_drop_write(mnt); -} - /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) @@ -248,8 +217,10 @@ static void __fput(struct file *file) put_pid(file->f_owner.pid); if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); - if (file->f_mode & FMODE_WRITE) - drop_file_write_access(file); + if (file->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(mnt); + } file->f_path.dentry = NULL; file->f_path.mnt = NULL; file->f_inode = NULL; diff --git a/fs/namespace.c b/fs/namespace.c index a66aff5bd3fe..20e8696c31a7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write); */ int __mnt_want_write_file(struct file *file) { - struct inode *inode = file_inode(file); - - if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) + if (!(file->f_mode & FMODE_WRITER)) return __mnt_want_write(file->f_path.mnt); else return mnt_clone_write(file->f_path.mnt); diff --git a/fs/open.c b/fs/open.c index ebef0c5fa10c..dcefb2f02d10 100644 --- a/fs/open.c +++ b/fs/open.c @@ -670,6 +670,7 @@ static int do_dentry_open(struct file *f, put_write_access(inode); goto cleanup_file; } + f->f_mode |= FMODE_WRITER; } f->f_mapping = inode->i_mapping; @@ -715,11 +716,9 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); - if (f->f_mode & FMODE_WRITE) { - if (!special_file(inode->i_mode)) { - put_write_access(inode); - __mnt_drop_write(f->f_path.mnt); - } + if (f->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(f->f_path.mnt); } cleanup_file: path_put(&f->f_path); diff --git a/include/linux/fs.h b/include/linux/fs.h index e80659ed78fc..d9d88a0b456e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,6 +125,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) +/* Write access to underlying fs */ +#define FMODE_WRITER ((__force fmode_t)0x10000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) -- cgit v1.2.3-59-g8ed1b From 7f4b36f9bb930b3b2105a9a2cb0121fa7028c432 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:45:29 -0400 Subject: get rid of files_defer_init() the only thing it's doing these days is calculation of upper limit for fs.nr_open sysctl and that can be done statically Signed-off-by: Al Viro --- fs/file.c | 11 ++++------- fs/file_table.c | 1 - include/linux/fdtable.h | 2 -- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'fs/file_table.c') diff --git a/fs/file.c b/fs/file.c index eb56a13dab3e..682103b95f8f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -25,7 +25,10 @@ int sysctl_nr_open __read_mostly = 1024*1024; int sysctl_nr_open_min = BITS_PER_LONG; -int sysctl_nr_open_max = 1024 * 1024; /* raised later */ +/* our max() is unusable in constant expressions ;-/ */ +#define __const_max(x, y) ((x) < (y) ? (x) : (y)) +int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; static void *alloc_fdmem(size_t size) { @@ -429,12 +432,6 @@ void exit_files(struct task_struct *tsk) } } -void __init files_defer_init(void) -{ - sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & - -BITS_PER_LONG; -} - struct files_struct init_files = { .count = ATOMIC_INIT(1), .fdt = &init_files.fdtab, diff --git a/fs/file_table.c b/fs/file_table.c index ce1504fec5a1..718e8e5224f8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -325,6 +325,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - files_defer_init(); percpu_counter_init(&nr_files, 0); } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 70e8e21c0a30..230f87bdf5ad 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -63,8 +63,6 @@ struct file_operations; struct vfsmount; struct dentry; -extern void __init files_defer_init(void); - #define rcu_dereference_check_fdtable(files, fdtfd) \ rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock)) -- cgit v1.2.3-59-g8ed1b