From ad52184b705c1048aa01225eccde119ef5c93000 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 24 Dec 2014 14:56:48 -0500 Subject: selinuxfs: don't open-code d_genocide() Signed-off-by: Al Viro --- security/selinux/selinuxfs.c | 52 +++----------------------------------------- 1 file changed, 3 insertions(+), 49 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 33db1ad4fd10..1684bcc78b34 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1195,30 +1195,8 @@ static const struct file_operations sel_commit_bools_ops = { static void sel_remove_entries(struct dentry *de) { - struct list_head *node; - - spin_lock(&de->d_lock); - node = de->d_subdirs.next; - while (node != &de->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_child); - - spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); - list_del_init(node); - - if (d->d_inode) { - dget_dlock(d); - spin_unlock(&de->d_lock); - spin_unlock(&d->d_lock); - d_delete(d); - simple_unlink(de->d_inode, d); - dput(d); - spin_lock(&de->d_lock); - } else - spin_unlock(&d->d_lock); - node = de->d_subdirs.next; - } - - spin_unlock(&de->d_lock); + d_genocide(de); + shrink_dcache_parent(de); } #define BOOL_DIR_NAME "booleans" @@ -1668,37 +1646,13 @@ static int sel_make_class_dir_entries(char *classname, int index, return rc; } -static void sel_remove_classes(void) -{ - struct list_head *class_node; - - list_for_each(class_node, &class_dir->d_subdirs) { - struct dentry *class_subdir = list_entry(class_node, - struct dentry, d_child); - struct list_head *class_subdir_node; - - list_for_each(class_subdir_node, &class_subdir->d_subdirs) { - struct dentry *d = list_entry(class_subdir_node, - struct dentry, d_child); - - if (d->d_inode) - if (d->d_inode->i_mode & S_IFDIR) - sel_remove_entries(d); - } - - sel_remove_entries(class_subdir); - } - - sel_remove_entries(class_dir); -} - static int sel_make_classes(void) { int rc, nclasses, i; char **classes; /* delete any existing entries */ - sel_remove_classes(); + sel_remove_entries(class_dir); rc = security_get_classes(&classes, &nclasses); if (rc) -- cgit v1.2.3-59-g8ed1b From 5e993e2534e29b62790f9a2908b551b7fb3a63f0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 24 Dec 2014 21:41:47 -0500 Subject: ncpfs: get rid of d_validate() nonsense What we want is to have non-counting references to children in pagecache of parent directory, and avoid picking them after a child has been freed. Fine, so let's just have ->d_prune() clear parent's inode "has directory contents in page cache" flag. That way we don't need ->d_fsdata for storing offsets, so we can use it as a quick and dirty "is it referenced from page cache" flag. Signed-off-by: Al Viro --- fs/ncpfs/dir.c | 98 +++++++++++++++++++++++++----------------------- fs/ncpfs/ncp_fs_i.h | 1 + fs/ncpfs/ncplib_kernel.h | 30 --------------- 3 files changed, 52 insertions(+), 77 deletions(-) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 008960101520..e7ca827d7694 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -77,6 +77,7 @@ static int ncp_hash_dentry(const struct dentry *, struct qstr *); static int ncp_compare_dentry(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); static int ncp_delete_dentry(const struct dentry *); +static void ncp_d_prune(struct dentry *dentry); const struct dentry_operations ncp_dentry_operations = { @@ -84,6 +85,7 @@ const struct dentry_operations ncp_dentry_operations = .d_hash = ncp_hash_dentry, .d_compare = ncp_compare_dentry, .d_delete = ncp_delete_dentry, + .d_prune = ncp_d_prune, }; #define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) @@ -384,42 +386,6 @@ finished: return val; } -static struct dentry * -ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) -{ - struct dentry *dent = dentry; - - if (d_validate(dent, parent)) { - if (dent->d_name.len <= NCP_MAXPATHLEN && - (unsigned long)dent->d_fsdata == fpos) { - if (!dent->d_inode) { - dput(dent); - dent = NULL; - } - return dent; - } - dput(dent); - } - - /* If a pointer is invalid, we search the dentry. */ - spin_lock(&parent->d_lock); - list_for_each_entry(dent, &parent->d_subdirs, d_child) { - if ((unsigned long)dent->d_fsdata == fpos) { - if (dent->d_inode) - dget(dent); - else - dent = NULL; - spin_unlock(&parent->d_lock); - goto out; - } - } - spin_unlock(&parent->d_lock); - return NULL; - -out: - return dent; -} - static time_t ncp_obtain_mtime(struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -435,6 +401,20 @@ static time_t ncp_obtain_mtime(struct dentry *dentry) return ncp_date_dos2unix(i.modifyTime, i.modifyDate); } +static inline void +ncp_invalidate_dircache_entries(struct dentry *parent) +{ + struct ncp_server *server = NCP_SERVER(parent->d_inode); + struct dentry *dentry; + + spin_lock(&parent->d_lock); + list_for_each_entry(dentry, &parent->d_subdirs, d_child) { + dentry->d_fsdata = NULL; + ncp_age_dentry(server, dentry); + } + spin_unlock(&parent->d_lock); +} + static int ncp_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; @@ -500,10 +480,21 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx) struct dentry *dent; bool over; - dent = ncp_dget_fpos(ctl.cache->dentry[ctl.idx], - dentry, ctx->pos); - if (!dent) + spin_lock(&dentry->d_lock); + if (!(NCP_FINFO(inode)->flags & NCPI_DIR_CACHE)) { + spin_unlock(&dentry->d_lock); + goto invalid_cache; + } + dent = ctl.cache->dentry[ctl.idx]; + if (unlikely(!lockref_get_not_dead(&dent->d_lockref))) { + spin_unlock(&dentry->d_lock); + goto invalid_cache; + } + spin_unlock(&dentry->d_lock); + if (!dent->d_inode) { + dput(dent); goto invalid_cache; + } over = !dir_emit(ctx, dent->d_name.name, dent->d_name.len, dent->d_inode->i_ino, DT_UNKNOWN); @@ -548,6 +539,9 @@ init_cache: ctl.filled = 0; ctl.valid = 1; read_really: + spin_lock(&dentry->d_lock); + NCP_FINFO(inode)->flags |= NCPI_DIR_CACHE; + spin_unlock(&dentry->d_lock); if (ncp_is_server_root(inode)) { ncp_read_volume_list(file, ctx, &ctl); } else { @@ -573,6 +567,13 @@ out: return result; } +static void ncp_d_prune(struct dentry *dentry) +{ + if (!dentry->d_fsdata) /* not referenced from page cache */ + return; + NCP_FINFO(dentry->d_parent->d_inode)->flags &= ~NCPI_DIR_CACHE; +} + static int ncp_fill_cache(struct file *file, struct dir_context *ctx, struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, @@ -630,6 +631,10 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); + } else { + spin_lock(&dentry->d_lock); + NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE; + spin_unlock(&dentry->d_lock); } } else { struct inode *inode = newdent->d_inode; @@ -639,12 +644,6 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, mutex_unlock(&inode->i_mutex); } - if (newdent->d_inode) { - ino = newdent->d_inode->i_ino; - newdent->d_fsdata = (void *) ctl.fpos; - ncp_new_dentry(newdent); - } - if (ctl.idx >= NCP_DIRCACHE_SIZE) { if (ctl.page) { kunmap(ctl.page); @@ -660,8 +659,13 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, ctl.cache = kmap(ctl.page); } if (ctl.cache) { - ctl.cache->dentry[ctl.idx] = newdent; - valid = 1; + if (newdent->d_inode) { + newdent->d_fsdata = newdent; + ctl.cache->dentry[ctl.idx] = newdent; + ino = newdent->d_inode->i_ino; + ncp_new_dentry(newdent); + } + valid = 1; } dput(newdent); end_advance: diff --git a/fs/ncpfs/ncp_fs_i.h b/fs/ncpfs/ncp_fs_i.h index 4b0bec477846..c4794504f843 100644 --- a/fs/ncpfs/ncp_fs_i.h +++ b/fs/ncpfs/ncp_fs_i.h @@ -22,6 +22,7 @@ struct ncp_inode_info { int access; int flags; #define NCPI_KLUDGE_SYMLINK 0x0001 +#define NCPI_DIR_CACHE 0x0002 __u8 file_handle[6]; struct inode vfs_inode; }; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index b785f74bfe3c..250e443a07f3 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -184,36 +184,6 @@ ncp_new_dentry(struct dentry* dentry) dentry->d_time = jiffies; } -static inline void -ncp_renew_dentries(struct dentry *parent) -{ - struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct dentry *dentry; - - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (dentry->d_fsdata == NULL) - ncp_age_dentry(server, dentry); - else - ncp_new_dentry(dentry); - } - spin_unlock(&parent->d_lock); -} - -static inline void -ncp_invalidate_dircache_entries(struct dentry *parent) -{ - struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct dentry *dentry; - - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - dentry->d_fsdata = NULL; - ncp_age_dentry(server, dentry); - } - spin_unlock(&parent->d_lock); -} - struct ncp_cache_head { time_t mtime; unsigned long time; /* cache age */ -- cgit v1.2.3-59-g8ed1b From d6cb125b9983e1ea9444f794b2d3ed5e3ad737b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 24 Dec 2014 22:47:00 -0500 Subject: kill d_validate() no users left Signed-off-by: Al Viro --- fs/dcache.c | 31 ------------------------------- include/linux/dcache.h | 3 --- 2 files changed, 34 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index e368d4f412f9..40432e59d72e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2187,37 +2187,6 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) } EXPORT_SYMBOL(d_hash_and_lookup); -/** - * d_validate - verify dentry provided from insecure source (deprecated) - * @dentry: The dentry alleged to be valid child of @dparent - * @dparent: The parent dentry (known to be valid) - * - * An insecure source has sent us a dentry, here we verify it and dget() it. - * This is used by ncpfs in its readdir implementation. - * Zero is returned in the dentry is invalid. - * - * This function is slow for big directories, and deprecated, do not use it. - */ -int d_validate(struct dentry *dentry, struct dentry *dparent) -{ - struct dentry *child; - - spin_lock(&dparent->d_lock); - list_for_each_entry(child, &dparent->d_subdirs, d_child) { - if (dentry == child) { - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - __dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dparent->d_lock); - return 1; - } - } - spin_unlock(&dparent->d_lock); - - return 0; -} -EXPORT_SYMBOL(d_validate); - /* * When a file is deleted, we have two options: * - turn this dentry into a negative dentry diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5a813988e6d4..92c08cf7670e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -319,9 +319,6 @@ static inline unsigned d_count(const struct dentry *dentry) return dentry->d_lockref.count; } -/* validate "insecure" dentry pointer */ -extern int d_validate(struct dentry *, struct dentry *); - /* * helper function for dentry_operations.d_dname() members */ -- cgit v1.2.3-59-g8ed1b From d443b9fd56e85c0e58d10b75cf5eb38e0b2c4c02 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 25 Dec 2014 16:47:49 -0500 Subject: gut proc_register() a bit There are only 3 callers and quite a bit of that thing is executed exactly in one of those. Just lift it there... Signed-off-by: Al Viro --- fs/proc/generic.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7fea13229f33..1766fe70233e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -350,29 +350,12 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp if (ret) return ret; - if (S_ISDIR(dp->mode)) { - dp->proc_fops = &proc_dir_operations; - dp->proc_iops = &proc_dir_inode_operations; - dir->nlink++; - } else if (S_ISLNK(dp->mode)) { - dp->proc_iops = &proc_link_inode_operations; - } else if (S_ISREG(dp->mode)) { - BUG_ON(dp->proc_fops == NULL); - dp->proc_iops = &proc_file_inode_operations; - } else { - WARN_ON(1); - proc_free_inum(dp->low_ino); - return -EINVAL; - } - spin_lock(&proc_subdir_lock); dp->parent = dir; if (pde_subdir_insert(dir, dp) == false) { WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); spin_unlock(&proc_subdir_lock); - if (S_ISDIR(dp->mode)) - dir->nlink--; proc_free_inum(dp->low_ino); return -EEXIST; } @@ -431,6 +414,7 @@ struct proc_dir_entry *proc_symlink(const char *name, ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); if (ent->data) { strcpy((char*)ent->data,dest); + ent->proc_iops = &proc_link_inode_operations; if (proc_register(parent, ent) < 0) { kfree(ent->data); kfree(ent); @@ -456,8 +440,12 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent = __proc_create(&parent, name, S_IFDIR | mode, 2); if (ent) { ent->data = data; + ent->proc_fops = &proc_dir_operations; + ent->proc_iops = &proc_dir_inode_operations; + parent->nlink++; if (proc_register(parent, ent) < 0) { kfree(ent); + parent->nlink--; ent = NULL; } } @@ -493,6 +481,8 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, return NULL; } + BUG_ON(proc_fops == NULL); + if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO; pde = __proc_create(&parent, name, mode, 1); @@ -500,6 +490,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, goto out; pde->proc_fops = proc_fops; pde->data = data; + pde->proc_iops = &proc_file_inode_operations; if (proc_register(parent, pde) < 0) goto out_free; return pde; -- cgit v1.2.3-59-g8ed1b From 5e9e2040247ac19a46d6627736ece726c46f1fdf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 Dec 2014 22:49:43 -0500 Subject: get rid of lustre_dump_dentry() it's not only badly racy, it's actually dead code - the call in ll_invalidate_aliases() is unreachable. For an alias of our inode to be root dentry, the inode would have to be its ->d_inode, aka the root inode, and we never call ll_invalidate_aliases() for that. Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/llite/dcache.c | 8 ----- .../staging/lustre/lustre/llite/llite_internal.h | 1 - drivers/staging/lustre/lustre/llite/llite_lib.c | 42 ---------------------- 3 files changed, 51 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 5bb9c85cec81..88614b71cf6d 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -263,14 +263,6 @@ void ll_invalidate_aliases(struct inode *inode) dentry, dentry, dentry->d_parent, dentry->d_inode, dentry->d_flags); - if (unlikely(dentry == dentry->d_sb->s_root)) { - CERROR("%s: called on root dentry=%p, fid="DFID"\n", - ll_get_fsname(dentry->d_sb, NULL, 0), - dentry, PFID(ll_inode2fid(inode))); - lustre_dump_dentry(dentry, 1); - dump_stack(); - } - d_lustre_invalidate(dentry, 0); } ll_unlock_dcache(inode); diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 37306e0c7aad..d032c2b086cc 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -816,7 +816,6 @@ int ll_show_options(struct seq_file *seq, struct dentry *dentry); void ll_dirty_page_discard_warn(struct page *page, int ioret); int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, struct super_block *, struct lookup_intent *); -void lustre_dump_dentry(struct dentry *, int recur); int ll_obd_statfs(struct inode *inode, void *arg); int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize); diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index a3367bfb1456..0af546a74154 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -665,48 +665,6 @@ int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *lmmsize) return rc; } -static void ll_dump_inode(struct inode *inode) -{ - struct ll_d_hlist_node *tmp; - int dentry_count = 0; - - LASSERT(inode != NULL); - - ll_d_hlist_for_each(tmp, &inode->i_dentry) - dentry_count++; - - CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n", - inode, ll_i2mdexp(inode)->exp_obd->obd_name, inode->i_ino, - inode->i_mode, atomic_read(&inode->i_count), dentry_count); -} - -void lustre_dump_dentry(struct dentry *dentry, int recur) -{ - struct list_head *tmp; - int subdirs = 0; - - LASSERT(dentry != NULL); - - list_for_each(tmp, &dentry->d_subdirs) - subdirs++; - - CERROR("dentry %p dump: name=%pd parent=%pd (%p), inode=%p, count=%u, flags=0x%x, fsdata=%p, %d subdirs\n", - dentry, dentry, dentry->d_parent, dentry->d_parent, - dentry->d_inode, d_count(dentry), - dentry->d_flags, dentry->d_fsdata, subdirs); - if (dentry->d_inode != NULL) - ll_dump_inode(dentry->d_inode); - - if (recur == 0) - return; - - list_for_each(tmp, &dentry->d_subdirs) { - struct dentry *d = list_entry(tmp, struct dentry, d_child); - - lustre_dump_dentry(d, recur - 1); - } -} - static void client_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); -- cgit v1.2.3-59-g8ed1b From f4a4a8b1252a08b60cde66a6622bbca4a7f4af2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Dec 2014 09:27:07 -0500 Subject: file->f_path.dentry is pinned down for as long as the file is open... Signed-off-by: Al Viro --- kernel/auditsc.c | 5 +---- security/commoncap.c | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 072566dd0caf..55f82fce2526 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2405,7 +2405,6 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, struct audit_aux_data_bprm_fcaps *ax; struct audit_context *context = current->audit_context; struct cpu_vfs_cap_data vcaps; - struct dentry *dentry; ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) @@ -2415,9 +2414,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, ax->d.next = context->aux; context->aux = (void *)ax; - dentry = dget(bprm->file->f_path.dentry); - get_vfs_caps_from_disk(dentry, &vcaps); - dput(dentry); + get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); ax->fcap.permitted = vcaps.permitted; ax->fcap.inheritable = vcaps.inheritable; diff --git a/security/commoncap.c b/security/commoncap.c index 2915d8503054..f66713bd7450 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -434,7 +434,6 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data */ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_cap) { - struct dentry *dentry; int rc = 0; struct cpu_vfs_cap_data vcaps; @@ -446,9 +445,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) return 0; - dentry = dget(bprm->file->f_path.dentry); - - rc = get_vfs_caps_from_disk(dentry, &vcaps); + rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); if (rc < 0) { if (rc == -EINVAL) printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n", @@ -464,7 +461,6 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c __func__, rc, bprm->filename); out: - dput(dentry); if (rc) bprm_clear_caps(bprm); -- cgit v1.2.3-59-g8ed1b From 92fc41c3a612d27521241e2a550d938520fce0d6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 Jan 2015 14:25:19 -0500 Subject: mode_t whack-a-mole: chelsio it's umode_t, damnit... Signed-off-by: Al Viro --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h index a3d8867efd3d..cc1d25508924 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h @@ -40,7 +40,7 @@ struct t4_debugfs_entry { const char *name; const struct file_operations *ops; - mode_t mode; + umode_t mode; unsigned char data; }; -- cgit v1.2.3-59-g8ed1b From 9e251d02041432487d89cb340e72490c4bbc198a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Jan 2015 20:40:02 -0500 Subject: kill pin_put() Signed-off-by: Al Viro --- fs/fs_pin.c | 11 ----------- include/linux/fs_pin.h | 1 - kernel/acct.c | 14 ++++++++++---- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 9368236ca100..f173313760b8 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -4,19 +4,8 @@ #include "internal.h" #include "mount.h" -static void pin_free_rcu(struct rcu_head *head) -{ - kfree(container_of(head, struct fs_pin, rcu)); -} - static DEFINE_SPINLOCK(pin_lock); -void pin_put(struct fs_pin *p) -{ - if (atomic_long_dec_and_test(&p->count)) - call_rcu(&p->rcu, pin_free_rcu); -} - void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index f66525e72ccf..68a54b7741aa 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -12,6 +12,5 @@ struct fs_pin { void (*kill)(struct fs_pin *); }; -void pin_put(struct fs_pin *); void pin_remove(struct fs_pin *); void pin_insert(struct fs_pin *, struct vfsmount *); diff --git a/kernel/acct.c b/kernel/acct.c index 33738ef972f3..7bb9e659a7da 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -124,6 +124,12 @@ out: return acct->active; } +static void acct_put(struct bsd_acct_struct *p) +{ + if (atomic_long_dec_and_test(&p->pin.count)) + kfree_rcu(p, pin.rcu); +} + static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; @@ -144,7 +150,7 @@ again: mutex_lock(&res->lock); if (!res->ns) { mutex_unlock(&res->lock); - pin_put(&res->pin); + acct_put(res); goto again; } return res; @@ -175,7 +181,7 @@ static void acct_kill(struct bsd_acct_struct *acct, acct->ns = NULL; atomic_long_dec(&acct->pin.count); mutex_unlock(&acct->lock); - pin_put(&acct->pin); + acct_put(acct); } } @@ -186,7 +192,7 @@ static void acct_pin_kill(struct fs_pin *pin) mutex_lock(&acct->lock); if (!acct->ns) { mutex_unlock(&acct->lock); - pin_put(pin); + acct_put(acct); acct = NULL; } acct_kill(acct, NULL); @@ -576,7 +582,7 @@ static void slow_acct_process(struct pid_namespace *ns) if (acct) { do_acct_process(acct); mutex_unlock(&acct->lock); - pin_put(&acct->pin); + acct_put(acct); } } } -- cgit v1.2.3-59-g8ed1b From 32426f6653cbfde1ca16aff27a530ee36332f796 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 00:07:35 -0500 Subject: pull bumping refcount into ->kill() there will be one more change of ->kill() calling conventions; this isn't final. Signed-off-by: Al Viro --- fs/fs_pin.c | 12 ------------ kernel/acct.c | 6 ++++++ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/fs_pin.c b/fs/fs_pin.c index f173313760b8..5eb39a93a560 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -34,12 +34,6 @@ void mnt_pin_kill(struct mount *m) break; } pin = hlist_entry(p, struct fs_pin, m_list); - if (!atomic_long_inc_not_zero(&pin->count)) { - rcu_read_unlock(); - cpu_relax(); - continue; - } - rcu_read_unlock(); pin->kill(pin); } } @@ -56,12 +50,6 @@ void sb_pin_kill(struct super_block *sb) break; } pin = hlist_entry(p, struct fs_pin, s_list); - if (!atomic_long_inc_not_zero(&pin->count)) { - rcu_read_unlock(); - cpu_relax(); - continue; - } - rcu_read_unlock(); pin->kill(pin); } } diff --git a/kernel/acct.c b/kernel/acct.c index 7bb9e659a7da..a74f3d1a0c26 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -189,6 +189,12 @@ static void acct_pin_kill(struct fs_pin *pin) { struct bsd_acct_struct *acct; acct = container_of(pin, struct bsd_acct_struct, pin); + if (!atomic_long_inc_not_zero(&pin->count)) { + rcu_read_unlock(); + cpu_relax(); + return; + } + rcu_read_unlock(); mutex_lock(&acct->lock); if (!acct->ns) { mutex_unlock(&acct->lock); -- cgit v1.2.3-59-g8ed1b From 360f54796ed65939093ae373b92ebd5ef3341776 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Jan 2015 15:19:03 -0800 Subject: dcache: let the dentry count go down to zero without taking d_lock We can be more aggressive about this, if we are clever and careful. This is subtle. Signed-off-by: Linus Torvalds Signed-off-by: Al Viro --- fs/dcache.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/lockref.h | 3 +- lib/lockref.c | 36 +++++++++++---- 3 files changed, 144 insertions(+), 13 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 40432e59d72e..a14d00e9839e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -508,7 +508,7 @@ static void __dentry_kill(struct dentry *dentry) * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. */ - BUG_ON((int)dentry->d_lockref.count > 0); + BUG_ON(dentry->d_lockref.count > 0); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -561,7 +561,7 @@ static inline struct dentry *lock_parent(struct dentry *dentry) struct dentry *parent = dentry->d_parent; if (IS_ROOT(dentry)) return NULL; - if (unlikely((int)dentry->d_lockref.count < 0)) + if (unlikely(dentry->d_lockref.count < 0)) return NULL; if (likely(spin_trylock(&parent->d_lock))) return parent; @@ -590,6 +590,110 @@ again: return parent; } +/* + * Try to do a lockless dput(), and return whether that was successful. + * + * If unsuccessful, we return false, having already taken the dentry lock. + * + * The caller needs to hold the RCU read lock, so that the dentry is + * guaranteed to stay around even if the refcount goes down to zero! + */ +static inline bool fast_dput(struct dentry *dentry) +{ + int ret; + unsigned int d_flags; + + /* + * If we have a d_op->d_delete() operation, we sould not + * let the dentry count go to zero, so use "put__or_lock". + */ + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) + return lockref_put_or_lock(&dentry->d_lockref); + + /* + * .. otherwise, we can try to just decrement the + * lockref optimistically. + */ + ret = lockref_put_return(&dentry->d_lockref); + + /* + * If the lockref_put_return() failed due to the lock being held + * by somebody else, the fast path has failed. We will need to + * get the lock, and then check the count again. + */ + if (unlikely(ret < 0)) { + spin_lock(&dentry->d_lock); + if (dentry->d_lockref.count > 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + return 1; + } + return 0; + } + + /* + * If we weren't the last ref, we're done. + */ + if (ret) + return 1; + + /* + * Careful, careful. The reference count went down + * to zero, but we don't hold the dentry lock, so + * somebody else could get it again, and do another + * dput(), and we need to not race with that. + * + * However, there is a very special and common case + * where we don't care, because there is nothing to + * do: the dentry is still hashed, it does not have + * a 'delete' op, and it's referenced and already on + * the LRU list. + * + * NOTE! Since we aren't locked, these values are + * not "stable". However, it is sufficient that at + * some point after we dropped the reference the + * dentry was hashed and the flags had the proper + * value. Other dentry users may have re-gotten + * a reference to the dentry and change that, but + * our work is done - we can leave the dentry + * around with a zero refcount. + */ + smp_rmb(); + d_flags = ACCESS_ONCE(dentry->d_flags); + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + + /* Nothing to do? Dropping the reference was all we needed? */ + if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) + return 1; + + /* + * Not the fast normal case? Get the lock. We've already decremented + * the refcount, but we'll need to re-check the situation after + * getting the lock. + */ + spin_lock(&dentry->d_lock); + + /* + * Did somebody else grab a reference to it in the meantime, and + * we're no longer the last user after all? Alternatively, somebody + * else could have killed it and marked it dead. Either way, we + * don't need to do anything else. + */ + if (dentry->d_lockref.count) { + spin_unlock(&dentry->d_lock); + return 1; + } + + /* + * Re-get the reference we optimistically dropped. We hold the + * lock, and we just tested that it was zero, so we can just + * set it to 1. + */ + dentry->d_lockref.count = 1; + return 0; +} + + /* * This is dput * @@ -622,8 +726,14 @@ void dput(struct dentry *dentry) return; repeat: - if (lockref_put_or_lock(&dentry->d_lockref)) + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); return; + } + + /* Slow case: now with the dentry lock held */ + rcu_read_unlock(); /* Unreachable? Get rid of it */ if (unlikely(d_unhashed(dentry))) @@ -810,7 +920,7 @@ static void shrink_dentry_list(struct list_head *list) * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free it. */ - if ((int)dentry->d_lockref.count > 0) { + if (dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 4bfde0e99ed5..b10b122dd099 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -28,12 +28,13 @@ struct lockref { #endif struct { spinlock_t lock; - unsigned int count; + int count; }; }; }; extern void lockref_get(struct lockref *); +extern int lockref_put_return(struct lockref *); extern int lockref_get_not_zero(struct lockref *); extern int lockref_get_or_lock(struct lockref *); extern int lockref_put_or_lock(struct lockref *); diff --git a/lib/lockref.c b/lib/lockref.c index d2233de9a86e..ecb9a665ec19 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -60,7 +60,7 @@ void lockref_get(struct lockref *lockref) EXPORT_SYMBOL(lockref_get); /** - * lockref_get_not_zero - Increments count unless the count is 0 + * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ @@ -70,7 +70,7 @@ int lockref_get_not_zero(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) return 0; , return 1; @@ -78,7 +78,7 @@ int lockref_get_not_zero(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if (lockref->count) { + if (lockref->count > 0) { lockref->count++; retval = 1; } @@ -88,7 +88,7 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** - * lockref_get_or_lock - Increments count unless the count is 0 + * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero * and we got the lock instead. @@ -97,14 +97,14 @@ int lockref_get_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) break; , return 1; ); spin_lock(&lockref->lock); - if (!lockref->count) + if (lockref->count <= 0) return 0; lockref->count++; spin_unlock(&lockref->lock); @@ -112,6 +112,26 @@ int lockref_get_or_lock(struct lockref *lockref) } EXPORT_SYMBOL(lockref_get_or_lock); +/** + * lockref_put_return - Decrement reference count if possible + * @lockref: pointer to lockref structure + * + * Decrement the reference count and return the new value. + * If the lockref was dead or locked, return an error. + */ +int lockref_put_return(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count--; + if (old.count <= 0) + return -1; + , + return new.count; + ); + return -1; +} +EXPORT_SYMBOL(lockref_put_return); + /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure @@ -158,7 +178,7 @@ int lockref_get_not_dead(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if ((int)old.count < 0) + if (old.count < 0) return 0; , return 1; @@ -166,7 +186,7 @@ int lockref_get_not_dead(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if ((int) lockref->count >= 0) { + if (lockref->count >= 0) { lockref->count++; retval = 1; } -- cgit v1.2.3-59-g8ed1b From 34cece2e8a1d2b66f00e153a19b80b4d4cec4eb8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 12:47:38 -0500 Subject: take count and rcu_head out of fs_pin Signed-off-by: Al Viro --- include/linux/fs_pin.h | 10 ++-------- kernel/acct.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 68a54b7741aa..a2e71912e758 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -1,14 +1,8 @@ #include struct fs_pin { - atomic_long_t count; - union { - struct { - struct hlist_node s_list; - struct hlist_node m_list; - }; - struct rcu_head rcu; - }; + struct hlist_node s_list; + struct hlist_node m_list; void (*kill)(struct fs_pin *); }; diff --git a/kernel/acct.c b/kernel/acct.c index a74f3d1a0c26..b8fbefb8678f 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -80,6 +80,8 @@ static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { struct fs_pin pin; + atomic_long_t count; + struct rcu_head rcu; struct mutex lock; int active; unsigned long needcheck; @@ -126,8 +128,8 @@ out: static void acct_put(struct bsd_acct_struct *p) { - if (atomic_long_dec_and_test(&p->pin.count)) - kfree_rcu(p, pin.rcu); + if (atomic_long_dec_and_test(&p->count)) + kfree_rcu(p, rcu); } static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) @@ -141,7 +143,7 @@ again: rcu_read_unlock(); return NULL; } - if (!atomic_long_inc_not_zero(&res->pin.count)) { + if (!atomic_long_inc_not_zero(&res->count)) { rcu_read_unlock(); cpu_relax(); goto again; @@ -179,7 +181,7 @@ static void acct_kill(struct bsd_acct_struct *acct, pin_remove(&acct->pin); ns->bacct = new; acct->ns = NULL; - atomic_long_dec(&acct->pin.count); + atomic_long_dec(&acct->count); mutex_unlock(&acct->lock); acct_put(acct); } @@ -189,7 +191,7 @@ static void acct_pin_kill(struct fs_pin *pin) { struct bsd_acct_struct *acct; acct = container_of(pin, struct bsd_acct_struct, pin); - if (!atomic_long_inc_not_zero(&pin->count)) { + if (!atomic_long_inc_not_zero(&acct->count)) { rcu_read_unlock(); cpu_relax(); return; @@ -250,7 +252,7 @@ static int acct_on(struct filename *pathname) mnt = file->f_path.mnt; file->f_path.mnt = internal; - atomic_long_set(&acct->pin.count, 1); + atomic_long_set(&acct->count, 1); acct->pin.kill = acct_pin_kill; acct->file = file; acct->needcheck = jiffies; -- cgit v1.2.3-59-g8ed1b From 3b994d98a815d934ab6a77a380882865982c14f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 17:18:37 -0500 Subject: get rid of the second argument of acct_kill() Replace the old ns->bacct only with NULL and only if it still points to acct. And assign the new value to it *before* calling acct_kill() in acct_on(). That way we don't need to pass the new acct to acct_kill(). Signed-off-by: Al Viro --- kernel/acct.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/acct.c b/kernel/acct.c index b8fbefb8678f..cf6588ab517b 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -168,8 +168,7 @@ static void close_work(struct work_struct *work) complete(&acct->done); } -static void acct_kill(struct bsd_acct_struct *acct, - struct bsd_acct_struct *new) +static void acct_kill(struct bsd_acct_struct *acct) { if (acct) { struct pid_namespace *ns = acct->ns; @@ -179,7 +178,7 @@ static void acct_kill(struct bsd_acct_struct *acct, schedule_work(&acct->work); wait_for_completion(&acct->done); pin_remove(&acct->pin); - ns->bacct = new; + cmpxchg(&ns->bacct, acct, NULL); acct->ns = NULL; atomic_long_dec(&acct->count); mutex_unlock(&acct->lock); @@ -203,7 +202,7 @@ static void acct_pin_kill(struct fs_pin *pin) acct_put(acct); acct = NULL; } - acct_kill(acct, NULL); + acct_kill(acct); } static int acct_on(struct filename *pathname) @@ -262,10 +261,8 @@ static int acct_on(struct filename *pathname) pin_insert(&acct->pin, mnt); old = acct_get(ns); - if (old) - acct_kill(old, acct); - else - ns->bacct = acct; + ns->bacct = acct; + acct_kill(old); mutex_unlock(&acct->lock); mnt_drop_write(mnt); mntput(mnt); @@ -302,7 +299,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name) mutex_unlock(&acct_on_mutex); putname(tmp); } else { - acct_kill(acct_get(task_active_pid_ns(current)), NULL); + acct_kill(acct_get(task_active_pid_ns(current))); } return error; @@ -310,7 +307,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name) void acct_exit_ns(struct pid_namespace *ns) { - acct_kill(acct_get(ns), NULL); + acct_kill(acct_get(ns)); } /* -- cgit v1.2.3-59-g8ed1b From fdab684d7202774bfd8762d4a656a553b787c8ec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 11 Jan 2015 10:57:27 -0500 Subject: allow attaching fs_pin to a group not associated with some superblock Signed-off-by: Al Viro --- fs/fs_pin.c | 20 +++++++++++++------- fs/internal.h | 2 +- fs/super.c | 4 ++-- include/linux/fs_pin.h | 1 + 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 5eb39a93a560..50ef7d2ef03c 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -14,14 +14,20 @@ void pin_remove(struct fs_pin *pin) spin_unlock(&pin_lock); } -void pin_insert(struct fs_pin *pin, struct vfsmount *m) +void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) { spin_lock(&pin_lock); - hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); + if (p) + hlist_add_head(&pin->s_list, p); hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); spin_unlock(&pin_lock); } +void pin_insert(struct fs_pin *pin, struct vfsmount *m) +{ + pin_insert_group(pin, m, &m->mnt_sb->s_pins); +} + void mnt_pin_kill(struct mount *m) { while (1) { @@ -38,18 +44,18 @@ void mnt_pin_kill(struct mount *m) } } -void sb_pin_kill(struct super_block *sb) +void group_pin_kill(struct hlist_head *p) { while (1) { - struct hlist_node *p; + struct hlist_node *q; struct fs_pin *pin; rcu_read_lock(); - p = ACCESS_ONCE(sb->s_pins.first); - if (!p) { + q = ACCESS_ONCE(p->first); + if (!q) { rcu_read_unlock(); break; } - pin = hlist_entry(p, struct fs_pin, s_list); + pin = hlist_entry(q, struct fs_pin, s_list); pin->kill(pin); } } diff --git a/fs/internal.h b/fs/internal.h index e9a61fe67575..a6efd2f09ba3 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -145,7 +145,7 @@ extern const struct file_operations pipefifo_fops; /* * fs_pin.c */ -extern void sb_pin_kill(struct super_block *sb); +extern void group_pin_kill(struct hlist_head *p); extern void mnt_pin_kill(struct mount *m); /* diff --git a/fs/super.c b/fs/super.c index eae088f6aaae..2d822459bc3d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -706,9 +706,9 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); if (remount_ro) { - if (sb->s_pins.first) { + if (!hlist_empty(&sb->s_pins)) { up_write(&sb->s_umount); - sb_pin_kill(sb); + group_pin_kill(&sb->s_pins); down_write(&sb->s_umount); if (!sb->s_root) return 0; diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index a2e71912e758..2be38d1464ae 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -7,4 +7,5 @@ struct fs_pin { }; void pin_remove(struct fs_pin *); +void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); void pin_insert(struct fs_pin *, struct vfsmount *); -- cgit v1.2.3-59-g8ed1b From 59eda0e07f43c950d31756213b607af673e551f0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 17:53:21 -0500 Subject: new fs_pin killing logics Signed-off-by: Al Viro --- fs/fs_pin.c | 54 +++++++++++++++++++++++++---- include/linux/fs_pin.h | 13 ++++++- include/linux/pid_namespace.h | 4 +-- kernel/acct.c | 81 ++++++++++++++++++------------------------- 4 files changed, 96 insertions(+), 56 deletions(-) diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 50ef7d2ef03c..0c77bdc238b2 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -1,4 +1,5 @@ #include +#include #include #include #include "internal.h" @@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin) hlist_del(&pin->m_list); hlist_del(&pin->s_list); spin_unlock(&pin_lock); + spin_lock_irq(&pin->wait.lock); + pin->done = 1; + wake_up_locked(&pin->wait); + spin_unlock_irq(&pin->wait.lock); } void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) @@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) pin_insert_group(pin, m, &m->mnt_sb->s_pins); } +void pin_kill(struct fs_pin *p) +{ + wait_queue_t wait; + + if (!p) { + rcu_read_unlock(); + return; + } + init_wait(&wait); + spin_lock_irq(&p->wait.lock); + if (likely(!p->done)) { + p->done = -1; + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + p->kill(p); + return; + } + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + return; + } + __add_wait_queue(&p->wait, &wait); + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + if (likely(list_empty(&wait.task_list))) + break; + /* OK, we know p couldn't have been freed yet */ + spin_lock_irq(&p->wait.lock); + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + break; + } + } + rcu_read_unlock(); +} + void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; - struct fs_pin *pin; rcu_read_lock(); p = ACCESS_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; } - pin = hlist_entry(p, struct fs_pin, m_list); - pin->kill(pin); + pin_kill(hlist_entry(p, struct fs_pin, m_list)); } } @@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p) { while (1) { struct hlist_node *q; - struct fs_pin *pin; rcu_read_lock(); q = ACCESS_ONCE(p->first); if (!q) { rcu_read_unlock(); break; } - pin = hlist_entry(q, struct fs_pin, s_list); - pin->kill(pin); + pin_kill(hlist_entry(q, struct fs_pin, s_list)); } } diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 2be38d1464ae..9dc4e0384bfb 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -1,11 +1,22 @@ -#include +#include struct fs_pin { + wait_queue_head_t wait; + int done; struct hlist_node s_list; struct hlist_node m_list; void (*kill)(struct fs_pin *); }; +struct vfsmount; + +static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) +{ + init_waitqueue_head(&p->wait); + p->kill = kill; +} + void pin_remove(struct fs_pin *); void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *); void pin_insert(struct fs_pin *, struct vfsmount *); +void pin_kill(struct fs_pin *); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b9cf6c51b181..918b117a7cd3 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -19,7 +19,7 @@ struct pidmap { #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) #define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) -struct bsd_acct_struct; +struct fs_pin; struct pid_namespace { struct kref kref; @@ -37,7 +37,7 @@ struct pid_namespace { struct dentry *proc_thread_self; #endif #ifdef CONFIG_BSD_PROCESS_ACCT - struct bsd_acct_struct *bacct; + struct fs_pin *bacct; #endif struct user_namespace *user_ns; struct work_struct proc_work; diff --git a/kernel/acct.c b/kernel/acct.c index cf6588ab517b..e6c10d1a4058 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { struct fs_pin pin; @@ -91,6 +90,8 @@ struct bsd_acct_struct { struct completion done; }; +static void do_acct_process(struct bsd_acct_struct *acct); + /* * Check the amount of free space and suspend/resume accordingly. */ @@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p) kfree_rcu(p, rcu); } +static inline struct bsd_acct_struct *to_acct(struct fs_pin *p) +{ + return p ? container_of(p, struct bsd_acct_struct, pin) : NULL; +} + static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; again: smp_rmb(); rcu_read_lock(); - res = ACCESS_ONCE(ns->bacct); + res = to_acct(ACCESS_ONCE(ns->bacct)); if (!res) { rcu_read_unlock(); return NULL; @@ -150,7 +156,7 @@ again: } rcu_read_unlock(); mutex_lock(&res->lock); - if (!res->ns) { + if (res != to_acct(ACCESS_ONCE(ns->bacct))) { mutex_unlock(&res->lock); acct_put(res); goto again; @@ -158,6 +164,19 @@ again: return res; } +static void acct_pin_kill(struct fs_pin *pin) +{ + struct bsd_acct_struct *acct = to_acct(pin); + mutex_lock(&acct->lock); + do_acct_process(acct); + schedule_work(&acct->work); + wait_for_completion(&acct->done); + cmpxchg(&acct->ns->bacct, pin, NULL); + mutex_unlock(&acct->lock); + pin_remove(pin); + acct_put(acct); +} + static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); @@ -168,49 +187,13 @@ static void close_work(struct work_struct *work) complete(&acct->done); } -static void acct_kill(struct bsd_acct_struct *acct) -{ - if (acct) { - struct pid_namespace *ns = acct->ns; - do_acct_process(acct); - INIT_WORK(&acct->work, close_work); - init_completion(&acct->done); - schedule_work(&acct->work); - wait_for_completion(&acct->done); - pin_remove(&acct->pin); - cmpxchg(&ns->bacct, acct, NULL); - acct->ns = NULL; - atomic_long_dec(&acct->count); - mutex_unlock(&acct->lock); - acct_put(acct); - } -} - -static void acct_pin_kill(struct fs_pin *pin) -{ - struct bsd_acct_struct *acct; - acct = container_of(pin, struct bsd_acct_struct, pin); - if (!atomic_long_inc_not_zero(&acct->count)) { - rcu_read_unlock(); - cpu_relax(); - return; - } - rcu_read_unlock(); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - acct_put(acct); - acct = NULL; - } - acct_kill(acct); -} - static int acct_on(struct filename *pathname) { struct file *file; struct vfsmount *mnt, *internal; struct pid_namespace *ns = task_active_pid_ns(current); - struct bsd_acct_struct *acct, *old; + struct bsd_acct_struct *acct; + struct fs_pin *old; int err; acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); @@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname) file->f_path.mnt = internal; atomic_long_set(&acct->count, 1); - acct->pin.kill = acct_pin_kill; + init_fs_pin(&acct->pin, acct_pin_kill); acct->file = file; acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); + INIT_WORK(&acct->work, close_work); + init_completion(&acct->done); mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ pin_insert(&acct->pin, mnt); - old = acct_get(ns); - ns->bacct = acct; - acct_kill(old); + rcu_read_lock(); + old = xchg(&ns->bacct, &acct->pin); mutex_unlock(&acct->lock); + pin_kill(old); mnt_drop_write(mnt); mntput(mnt); return 0; @@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) mutex_unlock(&acct_on_mutex); putname(tmp); } else { - acct_kill(acct_get(task_active_pid_ns(current))); + rcu_read_lock(); + pin_kill(task_active_pid_ns(current)->bacct); } return error; @@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name) void acct_exit_ns(struct pid_namespace *ns) { - acct_kill(acct_get(ns)); + rcu_read_lock(); + pin_kill(ns->bacct); } /* -- cgit v1.2.3-59-g8ed1b From 87b95ce0964c016ede92763be9c164e49f1019e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Jan 2015 19:01:08 -0500 Subject: switch the IO-triggering parts of umount to fs_pin Signed-off-by: Al Viro --- fs/fs_pin.c | 1 - fs/mount.h | 4 +++- fs/namespace.c | 44 +++++++++++++++++--------------------------- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 0c77bdc238b2..b06c98796afb 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -1,7 +1,6 @@ #include #include #include -#include #include "internal.h" #include "mount.h" diff --git a/fs/mount.h b/fs/mount.h index 0ad6f760ce52..6a61c2b3e385 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -2,6 +2,7 @@ #include #include #include +#include struct mnt_namespace { atomic_t count; @@ -62,7 +63,8 @@ struct mount { int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ struct hlist_head mnt_pins; - struct path mnt_ex_mountpoint; + struct fs_pin mnt_umount; + struct dentry *mnt_ex_mountpoint; }; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ diff --git a/fs/namespace.c b/fs/namespace.c index cd1e9681a0cf..4a985ff0ddfc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -190,6 +190,14 @@ unsigned int mnt_get_count(struct mount *mnt) #endif } +static void drop_mountpoint(struct fs_pin *p) +{ + struct mount *m = container_of(p, struct mount, mnt_umount); + dput(m->mnt_ex_mountpoint); + pin_remove(p); + mntput(&m->mnt); +} + static struct mount *alloc_vfsmnt(const char *name) { struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -229,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif + init_fs_pin(&mnt->mnt_umount, drop_mountpoint); } return mnt; @@ -1289,7 +1298,6 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void namespace_unlock(void) { - struct mount *mnt; struct hlist_head head = unmounted; if (likely(hlist_empty(&head))) { @@ -1299,23 +1307,11 @@ static void namespace_unlock(void) head.first->pprev = &head.first; INIT_HLIST_HEAD(&unmounted); - - /* undo decrements we'd done in umount_tree() */ - hlist_for_each_entry(mnt, &head, mnt_hash) - if (mnt->mnt_ex_mountpoint.mnt) - mntget(mnt->mnt_ex_mountpoint.mnt); - up_write(&namespace_sem); synchronize_rcu(); - while (!hlist_empty(&head)) { - mnt = hlist_entry(head.first, struct mount, mnt_hash); - hlist_del_init(&mnt->mnt_hash); - if (mnt->mnt_ex_mountpoint.mnt) - path_put(&mnt->mnt_ex_mountpoint); - mntput(&mnt->mnt); - } + group_pin_kill(&head); } static inline void namespace_lock(void) @@ -1334,7 +1330,6 @@ void umount_tree(struct mount *mnt, int how) { HLIST_HEAD(tmp_list); struct mount *p; - struct mount *last = NULL; for (p = mnt; p; p = next_mnt(p, mnt)) { hlist_del_init_rcu(&p->mnt_hash); @@ -1347,33 +1342,28 @@ void umount_tree(struct mount *mnt, int how) if (how) propagate_umount(&tmp_list); - hlist_for_each_entry(p, &tmp_list, mnt_hash) { + while (!hlist_empty(&tmp_list)) { + p = hlist_entry(tmp_list.first, struct mount, mnt_hash); + hlist_del_init_rcu(&p->mnt_hash); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; if (how < 2) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; + + pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); if (mnt_has_parent(p)) { hlist_del_init(&p->mnt_mp_list); put_mountpoint(p->mnt_mp); mnt_add_count(p->mnt_parent, -1); - /* move the reference to mountpoint into ->mnt_ex_mountpoint */ - p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; - p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; + /* old mountpoint will be dropped when we can do that */ + p->mnt_ex_mountpoint = p->mnt_mountpoint; p->mnt_mountpoint = p->mnt.mnt_root; p->mnt_parent = p; p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); - last = p; - } - if (last) { - last->mnt_hash.next = unmounted.first; - if (unmounted.first) - unmounted.first->pprev = &last->mnt_hash.next; - unmounted.first = tmp_list.first; - unmounted.first->pprev = &unmounted.first; } } -- cgit v1.2.3-59-g8ed1b