From fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:49 +1100 Subject: fs: icache RCU free inodes RCU free the struct inode. This will allow: - Subsequent store-free path walking patch. The inode must be consulted for permissions when walking, so an RCU inode reference is a must. - sb_inode_list_lock to be moved inside i_lock because sb list walkers who want to take i_lock no longer need to take sb_inode_list_lock to walk the list in the first place. This will simplify and optimize locking. - Could remove some nested trylock loops in dcache code - Could potentially simplify things a bit in VM land. Do not need to take the page lock to follow page->mapping. The downsides of this is the performance cost of using RCU. In a simple creat/unlink microbenchmark, performance drops by about 10% due to inability to reuse cache-hot slab objects. As iterations increase and RCU freeing starts kicking over, this increases to about 20%. In cases where inode lifetimes are longer (ie. many inodes may be allocated during the average life span of a single inode), a lot of this cache reuse is not applicable, so the regression caused by this patch is smaller. The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU, however this adds some complexity to list walking and store-free path walking, so I prefer to implement this at a later date, if it is shown to be a win in real situations. I haven't found a regression in any non-micro benchmark so I doubt it will be a problem. Signed-off-by: Nick Piggin --- net/socket.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 088fb3fd45e0..97fff3a4e72f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -262,20 +262,20 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } -static void wq_free_rcu(struct rcu_head *head) +static void sock_free_rcu(struct rcu_head *head) { - struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + struct inode *inode = container_of(head, struct inode, i_rcu); + struct socket_alloc *ei = container_of(inode, struct socket_alloc, + vfs_inode); - kfree(wq); + kfree(ei->socket.wq); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(sock_inode_cachep, ei); } static void sock_destroy_inode(struct inode *inode) { - struct socket_alloc *ei; - - ei = container_of(inode, struct socket_alloc, vfs_inode); - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); - kmem_cache_free(sock_inode_cachep, ei); + call_rcu(&inode->i_rcu, sock_free_rcu); } static void init_once(void *foo) -- cgit v1.2.3-59-g8ed1b From ff0c7d15f9787b7e8c601533c015295cc68329f8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:50 +1100 Subject: fs: avoid inode RCU freeing for pseudo fs Pseudo filesystems that don't put inode on RCU list or reachable by rcu-walk dentries do not need to RCU free their inodes. Signed-off-by: Nick Piggin --- fs/inode.c | 6 ++++++ fs/pipe.c | 6 +++++- include/linux/fs.h | 1 + include/linux/net.h | 1 + net/socket.c | 17 +++++++++-------- 5 files changed, 22 insertions(+), 9 deletions(-) (limited to 'net/socket.c') diff --git a/fs/inode.c b/fs/inode.c index 6751dfe8cc06..da85e56378f3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -257,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb) return inode; } +void free_inode_nonrcu(struct inode *inode) +{ + kmem_cache_free(inode_cachep, inode); +} +EXPORT_SYMBOL(free_inode_nonrcu); + void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); diff --git a/fs/pipe.c b/fs/pipe.c index 04629f36e397..ae3592dcc88c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1253,6 +1253,10 @@ out: return ret; } +static const struct super_operations pipefs_ops = { + .destroy_inode = free_inode_nonrcu, +}; + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need @@ -1262,7 +1266,7 @@ out: static struct dentry *pipefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); + return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ff4d0a33b25..ea202fff44f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2233,6 +2233,7 @@ extern void iget_failed(struct inode *); extern void end_writeback(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); diff --git a/include/linux/net.h b/include/linux/net.h index 06bde4908473..16faa130088c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,6 +120,7 @@ enum sock_shutdown_cmd { struct socket_wq { wait_queue_head_t wait; struct fasync_struct *fasync_list; + struct rcu_head rcu; } ____cacheline_aligned_in_smp; /** diff --git a/net/socket.c b/net/socket.c index 97fff3a4e72f..817dc92e9ef8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -262,20 +262,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } -static void sock_free_rcu(struct rcu_head *head) + +static void wq_free_rcu(struct rcu_head *head) { - struct inode *inode = container_of(head, struct inode, i_rcu); - struct socket_alloc *ei = container_of(inode, struct socket_alloc, - vfs_inode); + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); - kfree(ei->socket.wq); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(sock_inode_cachep, ei); + kfree(wq); } static void sock_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, sock_free_rcu); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) -- cgit v1.2.3-59-g8ed1b From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/staging/autofs/root.c | 2 +- drivers/staging/smbfs/dir.c | 8 ++++---- fs/9p/vfs_inode.c | 26 +++++++++++++------------- fs/adfs/dir.c | 2 +- fs/adfs/super.c | 2 +- fs/affs/namei.c | 2 +- fs/affs/super.c | 2 +- fs/afs/dir.c | 2 +- fs/anon_inodes.c | 2 +- fs/autofs4/inode.c | 2 +- fs/autofs4/root.c | 10 +++++----- fs/btrfs/export.c | 4 ++-- fs/btrfs/inode.c | 2 +- fs/ceph/dir.c | 6 +++--- fs/cifs/dir.c | 16 ++++++++-------- fs/cifs/inode.c | 8 ++++---- fs/cifs/link.c | 4 ++-- fs/cifs/readdir.c | 4 ++-- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 8 ++++---- fs/dcache.c | 30 ++++++++++++++++++++++++++---- fs/ecryptfs/inode.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/fat/inode.c | 4 ++-- fs/fat/namei_msdos.c | 6 +++--- fs/fat/namei_vfat.c | 8 ++++---- fs/fuse/dir.c | 2 +- fs/fuse/inode.c | 4 ++-- fs/gfs2/export.c | 4 ++-- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/ops_inode.c | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/dentry.c | 2 +- fs/isofs/inode.c | 2 +- fs/isofs/namei.c | 2 +- fs/jfs/namei.c | 4 ++-- fs/jfs/super.c | 2 +- fs/libfs.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 31 ++++++++++++++++++++----------- fs/ncpfs/dir.c | 4 ++-- fs/ncpfs/inode.c | 2 +- fs/nfs/dir.c | 6 +++--- fs/nfs/getroot.c | 4 ++-- fs/ocfs2/export.c | 4 ++-- fs/ocfs2/namei.c | 10 +++++----- fs/pipe.c | 2 +- fs/proc/base.c | 12 ++++++------ fs/proc/generic.c | 2 +- fs/proc/proc_sysctl.c | 4 ++-- fs/reiserfs/xattr.c | 2 +- fs/sysfs/dir.c | 2 +- fs/sysv/namei.c | 2 +- fs/sysv/super.c | 2 +- include/linux/dcache.h | 6 ++++++ kernel/cgroup.c | 2 +- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 63 files changed, 174 insertions(+), 137 deletions(-) (limited to 'net/socket.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d39d8a53b579..5a24f40bb48e 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx) } path.mnt = mntget(pfmfs_mnt); - path.dentry->d_op = &pfmfs_dentry_operations; + d_set_d_op(path.dentry, &pfmfs_dentry_operations); d_add(path.dentry, inode); file = alloc_file(&path, FMODE_READ, &pfm_file_ops); diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index 0fdec4befd84..b09adb57971f 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr * * We need to do this before we release the directory semaphore. */ - dentry->d_op = &autofs_dentry_operations; + d_set_d_op(dentry, &autofs_dentry_operations); dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 5f79799d5d4a..78f09412740c 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry) struct smb_sb_info *server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); dentry->d_time = jiffies; } @@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) add_entry: server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); d_add(dentry, inode); smb_renew_times(dentry); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f6f9081e6d2c..df8bbb358d54 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -635,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, } if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); @@ -749,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -767,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -956,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -973,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -1041,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, inst_out: if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_add(dentry, inode); return NULL; @@ -1709,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1722,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } @@ -1856,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, ihold(old_dentry->d_inode); } - dentry->d_op = old_dentry->d_op; + d_set_d_op(dentry, old_dentry->d_op); d_instantiate(dentry, old_dentry->d_inode); return err; @@ -1980,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1996,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index a11e5e102716..bf7693c384f9 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -276,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct object_info obj; int error; - dentry->d_op = &adfs_dentry_operations; + d_set_d_op(dentry, &adfs_dentry_operations); lock_kernel(); error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 47dffc513a26..a4041b52fbca 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -484,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) adfs_error(sb, "get root inode failed\n"); goto error; } else - sb->s_root->d_op = &adfs_dentry_operations; + d_set_d_op(sb->s_root, &adfs_dentry_operations); unlock_kernel(); return 0; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 5aca08c21100..944a4042fb65 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -240,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (IS_ERR(inode)) return ERR_CAST(inode); } - dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; + d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/affs/super.c b/fs/affs/super.c index 4c18fcfb0a19..d39081bbe7ce 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -482,7 +482,7 @@ got_root: printk(KERN_ERR "AFFS: Get root inode failed\n"); goto out_error; } - sb->s_root->d_op = &affs_dentry_operations; + d_set_d_op(sb->s_root, &affs_dentry_operations); pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); return 0; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c18cde27000..b8bb7e7148d0 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, } success: - dentry->d_op = &afs_fs_dentry_operations; + d_set_d_op(dentry, &afs_fs_dentry_operations); d_add(dentry, inode); _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 57ce55b2564c..aca8806fa206 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name, */ ihold(anon_inode_inode); - path.dentry->d_op = &anon_inodefs_dentry_operations; + d_set_d_op(path.dentry, &anon_inodefs_dentry_operations); d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ac87e49fa706..a7bdb9dcac84 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - root->d_op = &autofs4_sb_dentry_operations; + d_set_d_op(root, &autofs4_sb_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 10ca68a96dc7..bfe3f2eb684d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -571,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * we check for the hashed dentry and return the newly * hashed dentry. */ - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); /* * And we need to ensure that the same dentry is used for @@ -710,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir, d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); @@ -845,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 659f532d26a0..0ccf9a8afcdf 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.offset = 0; dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9d2994a42a2..63e4546b478a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) int index; int ret; - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 58abc3da6111..cc01cf826769 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - dentry->d_op = &ceph_dentry_ops; + d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - dentry->d_op = &ceph_snapdir_dentry_ops; + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); else - dentry->d_op = &ceph_snap_dentry_ops; + d_set_d_op(dentry, &ceph_snap_dentry_ops); di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 88bfe686ac00..e3b10ca6d453 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct inode *newinode) { if (tcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } @@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); if (rc == 0) d_instantiate(direntry, newinode); @@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, newInode); if (posix_open) { filp = lookup_instantiate_filp(nd, direntry, @@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, rc = 0; direntry->d_time = jiffies; if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, NULL); /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 99b9a2cc14b7..2a239d878e85 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1363,9 +1363,9 @@ mkdir_get_info: inode->i_sb, xid, NULL); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we * failed to get it from the server or was set bogus */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 85cdbf831e7b..fe2f6a93c49e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc); } else { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ee463aeca0b0..ec5b68e3b928 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, } if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) - dentry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(dentry, &cifs_ci_dentry_ops); else - dentry->d_op = &cifs_dentry_ops; + d_set_d_op(dentry, &cifs_dentry_ops); alias = d_materialise_unique(dentry, inode); if (alias != NULL) { diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9e37e8bc9b89..aa40c811f8d2 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - entry->d_op = &coda_dentry_operations; + d_set_d_op(entry, &coda_dentry_operations); if (inode && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e9acea440ffc..36637a8c1ed3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den return error; } - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_rehash(dentry); return 0; @@ -489,7 +489,7 @@ static struct dentry * configfs_lookup(struct inode *dir, */ if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); return NULL; } @@ -683,7 +683,7 @@ static int create_default_group(struct config_group *parent_group, ret = -ENOMEM; child = d_alloc(parent, &name); if (child) { - child->d_op = &configfs_dentry_ops; + d_set_d_op(child, &configfs_dentry_ops); d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, @@ -1681,7 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); if (dentry) { - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, diff --git a/fs/dcache.c b/fs/dcache.c index 1d5cf511e1c7..f9693da3efbd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -398,7 +398,7 @@ repeat: return; } - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) goto kill_it; } @@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + BUG_ON(dentry->d_op); + BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + +} +EXPORT_SYMBOL(d_set_d_op); + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1731,7 +1753,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) @@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ tlen = dentry->d_name.len; tname = dentry->d_name.name; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) @@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_flags & DCACHE_OP_HASH) { if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5e5c7ec1fc98..f91b35db4c6e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct qstr lower_name; int rc = 0; - ecryptfs_dentry->d_op = &ecryptfs_dops; + d_set_d_op(ecryptfs_dentry, &ecryptfs_dops); if ((ecryptfs_dentry->d_name.len == 1 && !strcmp(ecryptfs_dentry->d_name.name, ".")) || (ecryptfs_dentry->d_name.len == 2 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a9dbd62518e6..351038675376 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (special_file(lower_inode->i_mode)) init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); - dentry->d_op = &ecryptfs_dops; + d_set_d_op(dentry, &ecryptfs_dops); fsstack_copy_attr_all(inode, lower_inode); /* This size will be overwritten for real files w/ headers and * other metadata */ @@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags deactivate_locked_super(s); goto out; } - s->s_root->d_op = &ecryptfs_dops; + d_set_d_op(s->s_root, &ecryptfs_dops); s->s_root->d_sb = s; s->s_root->d_parent = s->s_root; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8cccfebee180..206351af7c58 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, */ result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = sb->s_root->d_op; + d_set_d_op(result, sb->s_root->d_op); return result; } @@ -800,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent)) - parent->d_op = sb->s_root->d_op; + d_set_d_op(parent, sb->s_root->d_op); out: unlock_super(sb); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3b3e072d8982..35ffe43afa4b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -227,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, } out: unlock_super(sb); - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); dentry = d_splice_alias(inode, dentry); if (dentry) - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); return dentry; error: @@ -673,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) } sb->s_flags |= MS_NOATIME; - sb->s_root->d_op = &msdos_dentry_operations; + d_set_d_op(sb->s_root, &msdos_dentry_operations); unlock_super(sb); return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 4fc06278db48..3be5ed7d859f 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -766,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: unlock_super(sb); - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1072,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) } if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_ci_dentry_ops; + d_set_d_op(sb->s_root, &vfat_ci_dentry_ops); else - sb->s_root->d_op = &vfat_dentry_ops; + d_set_d_op(sb->s_root, &vfat_dentry_ops); unlock_super(sb); return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9627c95482d..c9a8a426a395 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -347,7 +347,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } entry = newent ? newent : entry; - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 44e0a6c57e87..a8b31da19b93 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -626,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, entry = d_obtain_alias(inode); if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); fuse_invalidate_entry_cache(entry); } @@ -728,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { - parent->d_op = &fuse_dentry_operations; + d_set_d_op(parent, &fuse_dentry_operations); fuse_invalidate_entry_cache(parent); } diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 5ab3839dfcb9..97012ecff560 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } @@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, out_inode: dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b81..2aeabd4218cc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, iput(inode); return -ENOMEM; } - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); *dptr = dentry; return 0; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 12cbea7502c2..f28f89796f4d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, { struct inode *inode = NULL; - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 2b3b8611b41b..ea4aefe7c652 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - dentry->d_op = &hfs_dentry_operations; + d_set_d_op(dentry, &hfs_dentry_operations); hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ef4ee5716abe..0bef62aa4f42 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto bail_iput; - sb->s_root->d_op = &hfs_dentry_operations; + d_set_d_op(sb->s_root, &hfs_dentry_operations); /* everything's okay */ return 0; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 9d59c0571f59..ccab87145f7a 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; - dentry->d_op = &hfsplus_dentry_operations; + d_set_d_op(dentry, &hfsplus_dentry_operations); dentry->d_fsdata = NULL; hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 182e83a9079e..ddf712e4700e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto cleanup; } - sb->s_root->d_op = &hfsplus_dentry_operations; + d_set_d_op(sb->s_root, &hfsplus_dentry_operations); str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 861113fcfc88..0bc81cf256b8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -612,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out_put; d_add(dentry, inode); - dentry->d_op = &hostfs_dentry_ops; + d_set_d_op(dentry, &hostfs_dentry_ops); return NULL; out_put: diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 35526df1fd32..32c13a94e1e9 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -65,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = { void hpfs_set_dentry_operations(struct dentry *dentry) { - dentry->d_op = &hpfs_dentry_operations; + d_set_d_op(dentry, &hpfs_dentry_operations); } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d8f3a652243d..844a7903c72f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -949,7 +949,7 @@ root_found: table += 2; if (opt.check == 'r') table++; - s->s_root->d_op = &isofs_dentry_ops[table]; + d_set_d_op(s->s_root, &isofs_dentry_ops[table]); kfree(opt.iocharset); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 715f7d318046..679a849c3b27 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -172,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam struct inode *inode; struct page *page; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); page = alloc_page(GFP_USER); if (!page) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 57f90dad8919..a151cbdec626 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1466,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc jfs_info("jfs_lookup: name = %s", name); if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); if ((name[0] == '.') && (len == 1)) inum = dip->i_ino; @@ -1495,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); return dentry; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b715b0f7bdfd..3150d766e0d4 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -525,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_no_root; if (sbi->mntflag & JFS_OS2) - sb->s_root->d_op = &jfs_ci_dentry_operations; + d_set_d_op(sb->s_root, &jfs_ci_dentry_operations); /* logical blocks are represented by 40 bits in pxd_t, etc. */ sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; diff --git a/fs/libfs.c b/fs/libfs.c index 28b36663c44e..889311e3d06b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -59,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &simple_dentry_operations; + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index c0d35a3accef..1b9e07728a9f 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/namei.c b/fs/namei.c index c731b50a6184..90bd2873e117 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -587,6 +587,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) return dentry; } +static inline int need_reval_dot(struct dentry *dentry) +{ + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) + return 0; + + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) + return 0; + + return 1; +} + /* * force_reval_path - force revalidation of a dentry * @@ -610,10 +621,9 @@ force_reval_path(struct path *path, struct nameidata *nd) /* * only check on filesystems where it's possible for the dentry to - * become stale. It's assumed that if this flag is set then the - * d_revalidate op will also be defined. + * become stale. */ - if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) + if (!need_reval_dot(dentry)) return 0; status = dentry->d_op->d_revalidate(dentry, nd); @@ -1003,7 +1013,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (parent->d_op && parent->d_op->d_hash) { + if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { int err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; @@ -1029,7 +1039,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (dentry->d_op && dentry->d_op->d_revalidate) { + if (dentry->d_flags & DCACHE_OP_REVALIDATE) { /* We commonly drop rcu-walk here */ if (nameidata_dentry_drop_rcu(nd, dentry)) return -ECHILD; @@ -1043,7 +1053,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, if (!dentry) goto need_lookup; found: - if (dentry->d_op && dentry->d_op->d_revalidate) + if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; done: path->mnt = mnt; @@ -1281,8 +1291,7 @@ return_reval: * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. */ - if (nd->path.dentry && nd->path.dentry->d_sb && - (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + if (need_reval_dot(nd->path.dentry)) { if (nameidata_drop_rcu_maybe(nd)) return -ECHILD; err = -ESTALE; @@ -1602,7 +1611,7 @@ static struct dentry *__lookup_hash(struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (base->d_op && base->d_op->d_hash) { + if (base->d_flags & DCACHE_OP_HASH) { err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) @@ -1616,7 +1625,7 @@ static struct dentry *__lookup_hash(struct qstr *name, */ dentry = d_lookup(base, name); - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) dentry = do_revalidate(dentry, nd); if (!dentry) @@ -2070,7 +2079,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, follow_dotdot(nd); dir = nd->path.dentry; case LAST_DOT: - if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { + if (need_reval_dot(dir)) { if (!dir->d_op->d_revalidate(dir, nd)) { error = -ESTALE; goto exit; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0ba3cdc95a44..4b9cbb28d7fa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -633,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, entry->ino = iunique(dir->i_sb, 2); inode = ncp_iget(dir->i_sb, entry); if (inode) { - newdent->d_op = &ncp_dentry_operations; + d_set_d_op(newdent, &ncp_dentry_operations); d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); @@ -889,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc if (inode) { ncp_new_dentry(dentry); add_entry: - dentry->d_op = &ncp_dentry_operations; + d_set_d_op(dentry, &ncp_dentry_operations); d_add(dentry, inode); error = 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 60047dbeb38d..0c75a5f3cafd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -717,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &ncp_root_dentry_operations; + d_set_d_op(sb->s_root, &ncp_root_dentry_operations); return 0; out_no_root: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eb77471b8823..37e0a8bb077e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (dentry == NULL) return; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); if (IS_ERR(inode)) goto out; @@ -1188,7 +1188,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* * If we're doing an exclusive create, optimize away the lookup @@ -1333,7 +1333,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = ERR_PTR(-ENAMETOOLONG); goto out; } - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b3e36c3430de..c3a5a1126833 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -121,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fsinfo.fattr); return ret; @@ -228,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fattr); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 19ad145d2af3..6adafa576065 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -138,7 +138,7 @@ check_gen: result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = &ocfs2_dentry_ops; + d_set_d_op(result, &ocfs2_dentry_ops); else mlog_errno(PTR_ERR(result)); @@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); if (!IS_ERR(parent)) - parent->d_op = &ocfs2_dentry_ops; + d_set_d_op(parent, &ocfs2_dentry_ops); bail_unlock: ocfs2_inode_unlock(dir, 0); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ff5744e1e36f..d14cad6e2e41 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&oi->ip_lock); bail_add: - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); ret = d_splice_alias(inode, dentry); if (inode) { @@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, @@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry, } ihold(inode); - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); out_commit: @@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir, mlog_errno(status); goto bail; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, @@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, goto out_commit; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); status = 0; out_commit: diff --git a/fs/pipe.c b/fs/pipe.c index ae3592dcc88c..01a786567810 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1004,7 +1004,7 @@ struct file *create_write_pipe(int flags) goto err_inode; path.mnt = mntget(pipe_mnt); - path.dentry->d_op = &pipefs_dentry_operations; + d_set_d_op(path.dentry, &pipefs_dentry_operations); d_instantiate(path.dentry, inode); err = -ENFILE; diff --git a/fs/proc/base.c b/fs/proc/base.c index d932fdb6a245..85f0a80912aa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, NULL)) @@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &proc_base_dentry_operations; + d_set_d_op(dentry, &proc_base_dentry_operations); d_add(dentry, inode); error = NULL; out: @@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ @@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1d607be36d95..f766be29d2c7 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, out_unlock: if (inode) { - dentry->d_op = &proc_dentry_operations; + d_set_d_op(dentry, &proc_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 998e3a715bcc..35efd85a4d32 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; err = NULL; - dentry->d_op = &proc_sys_dentry_operations; + d_set_d_op(dentry, &proc_sys_dentry_operations); d_add(dentry, inode); out: @@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, dput(child); return -ENOMEM; } else { - child->d_op = &proc_sys_dentry_operations; + d_set_d_op(child, &proc_sys_dentry_operations); d_add(child, inode); } } else { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5d04a7828e7a..e0f0d7ea10a1 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - dentry->d_op = &xattr_lookup_poison_ops; + d_set_d_op(dentry, &xattr_lookup_poison_ops); if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 27e1102e303e..3e076caa8daf 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -701,7 +701,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - dentry->d_op = &sysfs_dentry_ops; + d_set_d_op(dentry, &sysfs_dentry_ops); dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 7507aeb4c90e..b5e68da2db32 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -48,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > SYSV_NAMELEN) return ERR_PTR(-ENAMETOOLONG); ino = sysv_inode_by_name(dentry); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 3d9c62be0c10..76712aefc4ab 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) if (sbi->s_forced_ro) sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) - sb->s_root->d_op = &sysv_dentry_operations; + d_set_d_op(sb->s_root, &sysv_dentry_operations); return 1; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e4414693065e..f4b40a751f09 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -183,6 +183,11 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 #define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ +#define DCACHE_OP_HASH 0x1000 +#define DCACHE_OP_COMPARE 0x2000 +#define DCACHE_OP_REVALIDATE 0x4000 +#define DCACHE_OP_DELETE 0x8000 + extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; @@ -201,6 +206,7 @@ extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); +extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9f41470c3949..51cddc11cd85 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &cgroup_dentry_operations; + d_set_d_op(dentry, &cgroup_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/net/socket.c b/net/socket.c index 817dc92e9ef8..991e266bc7ae 100644 --- a/net/socket.c +++ b/net/socket.c @@ -368,7 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; + d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2899fe27f880..09f01f41e55a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -591,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent, } } if (!dentry->d_inode) - dentry->d_op = &rpc_dentry_operations; + d_set_d_op(dentry, &rpc_dentry_operations); out_err: return dentry; } -- cgit v1.2.3-59-g8ed1b From 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:07 +1100 Subject: fs: improve scalability of pseudo filesystems Regardless of how much we possibly try to scale dcache, there is likely always going to be some fundamental contention when adding or removing children under the same parent. Pseudo filesystems do not seem need to have connected dentries because by definition they are disconnected. Signed-off-by: Nick Piggin --- fs/anon_inodes.c | 2 +- fs/dcache.c | 12 ++++++++++++ fs/pipe.c | 2 +- include/linux/dcache.h | 1 + net/socket.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) (limited to 'net/socket.c') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index aca8806fa206..9d92b33da8a0 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name, this.name = name; this.len = strlen(name); this.hash = 0; - path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); if (!path.dentry) goto err_module; diff --git a/fs/dcache.c b/fs/dcache.c index 09ec945f3c98..9e6e6db76869 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1330,6 +1330,18 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) +{ + struct dentry *dentry = d_alloc(NULL, name); + if (dentry) { + dentry->d_sb = sb; + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; + } + return dentry; +} +EXPORT_SYMBOL(d_alloc_pseudo); + struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; diff --git a/fs/pipe.c b/fs/pipe.c index 01a786567810..cfe3a7f2ee21 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -999,7 +999,7 @@ struct file *create_write_pipe(int flags) goto err; err = -ENOMEM; - path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); if (!path.dentry) goto err_inode; path.mnt = mntget(pipe_mnt); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d719e4de8046..c0a2ca97c72f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -211,6 +211,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_obtain_alias(struct inode *); diff --git a/net/socket.c b/net/socket.c index 991e266bc7ae..0ee74c325320 100644 --- a/net/socket.c +++ b/net/socket.c @@ -361,7 +361,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From b3e19d924b6eaf2ca7d22cba99a517c5171007b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:11 +1100 Subject: fs: scale mntget/mntput The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 2 +- fs/fs_struct.c | 26 +++-- fs/internal.h | 1 + fs/namei.c | 24 +++++ fs/namespace.c | 242 +++++++++++++++++++++++++++++++++++++-------- fs/pipe.c | 2 +- fs/pnode.c | 4 +- fs/super.c | 2 +- include/linux/mount.h | 53 ++++------ include/linux/path.h | 2 + net/socket.c | 19 +++- 13 files changed, 283 insertions(+), 98 deletions(-) (limited to 'net/socket.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 5a24f40bb48e..f099b82703d8 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) * any operations on the root directory. However, we need a non-trivial * d_name - pfm: will go nicely and kill the special-casing in procfs. */ -static struct vfsmount *pfmfs_mnt; +static struct vfsmount *pfmfs_mnt __read_mostly; static int __init init_pfm_fs(void) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 4759d827e8c7..f511dd15fd31 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput(mtd_inode_mnt); + mntput_long(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9d92b33da8a0..5fd38112a6ca 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -232,7 +232,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput(anon_inode_mnt); + mntput_long(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 60b8531f41c5..68ca487bedb1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -17,11 +17,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put(&old_root); + path_put_long(&old_root); } /* @@ -36,12 +36,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put(&old_pwd); + path_put_long(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -59,13 +59,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->pwd = *new_root; count++; } @@ -76,13 +76,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put(old_root); + path_put_long(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put(&fs->root); - path_put(&fs->pwd); + path_put_long(&fs->root); + path_put_long(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -115,7 +115,13 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock_init(&fs->lock); seqcount_init(&fs->seq); fs->umask = old->umask; - get_fs_root_and_pwd(old, &fs->root, &fs->pwd); + + spin_lock(&old->lock); + fs->root = old->root; + path_get_long(&fs->root); + fs->pwd = old->pwd; + path_get_long(&fs->pwd); + spin_unlock(&old->lock); } return fs; } diff --git a/fs/internal.h b/fs/internal.h index e43b9a4dbf4e..9687c2ee2735 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); +extern unsigned int mnt_get_count(struct vfsmount *mnt); extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, struct vfsmount *); diff --git a/fs/namei.c b/fs/namei.c index 4e957bf744ae..19433cdba011 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -367,6 +367,18 @@ void path_get(struct path *path) } EXPORT_SYMBOL(path_get); +/** + * path_get_long - get a long reference to a path + * @path: path to get the reference to + * + * Given a path increment the reference count to the dentry and the vfsmount. + */ +void path_get_long(struct path *path) +{ + mntget_long(path->mnt); + dget(path->dentry); +} + /** * path_put - put a reference to a path * @path: path to put the reference to @@ -380,6 +392,18 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); +/** + * path_put_long - put a long reference to a path + * @path: path to put the reference to + * + * Given a path decrement the reference count to the dentry and the vfsmount. + */ +void path_put_long(struct path *path) +{ + dput(path->dentry); + mntput_long(path->mnt); +} + /** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop diff --git a/fs/namespace.c b/fs/namespace.c index 03b82350f020..3ddfd9046c44 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt) mnt->mnt_group_id = 0; } +/* + * vfsmount lock must be held for read + */ +static inline void mnt_add_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_add(mnt->mnt_pcp->mnt_count, n); +#else + preempt_disable(); + mnt->mnt_count += n; + preempt_enable(); +#endif +} + +static inline void mnt_set_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_write(mnt->mnt_pcp->mnt_count, n); +#else + mnt->mnt_count = n; +#endif +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_inc_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, 1); +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_dec_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, -1); +} + +/* + * vfsmount lock must be held for write + */ +unsigned int mnt_get_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + unsigned int count = atomic_read(&mnt->mnt_longrefs); + int cpu; + + for_each_possible_cpu(cpu) { + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; + } + + return count; +#else + return mnt->mnt_count; +#endif +} + struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name) goto out_free_id; } - atomic_set(&mnt->mnt_count, 1); +#ifdef CONFIG_SMP + mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); + if (!mnt->mnt_pcp) + goto out_free_devname; + + atomic_set(&mnt->mnt_longrefs, 1); +#else + mnt->mnt_count = 1; + mnt->mnt_writers = 0; +#endif + INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -165,13 +233,6 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif -#ifdef CONFIG_SMP - mnt->mnt_writers = alloc_percpu(int); - if (!mnt->mnt_writers) - goto out_free_devname; -#else - mnt->mnt_writers = 0; #endif } return mnt; @@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly); static inline void mnt_inc_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; + this_cpu_inc(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers++; #endif @@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt) static inline void mnt_dec_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; + this_cpu_dec(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers--; #endif @@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt) int cpu; for_each_possible_cpu(cpu) { - count += *per_cpu_ptr(mnt->mnt_writers, cpu); + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; } return count; @@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt) kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP - free_percpu(mnt->mnt_writers); + free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } @@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, return NULL; } -static inline void __mntput(struct vfsmount *mnt) +static inline void mntfree(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; + /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this @@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt) * to make r/w->r/o transitions. */ /* - * atomic_dec_and_lock() used to deal with ->mnt_count decrements - * provides barriers, so mnt_get_writers() below is safe. AV + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); fsnotify_vfsmount_delete(mnt); @@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt) deactivate_super(sb); } -void mntput_no_expire(struct vfsmount *mnt) -{ -repeat: - if (atomic_add_unless(&mnt->mnt_count, -1, 1)) - return; +#ifdef CONFIG_SMP +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ + if (!longrefs) { +put_again: + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longrefs))) { + mnt_dec_count(mnt); + br_read_unlock(vfsmount_lock); + return; + } + br_read_unlock(vfsmount_lock); + } else { + BUG_ON(!atomic_read(&mnt->mnt_longrefs)); + if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) + return; + } + br_write_lock(vfsmount_lock); - if (!atomic_dec_and_test(&mnt->mnt_count)) { + if (!longrefs) + mnt_dec_count(mnt); + else + atomic_dec(&mnt->mnt_longrefs); + if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (likely(!mnt->mnt_pinned)) { + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - __mntput(mnt); + acct_auto_close_mnt(mnt); + goto put_again; + } + br_write_unlock(vfsmount_lock); + mntfree(mnt); +} +#else +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ +put_again: + mnt_dec_count(mnt); + if (likely(mnt_get_count(mnt))) return; + br_write_lock(vfsmount_lock); + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto put_again; } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + mntfree(mnt); +} +#endif + +static void mntput_no_expire(struct vfsmount *mnt) +{ + __mntput(mnt, 0); +} + +void mntput(struct vfsmount *mnt) +{ + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 0); + } +} +EXPORT_SYMBOL(mntput); + +struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + mnt_inc_count(mnt); + return mnt; +} +EXPORT_SYMBOL(mntget); + +void mntput_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 1); + } +#else + mntput(mnt); +#endif } -EXPORT_SYMBOL(mntput_no_expire); +EXPORT_SYMBOL(mntput_long); + +struct vfsmount *mntget_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) + atomic_inc(&mnt->mnt_longrefs); + return mnt; +#else + return mntget(mnt); +#endif +} +EXPORT_SYMBOL(mntget_long); void mnt_pin(struct vfsmount *mnt) { @@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt) mnt->mnt_pinned++; br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { - atomic_inc(&mnt->mnt_count); + mnt_inc_count(mnt); mnt->mnt_pinned--; } br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - br_read_lock(vfsmount_lock); + /* write lock needed for mnt_get_count */ + br_write_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += atomic_read(&p->mnt_count); + actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_read_lock(vfsmount_lock); + br_write_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput(mnt); + mntput_long(mnt); } } @@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags) flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; - if (atomic_read(&mnt->mnt_count) != 2) + /* + * probably don't strictly need the lock here if we examined + * all race cases, but it's a slowpath. + */ + br_write_lock(vfsmount_lock); + if (mnt_get_count(mnt) != 2) { + br_write_lock(vfsmount_lock); return -EBUSY; + } + br_write_unlock(vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, unlock: up_write(&namespace_sem); - mntput(newmnt); + mntput_long(newmnt); return err; } @@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (fs) { if (p == fs->root.mnt) { rootmnt = p; - fs->root.mnt = mntget(q); + fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { pwdmnt = p; - fs->pwd.mnt = mntget(q); + fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput(rootmnt); + mntput_long(rootmnt); if (pwdmnt) - mntput(pwdmnt); + mntput_long(pwdmnt); return new_ns; } @@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); + error = 0; path_put(&root_parent); path_put(&parent_path); @@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); + ns = create_mnt_ns(mnt); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); diff --git a/fs/pipe.c b/fs/pipe.c index cfe3a7f2ee21..68f1f8e4e23b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput(pipe_mnt); + mntput_long(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/pnode.c b/fs/pnode.c index 8066b8dd748f..d42514e32380 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -288,7 +288,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; + int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; return (mycount > count); } @@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. * - * vfsmount lock must be held for read or write + * vfsmount lock must be held for write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { diff --git a/fs/super.c b/fs/super.c index 968ba013011a..823e061faa87 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1140,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput(mnt); + mntput_long(mnt); return ERR_PTR(err); } diff --git a/include/linux/mount.h b/include/linux/mount.h index 5e7a59408dd4..1869ea24a739 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -13,6 +13,7 @@ #include #include #include +#include #include struct super_block; @@ -46,12 +47,24 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +struct mnt_pcp { + int mnt_count; + int mnt_writers; +}; + struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ +#ifdef CONFIG_SMP + struct mnt_pcp __percpu *mnt_pcp; + atomic_t mnt_longrefs; +#else + int mnt_count; + int mnt_writers; +#endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; @@ -70,57 +83,25 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; -#ifdef CONFIG_SMP - int __percpu *mnt_writers; -#else - int mnt_writers; -#endif }; -static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - return mnt->mnt_writers; -#else - return &mnt->mnt_writers; -#endif -} - -static inline struct vfsmount *mntget(struct vfsmount *mnt) -{ - if (mnt) - atomic_inc(&mnt->mnt_count); - return mnt; -} - struct file; /* forward dec */ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); -extern void mntput_no_expire(struct vfsmount *mnt); +extern void mntput(struct vfsmount *mnt); +extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mntput_long(struct vfsmount *mnt); +extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); -static inline void mntput(struct vfsmount *mnt) -{ - if (mnt) { - mnt->mnt_expiry_mark = 0; - mntput_no_expire(mnt); - } -} - extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); diff --git a/include/linux/path.h b/include/linux/path.h index edc98dec6266..a581e8c06533 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,7 +10,9 @@ struct path { }; extern void path_get(struct path *); +extern void path_get_long(struct path *); extern void path_put(struct path *); +extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { diff --git a/net/socket.c b/net/socket.c index 0ee74c325320..815bba3d2fe0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2390,6 +2390,8 @@ EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; + /* * Initialize sock SLAB cache. */ @@ -2406,8 +2408,15 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ @@ -2420,7 +2429,13 @@ static int __init sock_init(void) skb_timestamping_init(); #endif - return 0; +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ -- cgit v1.2.3-59-g8ed1b