diff options
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 147 |
1 files changed, 43 insertions, 104 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 8ddd14806799..99186556f8d3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -61,9 +61,6 @@ __setup("mphash_entries=", set_mphash_entries); static u64 event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); -static DEFINE_SPINLOCK(mnt_id_lock); -static int mnt_id_start = 0; -static int mnt_group_start = 1; static struct hlist_head *mount_hashtable __read_mostly; static struct hlist_head *mountpoint_hashtable __read_mostly; @@ -101,50 +98,30 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry) static int mnt_alloc_id(struct mount *mnt) { - int res; + int res = ida_alloc(&mnt_id_ida, GFP_KERNEL); -retry: - ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&mnt_id_lock); - res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); - if (!res) - mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&mnt_id_lock); - if (res == -EAGAIN) - goto retry; - - return res; + if (res < 0) + return res; + mnt->mnt_id = res; + return 0; } static void mnt_free_id(struct mount *mnt) { - int id = mnt->mnt_id; - spin_lock(&mnt_id_lock); - ida_remove(&mnt_id_ida, id); - if (mnt_id_start > id) - mnt_id_start = id; - spin_unlock(&mnt_id_lock); + ida_free(&mnt_id_ida, mnt->mnt_id); } /* * Allocate a new peer group ID - * - * mnt_group_ida is protected by namespace_sem */ static int mnt_alloc_group_id(struct mount *mnt) { - int res; - - if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) - return -ENOMEM; - - res = ida_get_new_above(&mnt_group_ida, - mnt_group_start, - &mnt->mnt_group_id); - if (!res) - mnt_group_start = mnt->mnt_group_id + 1; + int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL); - return res; + if (res < 0) + return res; + mnt->mnt_group_id = res; + return 0; } /* @@ -152,10 +129,7 @@ static int mnt_alloc_group_id(struct mount *mnt) */ void mnt_release_group_id(struct mount *mnt) { - int id = mnt->mnt_group_id; - ida_remove(&mnt_group_ida, id); - if (mnt_group_start > id) - mnt_group_start = id; + ida_free(&mnt_group_ida, mnt->mnt_group_id); mnt->mnt_group_id = 0; } @@ -431,74 +405,20 @@ int __mnt_want_write_file(struct file *file) } /** - * mnt_want_write_file_path - get write access to a file's mount - * @file: the file who's mount on which to take a write - * - * This is like mnt_want_write, but it takes a file and can - * do some optimisations if the file is open for write already - * - * Called by the vfs for cases when we have an open file at hand, but will do an - * inode operation on it (important distinction for files opened on overlayfs, - * since the file operations will come from the real underlying file, while - * inode operations come from the overlay). - */ -int mnt_want_write_file_path(struct file *file) -{ - int ret; - - sb_start_write(file->f_path.mnt->mnt_sb); - ret = __mnt_want_write_file(file); - if (ret) - sb_end_write(file->f_path.mnt->mnt_sb); - return ret; -} - -static inline int may_write_real(struct file *file) -{ - struct dentry *dentry = file->f_path.dentry; - struct dentry *upperdentry; - - /* Writable file? */ - if (file->f_mode & FMODE_WRITER) - return 0; - - /* Not overlayfs? */ - if (likely(!(dentry->d_flags & DCACHE_OP_REAL))) - return 0; - - /* File refers to upper, writable layer? */ - upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); - if (upperdentry && - (file_inode(file) == d_inode(upperdentry) || - file_inode(file) == d_inode(dentry))) - return 0; - - /* Lower layer: can't write to real file, sorry... */ - return -EPERM; -} - -/** * mnt_want_write_file - get write access to a file's mount * @file: the file who's mount on which to take a write * * This is like mnt_want_write, but it takes a file and can * do some optimisations if the file is open for write already - * - * Mostly called by filesystems from their ioctl operation before performing - * modification. On overlayfs this needs to check if the file is on a read-only - * lower layer and deny access in that case. */ int mnt_want_write_file(struct file *file) { int ret; - ret = may_write_real(file); - if (!ret) { - sb_start_write(file_inode(file)->i_sb); - ret = __mnt_want_write_file(file); - if (ret) - sb_end_write(file_inode(file)->i_sb); - } + sb_start_write(file_inode(file)->i_sb); + ret = __mnt_want_write_file(file); + if (ret) + sb_end_write(file_inode(file)->i_sb); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write_file); @@ -538,14 +458,9 @@ void __mnt_drop_write_file(struct file *file) __mnt_drop_write(file->f_path.mnt); } -void mnt_drop_write_file_path(struct file *file) -{ - mnt_drop_write(file->f_path.mnt); -} - void mnt_drop_write_file(struct file *file) { - __mnt_drop_write(file->f_path.mnt); + __mnt_drop_write_file(file); sb_end_write(file_inode(file)->i_sb); } EXPORT_SYMBOL(mnt_drop_write_file); @@ -659,12 +574,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); + smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } + lock_mount_hash(); + if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + mnt_add_count(mnt, -1); + unlock_mount_hash(); + return 1; + } + unlock_mount_hash(); + /* caller will mntput() */ return -1; } @@ -1195,12 +1119,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + if (likely(READ_ONCE(mnt->mnt_ns))) { + /* + * Since we don't do lock_mount_hash() here, + * ->mnt_ns can change under us. However, if it's + * non-NULL, then there's a reference that won't + * be dropped until after an RCU delay done after + * turning ->mnt_ns NULL. So if we observe it + * non-NULL under rcu_read_lock(), the reference + * we are dropping is not the final one. + */ + mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); + /* + * make sure that if __legitimize_mnt() has not seen us grab + * mount_lock, we'll see their refcount increment here. + */ + smp_mb(); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); unlock_mount_hash(); |