aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c147
1 files changed, 43 insertions, 104 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 8ddd14806799..99186556f8d3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -61,9 +61,6 @@ __setup("mphash_entries=", set_mphash_entries);
static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
-static DEFINE_SPINLOCK(mnt_id_lock);
-static int mnt_id_start = 0;
-static int mnt_group_start = 1;
static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
@@ -101,50 +98,30 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
static int mnt_alloc_id(struct mount *mnt)
{
- int res;
+ int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
-retry:
- ida_pre_get(&mnt_id_ida, GFP_KERNEL);
- spin_lock(&mnt_id_lock);
- res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
- if (!res)
- mnt_id_start = mnt->mnt_id + 1;
- spin_unlock(&mnt_id_lock);
- if (res == -EAGAIN)
- goto retry;
-
- return res;
+ if (res < 0)
+ return res;
+ mnt->mnt_id = res;
+ return 0;
}
static void mnt_free_id(struct mount *mnt)
{
- int id = mnt->mnt_id;
- spin_lock(&mnt_id_lock);
- ida_remove(&mnt_id_ida, id);
- if (mnt_id_start > id)
- mnt_id_start = id;
- spin_unlock(&mnt_id_lock);
+ ida_free(&mnt_id_ida, mnt->mnt_id);
}
/*
* Allocate a new peer group ID
- *
- * mnt_group_ida is protected by namespace_sem
*/
static int mnt_alloc_group_id(struct mount *mnt)
{
- int res;
-
- if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
- return -ENOMEM;
-
- res = ida_get_new_above(&mnt_group_ida,
- mnt_group_start,
- &mnt->mnt_group_id);
- if (!res)
- mnt_group_start = mnt->mnt_group_id + 1;
+ int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);
- return res;
+ if (res < 0)
+ return res;
+ mnt->mnt_group_id = res;
+ return 0;
}
/*
@@ -152,10 +129,7 @@ static int mnt_alloc_group_id(struct mount *mnt)
*/
void mnt_release_group_id(struct mount *mnt)
{
- int id = mnt->mnt_group_id;
- ida_remove(&mnt_group_ida, id);
- if (mnt_group_start > id)
- mnt_group_start = id;
+ ida_free(&mnt_group_ida, mnt->mnt_group_id);
mnt->mnt_group_id = 0;
}
@@ -431,74 +405,20 @@ int __mnt_want_write_file(struct file *file)
}
/**
- * mnt_want_write_file_path - get write access to a file's mount
- * @file: the file who's mount on which to take a write
- *
- * This is like mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
- *
- * Called by the vfs for cases when we have an open file at hand, but will do an
- * inode operation on it (important distinction for files opened on overlayfs,
- * since the file operations will come from the real underlying file, while
- * inode operations come from the overlay).
- */
-int mnt_want_write_file_path(struct file *file)
-{
- int ret;
-
- sb_start_write(file->f_path.mnt->mnt_sb);
- ret = __mnt_want_write_file(file);
- if (ret)
- sb_end_write(file->f_path.mnt->mnt_sb);
- return ret;
-}
-
-static inline int may_write_real(struct file *file)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct dentry *upperdentry;
-
- /* Writable file? */
- if (file->f_mode & FMODE_WRITER)
- return 0;
-
- /* Not overlayfs? */
- if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
- return 0;
-
- /* File refers to upper, writable layer? */
- upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
- if (upperdentry &&
- (file_inode(file) == d_inode(upperdentry) ||
- file_inode(file) == d_inode(dentry)))
- return 0;
-
- /* Lower layer: can't write to real file, sorry... */
- return -EPERM;
-}
-
-/**
* mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
* This is like mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already
- *
- * Mostly called by filesystems from their ioctl operation before performing
- * modification. On overlayfs this needs to check if the file is on a read-only
- * lower layer and deny access in that case.
*/
int mnt_want_write_file(struct file *file)
{
int ret;
- ret = may_write_real(file);
- if (!ret) {
- sb_start_write(file_inode(file)->i_sb);
- ret = __mnt_want_write_file(file);
- if (ret)
- sb_end_write(file_inode(file)->i_sb);
- }
+ sb_start_write(file_inode(file)->i_sb);
+ ret = __mnt_want_write_file(file);
+ if (ret)
+ sb_end_write(file_inode(file)->i_sb);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
@@ -538,14 +458,9 @@ void __mnt_drop_write_file(struct file *file)
__mnt_drop_write(file->f_path.mnt);
}
-void mnt_drop_write_file_path(struct file *file)
-{
- mnt_drop_write(file->f_path.mnt);
-}
-
void mnt_drop_write_file(struct file *file)
{
- __mnt_drop_write(file->f_path.mnt);
+ __mnt_drop_write_file(file);
sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);
@@ -659,12 +574,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
return 0;
mnt = real_mount(bastard);
mnt_add_count(mnt, 1);
+ smp_mb(); // see mntput_no_expire()
if (likely(!read_seqretry(&mount_lock, seq)))
return 0;
if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
mnt_add_count(mnt, -1);
return 1;
}
+ lock_mount_hash();
+ if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+ mnt_add_count(mnt, -1);
+ unlock_mount_hash();
+ return 1;
+ }
+ unlock_mount_hash();
+ /* caller will mntput() */
return -1;
}
@@ -1195,12 +1119,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
static void mntput_no_expire(struct mount *mnt)
{
rcu_read_lock();
- mnt_add_count(mnt, -1);
- if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+ /*
+ * Since we don't do lock_mount_hash() here,
+ * ->mnt_ns can change under us. However, if it's
+ * non-NULL, then there's a reference that won't
+ * be dropped until after an RCU delay done after
+ * turning ->mnt_ns NULL. So if we observe it
+ * non-NULL under rcu_read_lock(), the reference
+ * we are dropping is not the final one.
+ */
+ mnt_add_count(mnt, -1);
rcu_read_unlock();
return;
}
lock_mount_hash();
+ /*
+ * make sure that if __legitimize_mnt() has not seen us grab
+ * mount_lock, we'll see their refcount increment here.
+ */
+ smp_mb();
+ mnt_add_count(mnt, -1);
if (mnt_get_count(mnt)) {
rcu_read_unlock();
unlock_mount_hash();