aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c477
1 files changed, 421 insertions, 56 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index c9cab307fa77..3357c3d65475 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
#include <linux/init.h> /* init_rootfs */
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
+#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
@@ -1832,6 +1833,27 @@ struct vfsmount *collect_mounts(const struct path *path)
return &tree->mnt;
}
+static void free_mnt_ns(struct mnt_namespace *);
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
+
+void dissolve_on_fput(struct vfsmount *mnt)
+{
+ struct mnt_namespace *ns;
+ namespace_lock();
+ lock_mount_hash();
+ ns = real_mount(mnt)->mnt_ns;
+ if (ns) {
+ if (is_anon_ns(ns))
+ umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+ else
+ ns = NULL;
+ }
+ unlock_mount_hash();
+ namespace_unlock();
+ if (ns)
+ free_mnt_ns(ns);
+}
+
void drop_collected_mounts(struct vfsmount *mnt)
{
namespace_lock();
@@ -2065,6 +2087,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
attach_mnt(source_mnt, dest_mnt, dest_mp);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
+ if (source_mnt->mnt_ns) {
+ /* move from anon - the caller will destroy */
+ list_del_init(&source_mnt->mnt_ns->list);
+ }
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
commit_tree(source_mnt);
}
@@ -2222,6 +2248,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false;
}
+static struct mount *__do_loopback(struct path *old_path, int recurse)
+{
+ struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
+
+ if (IS_MNT_UNBINDABLE(old))
+ return mnt;
+
+ if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
+ return mnt;
+
+ if (!recurse && has_locked_children(old, old_path->dentry))
+ return mnt;
+
+ if (recurse)
+ mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
+ else
+ mnt = clone_mnt(old, old_path->dentry, 0);
+
+ if (!IS_ERR(mnt))
+ mnt->mnt.mnt_flags &= ~MNT_LOCKED;
+
+ return mnt;
+}
+
/*
* do loopback mount.
*/
@@ -2229,7 +2279,7 @@ static int do_loopback(struct path *path, const char *old_name,
int recurse)
{
struct path old_path;
- struct mount *mnt = NULL, *old, *parent;
+ struct mount *mnt = NULL, *parent;
struct mountpoint *mp;
int err;
if (!old_name || !*old_name)
@@ -2243,38 +2293,21 @@ static int do_loopback(struct path *path, const char *old_name,
goto out;
mp = lock_mount(path);
- err = PTR_ERR(mp);
- if (IS_ERR(mp))
+ if (IS_ERR(mp)) {
+ err = PTR_ERR(mp);
goto out;
+ }
- old = real_mount(old_path.mnt);
parent = real_mount(path->mnt);
-
- err = -EINVAL;
- if (IS_MNT_UNBINDABLE(old))
- goto out2;
-
if (!check_mnt(parent))
goto out2;
- if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
- goto out2;
-
- if (!recurse && has_locked_children(old, old_path.dentry))
- goto out2;
-
- if (recurse)
- mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
- else
- mnt = clone_mnt(old, old_path.dentry, 0);
-
+ mnt = __do_loopback(&old_path, recurse);
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
goto out2;
}
- mnt->mnt.mnt_flags &= ~MNT_LOCKED;
-
err = graft_tree(mnt, parent, mp);
if (err) {
lock_mount_hash();
@@ -2288,6 +2321,96 @@ out:
return err;
}
+static struct file *open_detached_copy(struct path *path, bool recursive)
+{
+ struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+ struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
+ struct mount *mnt, *p;
+ struct file *file;
+
+ if (IS_ERR(ns))
+ return ERR_CAST(ns);
+
+ namespace_lock();
+ mnt = __do_loopback(path, recursive);
+ if (IS_ERR(mnt)) {
+ namespace_unlock();
+ free_mnt_ns(ns);
+ return ERR_CAST(mnt);
+ }
+
+ lock_mount_hash();
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
+ p->mnt_ns = ns;
+ ns->mounts++;
+ }
+ ns->root = mnt;
+ list_add_tail(&ns->list, &mnt->mnt_list);
+ mntget(&mnt->mnt);
+ unlock_mount_hash();
+ namespace_unlock();
+
+ mntput(path->mnt);
+ path->mnt = &mnt->mnt;
+ file = dentry_open(path, O_PATH, current_cred());
+ if (IS_ERR(file))
+ dissolve_on_fput(path->mnt);
+ else
+ file->f_mode |= FMODE_NEED_UNMOUNT;
+ return file;
+}
+
+SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
+{
+ struct file *file;
+ struct path path;
+ int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
+ bool detached = flags & OPEN_TREE_CLONE;
+ int error;
+ int fd;
+
+ BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
+
+ if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
+ AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
+ OPEN_TREE_CLOEXEC))
+ return -EINVAL;
+
+ if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
+ return -EINVAL;
+
+ if (flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ if (detached && !may_mount())
+ return -EPERM;
+
+ fd = get_unused_fd_flags(flags & O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ error = user_path_at(dfd, filename, lookup_flags, &path);
+ if (unlikely(error)) {
+ file = ERR_PTR(error);
+ } else {
+ if (detached)
+ file = open_detached_copy(&path, flags & AT_RECURSIVE);
+ else
+ file = dentry_open(&path, O_PATH, current_cred());
+ path_put(&path);
+ }
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ return PTR_ERR(file);
+ }
+ fd_install(fd, file);
+ return fd;
+}
+
/*
* Don't allow locked mount flags to be cleared.
*
@@ -2426,72 +2549,117 @@ static inline int tree_contains_unbindable(struct mount *mnt)
return 0;
}
-static int do_move_mount(struct path *path, const char *old_name)
+/*
+ * Check that there aren't references to earlier/same mount namespaces in the
+ * specified subtree. Such references can act as pins for mount namespaces
+ * that aren't checked by the mount-cycle checking code, thereby allowing
+ * cycles to be made.
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
{
- struct path old_path, parent_path;
+ struct mount *p;
+ bool ret = false;
+
+ lock_mount_hash();
+ for (p = subtree; p; p = next_mnt(p, subtree))
+ if (mnt_ns_loop(p->mnt.mnt_root))
+ goto out;
+
+ ret = true;
+out:
+ unlock_mount_hash();
+ return ret;
+}
+
+static int do_move_mount(struct path *old_path, struct path *new_path)
+{
+ struct path parent_path = {.mnt = NULL, .dentry = NULL};
+ struct mnt_namespace *ns;
struct mount *p;
struct mount *old;
struct mountpoint *mp;
int err;
- if (!old_name || !*old_name)
- return -EINVAL;
- err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
- if (err)
- return err;
+ bool attached;
- mp = lock_mount(path);
- err = PTR_ERR(mp);
+ mp = lock_mount(new_path);
if (IS_ERR(mp))
- goto out;
+ return PTR_ERR(mp);
- old = real_mount(old_path.mnt);
- p = real_mount(path->mnt);
+ old = real_mount(old_path->mnt);
+ p = real_mount(new_path->mnt);
+ attached = mnt_has_parent(old);
+ ns = old->mnt_ns;
err = -EINVAL;
- if (!check_mnt(p) || !check_mnt(old))
- goto out1;
+ /* The mountpoint must be in our namespace. */
+ if (!check_mnt(p))
+ goto out;
- if (old->mnt.mnt_flags & MNT_LOCKED)
- goto out1;
+ /* The thing moved should be either ours or completely unattached. */
+ if (attached && !check_mnt(old))
+ goto out;
- err = -EINVAL;
- if (old_path.dentry != old_path.mnt->mnt_root)
- goto out1;
+ if (!attached && !is_anon_ns(ns))
+ goto out;
- if (!mnt_has_parent(old))
- goto out1;
+ if (old->mnt.mnt_flags & MNT_LOCKED)
+ goto out;
- if (d_is_dir(path->dentry) !=
- d_is_dir(old_path.dentry))
- goto out1;
+ if (old_path->dentry != old_path->mnt->mnt_root)
+ goto out;
+
+ if (d_is_dir(new_path->dentry) !=
+ d_is_dir(old_path->dentry))
+ goto out;
/*
* Don't move a mount residing in a shared parent.
*/
- if (IS_MNT_SHARED(old->mnt_parent))
- goto out1;
+ if (attached && IS_MNT_SHARED(old->mnt_parent))
+ goto out;
/*
* Don't move a mount tree containing unbindable mounts to a destination
* mount which is shared.
*/
if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
- goto out1;
+ goto out;
err = -ELOOP;
+ if (!check_for_nsfs_mounts(old))
+ goto out;
for (; mnt_has_parent(p); p = p->mnt_parent)
if (p == old)
- goto out1;
+ goto out;
- err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
+ err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
+ attached ? &parent_path : NULL);
if (err)
- goto out1;
+ goto out;
/* if the mount is moved, it should no longer be expire
* automatically */
list_del_init(&old->mnt_expire);
-out1:
- unlock_mount(mp);
out:
- if (!err)
+ unlock_mount(mp);
+ if (!err) {
path_put(&parent_path);
+ if (!attached)
+ free_mnt_ns(ns);
+ }
+ return err;
+}
+
+static int do_move_mount_old(struct path *path, const char *old_name)
+{
+ struct path old_path;
+ int err;
+
+ if (!old_name || !*old_name)
+ return -EINVAL;
+
+ err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
+ if (err)
+ return err;
+
+ err = do_move_mount(&old_path, path);
path_put(&old_path);
return err;
}
@@ -2937,7 +3105,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&path, flags);
else if (flags & MS_MOVE)
- retval = do_move_mount(&path, dev_name);
+ retval = do_move_mount_old(&path, dev_name);
else
retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
dev_name, data_page);
@@ -3166,6 +3334,203 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
}
/*
+ * Create a kernel mount representation for a new, prepared superblock
+ * (specified by fs_fd) and attach to an open_tree-like file descriptor.
+ */
+SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
+ unsigned int, attr_flags)
+{
+ struct mnt_namespace *ns;
+ struct fs_context *fc;
+ struct file *file;
+ struct path newmount;
+ struct mount *mnt;
+ struct fd f;
+ unsigned int mnt_flags = 0;
+ long ret;
+
+ if (!may_mount())
+ return -EPERM;
+
+ if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
+ return -EINVAL;
+
+ if (attr_flags & ~(MOUNT_ATTR_RDONLY |
+ MOUNT_ATTR_NOSUID |
+ MOUNT_ATTR_NODEV |
+ MOUNT_ATTR_NOEXEC |
+ MOUNT_ATTR__ATIME |
+ MOUNT_ATTR_NODIRATIME))
+ return -EINVAL;
+
+ if (attr_flags & MOUNT_ATTR_RDONLY)
+ mnt_flags |= MNT_READONLY;
+ if (attr_flags & MOUNT_ATTR_NOSUID)
+ mnt_flags |= MNT_NOSUID;
+ if (attr_flags & MOUNT_ATTR_NODEV)
+ mnt_flags |= MNT_NODEV;
+ if (attr_flags & MOUNT_ATTR_NOEXEC)
+ mnt_flags |= MNT_NOEXEC;
+ if (attr_flags & MOUNT_ATTR_NODIRATIME)
+ mnt_flags |= MNT_NODIRATIME;
+
+ switch (attr_flags & MOUNT_ATTR__ATIME) {
+ case MOUNT_ATTR_STRICTATIME:
+ break;
+ case MOUNT_ATTR_NOATIME:
+ mnt_flags |= MNT_NOATIME;
+ break;
+ case MOUNT_ATTR_RELATIME:
+ mnt_flags |= MNT_RELATIME;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ f = fdget(fs_fd);
+ if (!f.file)
+ return -EBADF;
+
+ ret = -EINVAL;
+ if (f.file->f_op != &fscontext_fops)
+ goto err_fsfd;
+
+ fc = f.file->private_data;
+
+ ret = mutex_lock_interruptible(&fc->uapi_mutex);
+ if (ret < 0)
+ goto err_fsfd;
+
+ /* There must be a valid superblock or we can't mount it */
+ ret = -EINVAL;
+ if (!fc->root)
+ goto err_unlock;
+
+ ret = -EPERM;
+ if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
+ pr_warn("VFS: Mount too revealing\n");
+ goto err_unlock;
+ }
+
+ ret = -EBUSY;
+ if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
+ goto err_unlock;
+
+ ret = -EPERM;
+ if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
+ goto err_unlock;
+
+ newmount.mnt = vfs_create_mount(fc);
+ if (IS_ERR(newmount.mnt)) {
+ ret = PTR_ERR(newmount.mnt);
+ goto err_unlock;
+ }
+ newmount.dentry = dget(fc->root);
+ newmount.mnt->mnt_flags = mnt_flags;
+
+ /* We've done the mount bit - now move the file context into more or
+ * less the same state as if we'd done an fspick(). We don't want to
+ * do any memory allocation or anything like that at this point as we
+ * don't want to have to handle any errors incurred.
+ */
+ vfs_clean_context(fc);
+
+ ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
+ if (IS_ERR(ns)) {
+ ret = PTR_ERR(ns);
+ goto err_path;
+ }
+ mnt = real_mount(newmount.mnt);
+ mnt->mnt_ns = ns;
+ ns->root = mnt;
+ ns->mounts = 1;
+ list_add(&mnt->mnt_list, &ns->list);
+
+ /* Attach to an apparent O_PATH fd with a note that we need to unmount
+ * it, not just simply put it.
+ */
+ file = dentry_open(&newmount, O_PATH, fc->cred);
+ if (IS_ERR(file)) {
+ dissolve_on_fput(newmount.mnt);
+ ret = PTR_ERR(file);
+ goto err_path;
+ }
+ file->f_mode |= FMODE_NEED_UNMOUNT;
+
+ ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
+ if (ret >= 0)
+ fd_install(ret, file);
+ else
+ fput(file);
+
+err_path:
+ path_put(&newmount);
+err_unlock:
+ mutex_unlock(&fc->uapi_mutex);
+err_fsfd:
+ fdput(f);
+ return ret;
+}
+
+/*
+ * Move a mount from one place to another. In combination with
+ * fsopen()/fsmount() this is used to install a new mount and in combination
+ * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
+ * a mount subtree.
+ *
+ * Note the flags value is a combination of MOVE_MOUNT_* flags.
+ */
+SYSCALL_DEFINE5(move_mount,
+ int, from_dfd, const char *, from_pathname,
+ int, to_dfd, const char *, to_pathname,
+ unsigned int, flags)
+{
+ struct path from_path, to_path;
+ unsigned int lflags;
+ int ret = 0;
+
+ if (!may_mount())
+ return -EPERM;
+
+ if (flags & ~MOVE_MOUNT__MASK)
+ return -EINVAL;
+
+ /* If someone gives a pathname, they aren't permitted to move
+ * from an fd that requires unmount as we can't get at the flag
+ * to clear it afterwards.
+ */
+ lflags = 0;
+ if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
+ if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
+ if (ret < 0)
+ return ret;
+
+ lflags = 0;
+ if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
+ if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
+ if (ret < 0)
+ goto out_from;
+
+ ret = security_move_mount(&from_path, &to_path);
+ if (ret < 0)
+ goto out_to;
+
+ ret = do_move_mount(&from_path, &to_path);
+
+out_to:
+ path_put(&to_path);
+out_from:
+ path_put(&from_path);
+ return ret;
+}
+
+/*
* Return true if path is reachable from root
*
* namespace_sem or mount_lock is held