aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/file_table.c9
-rw-r--r--fs/fs_context.c160
-rw-r--r--fs/fsopen.c477
-rw-r--r--fs/internal.h4
-rw-r--r--fs/namespace.c477
6 files changed, 1055 insertions, 74 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 35945f8139e6..5a51bc2489ba 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
- fs_types.o fs_context.o fs_parser.o
+ fs_types.o fs_context.o fs_parser.o fsopen.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/file_table.c b/fs/file_table.c
index 155d7514a094..3f9c1b452c1d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -255,6 +255,7 @@ static void __fput(struct file *file)
struct dentry *dentry = file->f_path.dentry;
struct vfsmount *mnt = file->f_path.mnt;
struct inode *inode = file->f_inode;
+ fmode_t mode = file->f_mode;
if (unlikely(!(file->f_mode & FMODE_OPENED)))
goto out;
@@ -277,18 +278,20 @@ static void __fput(struct file *file)
if (file->f_op->release)
file->f_op->release(inode, file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
- !(file->f_mode & FMODE_PATH))) {
+ !(mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
}
fops_put(file->f_op);
put_pid(file->f_owner.pid);
- if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_dec(inode);
- if (file->f_mode & FMODE_WRITER) {
+ if (mode & FMODE_WRITER) {
put_write_access(inode);
__mnt_drop_write(mnt);
}
dput(dentry);
+ if (unlikely(mode & FMODE_NEED_UNMOUNT))
+ dissolve_on_fput(mnt);
mntput(mnt);
out:
file_free(file);
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 87e3546b9a52..a47ccd5a4a78 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -11,6 +11,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fs.h>
@@ -23,6 +24,7 @@
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
+#include <asm/sections.h>
#include "mount.h"
#include "internal.h"
@@ -271,6 +273,8 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
fc->cred = get_current_cred();
fc->net_ns = get_net(current->nsproxy->net_ns);
+ mutex_init(&fc->uapi_mutex);
+
switch (purpose) {
case FS_CONTEXT_FOR_MOUNT:
fc->user_ns = get_user_ns(fc->cred->user_ns);
@@ -353,6 +357,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
if (!fc)
return ERR_PTR(-ENOMEM);
+ mutex_init(&fc->uapi_mutex);
+
fc->fs_private = NULL;
fc->s_fs_info = NULL;
fc->source = NULL;
@@ -361,6 +367,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
get_net(fc->net_ns);
get_user_ns(fc->user_ns);
get_cred(fc->cred);
+ if (fc->log)
+ refcount_inc(&fc->log->usage);
/* Can't call put until we've called ->dup */
ret = fc->ops->dup(fc, src_fc);
@@ -378,7 +386,6 @@ err_fc:
}
EXPORT_SYMBOL(vfs_dup_fs_context);
-#ifdef CONFIG_PRINTK
/**
* logfc - Log a message to a filesystem context
* @fc: The filesystem context to log to.
@@ -386,27 +393,100 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
*/
void logfc(struct fs_context *fc, const char *fmt, ...)
{
+ static const char store_failure[] = "OOM: Can't store error string";
+ struct fc_log *log = fc ? fc->log : NULL;
+ const char *p;
va_list va;
+ char *q;
+ u8 freeable;
va_start(va, fmt);
-
- switch (fmt[0]) {
- case 'w':
- vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va);
- break;
- case 'e':
- vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va);
- break;
- default:
- vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va);
- break;
+ if (!strchr(fmt, '%')) {
+ p = fmt;
+ goto unformatted_string;
+ }
+ if (strcmp(fmt, "%s") == 0) {
+ p = va_arg(va, const char *);
+ goto unformatted_string;
}
- pr_cont("\n");
+ q = kvasprintf(GFP_KERNEL, fmt, va);
+copied_string:
+ if (!q)
+ goto store_failure;
+ freeable = 1;
+ goto store_string;
+
+unformatted_string:
+ if ((unsigned long)p >= (unsigned long)__start_rodata &&
+ (unsigned long)p < (unsigned long)__end_rodata)
+ goto const_string;
+ if (log && within_module_core((unsigned long)p, log->owner))
+ goto const_string;
+ q = kstrdup(p, GFP_KERNEL);
+ goto copied_string;
+
+store_failure:
+ p = store_failure;
+const_string:
+ q = (char *)p;
+ freeable = 0;
+store_string:
+ if (!log) {
+ switch (fmt[0]) {
+ case 'w':
+ printk(KERN_WARNING "%s\n", q + 2);
+ break;
+ case 'e':
+ printk(KERN_ERR "%s\n", q + 2);
+ break;
+ default:
+ printk(KERN_NOTICE "%s\n", q + 2);
+ break;
+ }
+ if (freeable)
+ kfree(q);
+ } else {
+ unsigned int logsize = ARRAY_SIZE(log->buffer);
+ u8 index;
+
+ index = log->head & (logsize - 1);
+ BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
+ sizeof(log->tail) != sizeof(u8));
+ if ((u8)(log->head - log->tail) == logsize) {
+ /* The buffer is full, discard the oldest message */
+ if (log->need_free & (1 << index))
+ kfree(log->buffer[index]);
+ log->tail++;
+ }
+
+ log->buffer[index] = q;
+ log->need_free &= ~(1 << index);
+ log->need_free |= freeable << index;
+ log->head++;
+ }
va_end(va);
}
EXPORT_SYMBOL(logfc);
-#endif
+
+/*
+ * Free a logging structure.
+ */
+static void put_fc_log(struct fs_context *fc)
+{
+ struct fc_log *log = fc->log;
+ int i;
+
+ if (log) {
+ if (refcount_dec_and_test(&log->usage)) {
+ fc->log = NULL;
+ for (i = 0; i <= 7; i++)
+ if (log->need_free & (1 << i))
+ kfree(log->buffer[i]);
+ kfree(log);
+ }
+ }
+}
/**
* put_fs_context - Dispose of a superblock configuration context.
@@ -431,6 +511,7 @@ void put_fs_context(struct fs_context *fc)
put_user_ns(fc->user_ns);
put_cred(fc->cred);
kfree(fc->subtype);
+ put_fc_log(fc);
put_filesystem(fc->fs_type);
kfree(fc->source);
kfree(fc);
@@ -640,3 +721,54 @@ int parse_monolithic_mount_data(struct fs_context *fc, void *data)
return monolithic_mount_data(fc, data);
}
+
+/*
+ * Clean up a context after performing an action on it and put it into a state
+ * from where it can be used to reconfigure a superblock.
+ *
+ * Note that here we do only the parts that can't fail; the rest is in
+ * finish_clean_context() below and in between those fs_context is marked
+ * FS_CONTEXT_AWAITING_RECONF. The reason for splitup is that after
+ * successful mount or remount we need to report success to userland.
+ * Trying to do full reinit (for the sake of possible subsequent remount)
+ * and failing to allocate memory would've put us into a nasty situation.
+ * So here we only discard the old state and reinitialization is left
+ * until we actually try to reconfigure.
+ */
+void vfs_clean_context(struct fs_context *fc)
+{
+ if (fc->need_free && fc->ops && fc->ops->free)
+ fc->ops->free(fc);
+ fc->need_free = false;
+ fc->fs_private = NULL;
+ fc->s_fs_info = NULL;
+ fc->sb_flags = 0;
+ security_free_mnt_opts(&fc->security);
+ kfree(fc->subtype);
+ fc->subtype = NULL;
+ kfree(fc->source);
+ fc->source = NULL;
+
+ fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
+ fc->phase = FS_CONTEXT_AWAITING_RECONF;
+}
+
+int finish_clean_context(struct fs_context *fc)
+{
+ int error;
+
+ if (fc->phase != FS_CONTEXT_AWAITING_RECONF)
+ return 0;
+
+ if (fc->fs_type->init_fs_context)
+ error = fc->fs_type->init_fs_context(fc);
+ else
+ error = legacy_init_fs_context(fc);
+ if (unlikely(error)) {
+ fc->phase = FS_CONTEXT_FAILED;
+ return error;
+ }
+ fc->need_free = true;
+ fc->phase = FS_CONTEXT_RECONF_PARAMS;
+ return 0;
+}
diff --git a/fs/fsopen.c b/fs/fsopen.c
new file mode 100644
index 000000000000..3bb9c0c8cbcc
--- /dev/null
+++ b/fs/fsopen.c
@@ -0,0 +1,477 @@
+/* Filesystem access-by-fd.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/security.h>
+#include <linux/anon_inodes.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <uapi/linux/mount.h>
+#include "internal.h"
+#include "mount.h"
+
+/*
+ * Allow the user to read back any error, warning or informational messages.
+ */
+static ssize_t fscontext_read(struct file *file,
+ char __user *_buf, size_t len, loff_t *pos)
+{
+ struct fs_context *fc = file->private_data;
+ struct fc_log *log = fc->log;
+ unsigned int logsize = ARRAY_SIZE(log->buffer);
+ ssize_t ret;
+ char *p;
+ bool need_free;
+ int index, n;
+
+ ret = mutex_lock_interruptible(&fc->uapi_mutex);
+ if (ret < 0)
+ return ret;
+
+ if (log->head == log->tail) {
+ mutex_unlock(&fc->uapi_mutex);
+ return -ENODATA;
+ }
+
+ index = log->tail & (logsize - 1);
+ p = log->buffer[index];
+ need_free = log->need_free & (1 << index);
+ log->buffer[index] = NULL;
+ log->need_free &= ~(1 << index);
+ log->tail++;
+ mutex_unlock(&fc->uapi_mutex);
+
+ ret = -EMSGSIZE;
+ n = strlen(p);
+ if (n > len)
+ goto err_free;
+ ret = -EFAULT;
+ if (copy_to_user(_buf, p, n) != 0)
+ goto err_free;
+ ret = n;
+
+err_free:
+ if (need_free)
+ kfree(p);
+ return ret;
+}
+
+static int fscontext_release(struct inode *inode, struct file *file)
+{
+ struct fs_context *fc = file->private_data;
+
+ if (fc) {
+ file->private_data = NULL;
+ put_fs_context(fc);
+ }
+ return 0;
+}
+
+const struct file_operations fscontext_fops = {
+ .read = fscontext_read,
+ .release = fscontext_release,
+ .llseek = no_llseek,
+};
+
+/*
+ * Attach a filesystem context to a file and an fd.
+ */
+static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
+{
+ int fd;
+
+ fd = anon_inode_getfd("fscontext", &fscontext_fops, fc,
+ O_RDWR | o_flags);
+ if (fd < 0)
+ put_fs_context(fc);
+ return fd;
+}
+
+static int fscontext_alloc_log(struct fs_context *fc)
+{
+ fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
+ if (!fc->log)
+ return -ENOMEM;
+ refcount_set(&fc->log->usage, 1);
+ fc->log->owner = fc->fs_type->owner;
+ return 0;
+}
+
+/*
+ * Open a filesystem by name so that it can be configured for mounting.
+ *
+ * We are allowed to specify a container in which the filesystem will be
+ * opened, thereby indicating which namespaces will be used (notably, which
+ * network namespace will be used for network filesystems).
+ */
+SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
+{
+ struct file_system_type *fs_type;
+ struct fs_context *fc;
+ const char *fs_name;
+ int ret;
+
+ if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (flags & ~FSOPEN_CLOEXEC)
+ return -EINVAL;
+
+ fs_name = strndup_user(_fs_name, PAGE_SIZE);
+ if (IS_ERR(fs_name))
+ return PTR_ERR(fs_name);
+
+ fs_type = get_fs_type(fs_name);
+ kfree(fs_name);
+ if (!fs_type)
+ return -ENODEV;
+
+ fc = fs_context_for_mount(fs_type, 0);
+ put_filesystem(fs_type);
+ if (IS_ERR(fc))
+ return PTR_ERR(fc);
+
+ fc->phase = FS_CONTEXT_CREATE_PARAMS;
+
+ ret = fscontext_alloc_log(fc);
+ if (ret < 0)
+ goto err_fc;
+
+ return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);
+
+err_fc:
+ put_fs_context(fc);
+ return ret;
+}
+
+/*
+ * Pick a superblock into a context for reconfiguration.
+ */
+SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
+{
+ struct fs_context *fc;
+ struct path target;
+ unsigned int lookup_flags;
+ int ret;
+
+ if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((flags & ~(FSPICK_CLOEXEC |
+ FSPICK_SYMLINK_NOFOLLOW |
+ FSPICK_NO_AUTOMOUNT |
+ FSPICK_EMPTY_PATH)) != 0)
+ return -EINVAL;
+
+ lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
+ if (flags & FSPICK_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & FSPICK_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (flags & FSPICK_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ ret = user_path_at(dfd, path, lookup_flags, &target);
+ if (ret < 0)
+ goto err;
+
+ ret = -EINVAL;
+ if (target.mnt->mnt_root != target.dentry)
+ goto err_path;
+
+ fc = fs_context_for_reconfigure(target.dentry, 0, 0);
+ if (IS_ERR(fc)) {
+ ret = PTR_ERR(fc);
+ goto err_path;
+ }
+
+ fc->phase = FS_CONTEXT_RECONF_PARAMS;
+
+ ret = fscontext_alloc_log(fc);
+ if (ret < 0)
+ goto err_fc;
+
+ path_put(&target);
+ return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);
+
+err_fc:
+ put_fs_context(fc);
+err_path:
+ path_put(&target);
+err:
+ return ret;
+}
+
+/*
+ * Check the state and apply the configuration. Note that this function is
+ * allowed to 'steal' the value by setting param->xxx to NULL before returning.
+ */
+static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
+ struct fs_parameter *param)
+{
+ struct super_block *sb;
+ int ret;
+
+ ret = finish_clean_context(fc);
+ if (ret)
+ return ret;
+ switch (cmd) {
+ case FSCONFIG_CMD_CREATE:
+ if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
+ return -EBUSY;
+ fc->phase = FS_CONTEXT_CREATING;
+ ret = vfs_get_tree(fc);
+ if (ret)
+ break;
+ sb = fc->root->d_sb;
+ ret = security_sb_kern_mount(sb);
+ if (unlikely(ret)) {
+ fc_drop_locked(fc);
+ break;
+ }
+ up_write(&sb->s_umount);
+ fc->phase = FS_CONTEXT_AWAITING_MOUNT;
+ return 0;
+ case FSCONFIG_CMD_RECONFIGURE:
+ if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
+ return -EBUSY;
+ fc->phase = FS_CONTEXT_RECONFIGURING;
+ sb = fc->root->d_sb;
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+ down_write(&sb->s_umount);
+ ret = reconfigure_super(fc);
+ up_write(&sb->s_umount);
+ if (ret)
+ break;
+ vfs_clean_context(fc);
+ return 0;
+ default:
+ if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
+ fc->phase != FS_CONTEXT_RECONF_PARAMS)
+ return -EBUSY;
+
+ return vfs_parse_fs_param(fc, param);
+ }
+ fc->phase = FS_CONTEXT_FAILED;
+ return ret;
+}
+
+/**
+ * sys_fsconfig - Set parameters and trigger actions on a context
+ * @fd: The filesystem context to act upon
+ * @cmd: The action to take
+ * @_key: Where appropriate, the parameter key to set
+ * @_value: Where appropriate, the parameter value to set
+ * @aux: Additional information for the value
+ *
+ * This system call is used to set parameters on a context, including
+ * superblock settings, data source and security labelling.
+ *
+ * Actions include triggering the creation of a superblock and the
+ * reconfiguration of the superblock attached to the specified context.
+ *
+ * When setting a parameter, @cmd indicates the type of value being proposed
+ * and @_key indicates the parameter to be altered.
+ *
+ * @_value and @aux are used to specify the value, should a value be required:
+ *
+ * (*) fsconfig_set_flag: No value is specified. The parameter must be boolean
+ * in nature. The key may be prefixed with "no" to invert the
+ * setting. @_value must be NULL and @aux must be 0.
+ *
+ * (*) fsconfig_set_string: A string value is specified. The parameter can be
+ * expecting boolean, integer, string or take a path. A conversion to an
+ * appropriate type will be attempted (which may include looking up as a
+ * path). @_value points to a NUL-terminated string and @aux must be 0.
+ *
+ * (*) fsconfig_set_binary: A binary blob is specified. @_value points to the
+ * blob and @aux indicates its size. The parameter must be expecting a
+ * blob.
+ *
+ * (*) fsconfig_set_path: A non-empty path is specified. The parameter must be
+ * expecting a path object. @_value points to a NUL-terminated string that
+ * is the path and @aux is a file descriptor at which to start a relative
+ * lookup or AT_FDCWD.
+ *
+ * (*) fsconfig_set_path_empty: As fsconfig_set_path, but with AT_EMPTY_PATH
+ * implied.
+ *
+ * (*) fsconfig_set_fd: An open file descriptor is specified. @_value must be
+ * NULL and @aux indicates the file descriptor.
+ */
+SYSCALL_DEFINE5(fsconfig,
+ int, fd,
+ unsigned int, cmd,
+ const char __user *, _key,
+ const void __user *, _value,
+ int, aux)
+{
+ struct fs_context *fc;
+ struct fd f;
+ int ret;
+
+ struct fs_parameter param = {
+ .type = fs_value_is_undefined,
+ };
+
+ if (fd < 0)
+ return -EINVAL;
+
+ switch (cmd) {
+ case FSCONFIG_SET_FLAG:
+ if (!_key || _value || aux)
+ return -EINVAL;
+ break;
+ case FSCONFIG_SET_STRING:
+ if (!_key || !_value || aux)
+ return -EINVAL;
+ break;
+ case FSCONFIG_SET_BINARY:
+ if (!_key || !_value || aux <= 0 || aux > 1024 * 1024)
+ return -EINVAL;
+ break;
+ case FSCONFIG_SET_PATH:
+ case FSCONFIG_SET_PATH_EMPTY:
+ if (!_key || !_value || (aux != AT_FDCWD && aux < 0))
+ return -EINVAL;
+ break;
+ case FSCONFIG_SET_FD:
+ if (!_key || _value || aux < 0)
+ return -EINVAL;
+ break;
+ case FSCONFIG_CMD_CREATE:
+ case FSCONFIG_CMD_RECONFIGURE:
+ if (_key || _value || aux)
+ return -EINVAL;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+ ret = -EINVAL;
+ if (f.file->f_op != &fscontext_fops)
+ goto out_f;
+
+ fc = f.file->private_data;
+ if (fc->ops == &legacy_fs_context_ops) {
+ switch (cmd) {
+ case FSCONFIG_SET_BINARY:
+ case FSCONFIG_SET_PATH:
+ case FSCONFIG_SET_PATH_EMPTY:
+ case FSCONFIG_SET_FD:
+ ret = -EOPNOTSUPP;
+ goto out_f;
+ }
+ }
+
+ if (_key) {
+ param.key = strndup_user(_key, 256);
+ if (IS_ERR(param.key)) {
+ ret = PTR_ERR(param.key);
+ goto out_f;
+ }
+ }
+
+ switch (cmd) {
+ case FSCONFIG_SET_FLAG:
+ param.type = fs_value_is_flag;
+ break;
+ case FSCONFIG_SET_STRING:
+ param.type = fs_value_is_string;
+ param.string = strndup_user(_value, 256);
+ if (IS_ERR(param.string)) {
+ ret = PTR_ERR(param.string);
+ goto out_key;
+ }
+ param.size = strlen(param.string);
+ break;
+ case FSCONFIG_SET_BINARY:
+ param.type = fs_value_is_blob;
+ param.size = aux;
+ param.blob = memdup_user_nul(_value, aux);
+ if (IS_ERR(param.blob)) {
+ ret = PTR_ERR(param.blob);
+ goto out_key;
+ }
+ break;
+ case FSCONFIG_SET_PATH:
+ param.type = fs_value_is_filename;
+ param.name = getname_flags(_value, 0, NULL);
+ if (IS_ERR(param.name)) {
+ ret = PTR_ERR(param.name);
+ goto out_key;
+ }
+ param.dirfd = aux;
+ param.size = strlen(param.name->name);
+ break;
+ case FSCONFIG_SET_PATH_EMPTY:
+ param.type = fs_value_is_filename_empty;
+ param.name = getname_flags(_value, LOOKUP_EMPTY, NULL);
+ if (IS_ERR(param.name)) {
+ ret = PTR_ERR(param.name);
+ goto out_key;
+ }
+ param.dirfd = aux;
+ param.size = strlen(param.name->name);
+ break;
+ case FSCONFIG_SET_FD:
+ param.type = fs_value_is_file;
+ ret = -EBADF;
+ param.file = fget(aux);
+ if (!param.file)
+ goto out_key;
+ break;
+ default:
+ break;
+ }
+
+ ret = mutex_lock_interruptible(&fc->uapi_mutex);
+ if (ret == 0) {
+ ret = vfs_fsconfig_locked(fc, cmd, &param);
+ mutex_unlock(&fc->uapi_mutex);
+ }
+
+ /* Clean up the our record of any value that we obtained from
+ * userspace. Note that the value may have been stolen by the LSM or
+ * filesystem, in which case the value pointer will have been cleared.
+ */
+ switch (cmd) {
+ case FSCONFIG_SET_STRING:
+ case FSCONFIG_SET_BINARY:
+ kfree(param.string);
+ break;
+ case FSCONFIG_SET_PATH:
+ case FSCONFIG_SET_PATH_EMPTY:
+ if (param.name)
+ putname(param.name);
+ break;
+ case FSCONFIG_SET_FD:
+ if (param.file)
+ fput(param.file);
+ break;
+ default:
+ break;
+ }
+out_key:
+ kfree(param.key);
+out_f:
+ fdput(f);
+ return ret;
+}
diff --git a/fs/internal.h b/fs/internal.h
index 17a8ae967493..0010889f2e85 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,8 +55,11 @@ extern void __init chrdev_init(void);
/*
* fs_context.c
*/
+extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void fc_drop_locked(struct fs_context *);
+extern void vfs_clean_context(struct fs_context *fc);
+extern int finish_clean_context(struct fs_context *fc);
/*
* namei.c
@@ -92,6 +95,7 @@ extern void __init mnt_init(void);
extern int __mnt_want_write_file(struct file *);
extern void __mnt_drop_write_file(struct file *);
+extern void dissolve_on_fput(struct vfsmount *);
/*
* fs_struct.c
*/
diff --git a/fs/namespace.c b/fs/namespace.c
index c9cab307fa77..3357c3d65475 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
#include <linux/init.h> /* init_rootfs */
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
+#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
@@ -1832,6 +1833,27 @@ struct vfsmount *collect_mounts(const struct path *path)
return &tree->mnt;
}
+static void free_mnt_ns(struct mnt_namespace *);
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
+
+void dissolve_on_fput(struct vfsmount *mnt)
+{
+ struct mnt_namespace *ns;
+ namespace_lock();
+ lock_mount_hash();
+ ns = real_mount(mnt)->mnt_ns;
+ if (ns) {
+ if (is_anon_ns(ns))
+ umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+ else
+ ns = NULL;
+ }
+ unlock_mount_hash();
+ namespace_unlock();
+ if (ns)
+ free_mnt_ns(ns);
+}
+
void drop_collected_mounts(struct vfsmount *mnt)
{
namespace_lock();
@@ -2065,6 +2087,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
attach_mnt(source_mnt, dest_mnt, dest_mp);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
+ if (source_mnt->mnt_ns) {
+ /* move from anon - the caller will destroy */
+ list_del_init(&source_mnt->mnt_ns->list);
+ }
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
commit_tree(source_mnt);
}
@@ -2222,6 +2248,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false;
}
+static struct mount *__do_loopback(struct path *old_path, int recurse)
+{
+ struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
+
+ if (IS_MNT_UNBINDABLE(old))
+ return mnt;
+
+ if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
+ return mnt;
+
+ if (!recurse && has_locked_children(old, old_path->dentry))
+ return mnt;
+
+ if (recurse)
+ mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
+ else
+ mnt = clone_mnt(old, old_path->dentry, 0);
+
+ if (!IS_ERR(mnt))
+ mnt->mnt.mnt_flags &= ~MNT_LOCKED;
+
+ return mnt;
+}
+
/*
* do loopback mount.
*/
@@ -2229,7 +2279,7 @@ static int do_loopback(struct path *path, const char *old_name,
int recurse)
{
struct path old_path;
- struct mount *mnt = NULL, *old, *parent;
+ struct mount *mnt = NULL, *parent;
struct mountpoint *mp;
int err;
if (!old_name || !*old_name)
@@ -2243,38 +2293,21 @@ static int do_loopback(struct path *path, const char *old_name,
goto out;
mp = lock_mount(path);
- err = PTR_ERR(mp);
- if (IS_ERR(mp))
+ if (IS_ERR(mp)) {
+ err = PTR_ERR(mp);
goto out;
+ }
- old = real_mount(old_path.mnt);
parent = real_mount(path->mnt);
-
- err = -EINVAL;
- if (IS_MNT_UNBINDABLE(old))
- goto out2;
-
if (!check_mnt(parent))
goto out2;
- if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
- goto out2;
-
- if (!recurse && has_locked_children(old, old_path.dentry))
- goto out2;
-
- if (recurse)
- mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
- else
- mnt = clone_mnt(old, old_path.dentry, 0);
-
+ mnt = __do_loopback(&old_path, recurse);
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
goto out2;
}
- mnt->mnt.mnt_flags &= ~MNT_LOCKED;
-
err = graft_tree(mnt, parent, mp);
if (err) {
lock_mount_hash();
@@ -2288,6 +2321,96 @@ out:
return err;
}
+static struct file *open_detached_copy(struct path *path, bool recursive)
+{
+ struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+ struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
+ struct mount *mnt, *p;
+ struct file *file;
+
+ if (IS_ERR(ns))
+ return ERR_CAST(ns);
+
+ namespace_lock();
+ mnt = __do_loopback(path, recursive);
+ if (IS_ERR(mnt)) {
+ namespace_unlock();
+ free_mnt_ns(ns);
+ return ERR_CAST(mnt);
+ }
+
+ lock_mount_hash();
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
+ p->mnt_ns = ns;
+ ns->mounts++;
+ }
+ ns->root = mnt;
+ list_add_tail(&ns->list, &mnt->mnt_list);
+ mntget(&mnt->mnt);
+ unlock_mount_hash();
+ namespace_unlock();
+
+ mntput(path->mnt);
+ path->mnt = &mnt->mnt;
+ file = dentry_open(path, O_PATH, current_cred());
+ if (IS_ERR(file))
+ dissolve_on_fput(path->mnt);
+ else
+ file->f_mode |= FMODE_NEED_UNMOUNT;
+ return file;
+}
+
+SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
+{
+ struct file *file;
+ struct path path;
+ int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
+ bool detached = flags & OPEN_TREE_CLONE;
+ int error;
+ int fd;
+
+ BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
+
+ if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
+ AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
+ OPEN_TREE_CLOEXEC))
+ return -EINVAL;
+
+ if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
+ return -EINVAL;
+
+ if (flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ if (detached && !may_mount())
+ return -EPERM;
+
+ fd = get_unused_fd_flags(flags & O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ error = user_path_at(dfd, filename, lookup_flags, &path);
+ if (unlikely(error)) {
+ file = ERR_PTR(error);
+ } else {
+ if (detached)
+ file = open_detached_copy(&path, flags & AT_RECURSIVE);
+ else
+ file = dentry_open(&path, O_PATH, current_cred());
+ path_put(&path);
+ }
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ return PTR_ERR(file);
+ }
+ fd_install(fd, file);
+ return fd;
+}
+
/*
* Don't allow locked mount flags to be cleared.
*
@@ -2426,72 +2549,117 @@ static inline int tree_contains_unbindable(struct mount *mnt)
return 0;
}
-static int do_move_mount(struct path *path, const char *old_name)
+/*
+ * Check that there aren't references to earlier/same mount namespaces in the
+ * specified subtree. Such references can act as pins for mount namespaces
+ * that aren't checked by the mount-cycle checking code, thereby allowing
+ * cycles to be made.
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
{
- struct path old_path, parent_path;
+ struct mount *p;
+ bool ret = false;
+
+ lock_mount_hash();
+ for (p = subtree; p; p = next_mnt(p, subtree))
+ if (mnt_ns_loop(p->mnt.mnt_root))
+ goto out;
+
+ ret = true;
+out:
+ unlock_mount_hash();
+ return ret;
+}
+
+static int do_move_mount(struct path *old_path, struct path *new_path)
+{
+ struct path parent_path = {.mnt = NULL, .dentry = NULL};
+ struct mnt_namespace *ns;
struct mount *p;
struct mount *old;
struct mountpoint *mp;
int err;
- if (!old_name || !*old_name)
- return -EINVAL;
- err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
- if (err)
- return err;
+ bool attached;
- mp = lock_mount(path);
- err = PTR_ERR(mp);
+ mp = lock_mount(new_path);
if (IS_ERR(mp))
- goto out;
+ return PTR_ERR(mp);
- old = real_mount(old_path.mnt);
- p = real_mount(path->mnt);
+ old = real_mount(old_path->mnt);
+ p = real_mount(new_path->mnt);
+ attached = mnt_has_parent(old);
+ ns = old->mnt_ns;
err = -EINVAL;
- if (!check_mnt(p) || !check_mnt(old))
- goto out1;
+ /* The mountpoint must be in our namespace. */
+ if (!check_mnt(p))
+ goto out;
- if (old->mnt.mnt_flags & MNT_LOCKED)
- goto out1;
+ /* The thing moved should be either ours or completely unattached. */
+ if (attached && !check_mnt(old))
+ goto out;
- err = -EINVAL;
- if (old_path.dentry != old_path.mnt->mnt_root)
- goto out1;
+ if (!attached && !is_anon_ns(ns))
+ goto out;
- if (!mnt_has_parent(old))
- goto out1;
+ if (old->mnt.mnt_flags & MNT_LOCKED)
+ goto out;
- if (d_is_dir(path->dentry) !=
- d_is_dir(old_path.dentry))
- goto out1;
+ if (old_path->dentry != old_path->mnt->mnt_root)
+ goto out;
+
+ if (d_is_dir(new_path->dentry) !=
+ d_is_dir(old_path->dentry))
+ goto out;
/*
* Don't move a mount residing in a shared parent.
*/
- if (IS_MNT_SHARED(old->mnt_parent))
- goto out1;
+ if (attached && IS_MNT_SHARED(old->mnt_parent))
+ goto out;
/*
* Don't move a mount tree containing unbindable mounts to a destination
* mount which is shared.
*/
if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
- goto out1;
+ goto out;
err = -ELOOP;
+ if (!check_for_nsfs_mounts(old))
+ goto out;
for (; mnt_has_parent(p); p = p->mnt_parent)
if (p == old)
- goto out1;
+ goto out;
- err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
+ err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
+ attached ? &parent_path : NULL);
if (err)
- goto out1;
+ goto out;
/* if the mount is moved, it should no longer be expire
* automatically */
list_del_init(&old->mnt_expire);
-out1:
- unlock_mount(mp);
out:
- if (!err)
+ unlock_mount(mp);
+ if (!err) {
path_put(&parent_path);
+ if (!attached)
+ free_mnt_ns(ns);
+ }
+ return err;
+}
+
+static int do_move_mount_old(struct path *path, const char *old_name)
+{
+ struct path old_path;
+ int err;
+
+ if (!old_name || !*old_name)
+ return -EINVAL;
+
+ err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
+ if (err)
+ return err;
+
+ err = do_move_mount(&old_path, path);
path_put(&old_path);
return err;
}
@@ -2937,7 +3105,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&path, flags);
else if (flags & MS_MOVE)
- retval = do_move_mount(&path, dev_name);
+ retval = do_move_mount_old(&path, dev_name);
else
retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
dev_name, data_page);
@@ -3166,6 +3334,203 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
}
/*
+ * Create a kernel mount representation for a new, prepared superblock
+ * (specified by fs_fd) and attach to an open_tree-like file descriptor.
+ */
+SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
+ unsigned int, attr_flags)
+{
+ struct mnt_namespace *ns;
+ struct fs_context *fc;
+ struct file *file;
+ struct path newmount;
+ struct mount *mnt;
+ struct fd f;
+ unsigned int mnt_flags = 0;
+ long ret;
+
+ if (!may_mount())
+ return -EPERM;
+
+ if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
+ return -EINVAL;
+
+ if (attr_flags & ~(MOUNT_ATTR_RDONLY |
+ MOUNT_ATTR_NOSUID |
+ MOUNT_ATTR_NODEV |
+ MOUNT_ATTR_NOEXEC |
+ MOUNT_ATTR__ATIME |
+ MOUNT_ATTR_NODIRATIME))
+ return -EINVAL;
+
+ if (attr_flags & MOUNT_ATTR_RDONLY)
+ mnt_flags |= MNT_READONLY;
+ if (attr_flags & MOUNT_ATTR_NOSUID)
+ mnt_flags |= MNT_NOSUID;
+ if (attr_flags & MOUNT_ATTR_NODEV)
+ mnt_flags |= MNT_NODEV;
+ if (attr_flags & MOUNT_ATTR_NOEXEC)
+ mnt_flags |= MNT_NOEXEC;
+ if (attr_flags & MOUNT_ATTR_NODIRATIME)
+ mnt_flags |= MNT_NODIRATIME;
+
+ switch (attr_flags & MOUNT_ATTR__ATIME) {
+ case MOUNT_ATTR_STRICTATIME:
+ break;
+ case MOUNT_ATTR_NOATIME:
+ mnt_flags |= MNT_NOATIME;
+ break;
+ case MOUNT_ATTR_RELATIME:
+ mnt_flags |= MNT_RELATIME;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ f = fdget(fs_fd);
+ if (!f.file)
+ return -EBADF;
+
+ ret = -EINVAL;
+ if (f.file->f_op != &fscontext_fops)
+ goto err_fsfd;
+
+ fc = f.file->private_data;
+
+ ret = mutex_lock_interruptible(&fc->uapi_mutex);
+ if (ret < 0)
+ goto err_fsfd;
+
+ /* There must be a valid superblock or we can't mount it */
+ ret = -EINVAL;
+ if (!fc->root)
+ goto err_unlock;
+
+ ret = -EPERM;
+ if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
+ pr_warn("VFS: Mount too revealing\n");
+ goto err_unlock;
+ }
+
+ ret = -EBUSY;
+ if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
+ goto err_unlock;
+
+ ret = -EPERM;
+ if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
+ goto err_unlock;
+
+ newmount.mnt = vfs_create_mount(fc);
+ if (IS_ERR(newmount.mnt)) {
+ ret = PTR_ERR(newmount.mnt);
+ goto err_unlock;
+ }
+ newmount.dentry = dget(fc->root);
+ newmount.mnt->mnt_flags = mnt_flags;
+
+ /* We've done the mount bit - now move the file context into more or
+ * less the same state as if we'd done an fspick(). We don't want to
+ * do any memory allocation or anything like that at this point as we
+ * don't want to have to handle any errors incurred.
+ */
+ vfs_clean_context(fc);
+
+ ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
+ if (IS_ERR(ns)) {
+ ret = PTR_ERR(ns);
+ goto err_path;
+ }
+ mnt = real_mount(newmount.mnt);
+ mnt->mnt_ns = ns;
+ ns->root = mnt;
+ ns->mounts = 1;
+ list_add(&mnt->mnt_list, &ns->list);
+
+ /* Attach to an apparent O_PATH fd with a note that we need to unmount
+ * it, not just simply put it.
+ */
+ file = dentry_open(&newmount, O_PATH, fc->cred);
+ if (IS_ERR(file)) {
+ dissolve_on_fput(newmount.mnt);
+ ret = PTR_ERR(file);
+ goto err_path;
+ }
+ file->f_mode |= FMODE_NEED_UNMOUNT;
+
+ ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
+ if (ret >= 0)
+ fd_install(ret, file);
+ else
+ fput(file);
+
+err_path:
+ path_put(&newmount);
+err_unlock:
+ mutex_unlock(&fc->uapi_mutex);
+err_fsfd:
+ fdput(f);
+ return ret;
+}
+
+/*
+ * Move a mount from one place to another. In combination with
+ * fsopen()/fsmount() this is used to install a new mount and in combination
+ * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
+ * a mount subtree.
+ *
+ * Note the flags value is a combination of MOVE_MOUNT_* flags.
+ */
+SYSCALL_DEFINE5(move_mount,
+ int, from_dfd, const char *, from_pathname,
+ int, to_dfd, const char *, to_pathname,
+ unsigned int, flags)
+{
+ struct path from_path, to_path;
+ unsigned int lflags;
+ int ret = 0;
+
+ if (!may_mount())
+ return -EPERM;
+
+ if (flags & ~MOVE_MOUNT__MASK)
+ return -EINVAL;
+
+ /* If someone gives a pathname, they aren't permitted to move
+ * from an fd that requires unmount as we can't get at the flag
+ * to clear it afterwards.
+ */
+ lflags = 0;
+ if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
+ if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
+ if (ret < 0)
+ return ret;
+
+ lflags = 0;
+ if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
+ if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
+ if (ret < 0)
+ goto out_from;
+
+ ret = security_move_mount(&from_path, &to_path);
+ if (ret < 0)
+ goto out_to;
+
+ ret = do_move_mount(&from_path, &to_path);
+
+out_to:
+ path_put(&to_path);
+out_from:
+ path_put(&from_path);
+ return ret;
+}
+
+/*
* Return true if path is reachable from root
*
* namespace_sem or mount_lock is held