about | summary | refs | log | tree | commit | diff | stats
path: root/fs/overlayfs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- fs/overlayfs/Kconfig 6
-rw-r--r-- fs/overlayfs/copy_up.c 379
-rw-r--r-- fs/overlayfs/dir.c 271
-rw-r--r-- fs/overlayfs/export.c 86
-rw-r--r-- fs/overlayfs/file.c 381
-rw-r--r-- fs/overlayfs/inode.c 505
-rw-r--r-- fs/overlayfs/namei.c 275
-rw-r--r-- fs/overlayfs/overlayfs.h 419
-rw-r--r-- fs/overlayfs/ovl_entry.h 29
-rw-r--r-- fs/overlayfs/readdir.c 261
-rw-r--r-- fs/overlayfs/super.c 920
-rw-r--r-- fs/overlayfs/util.c 395
12 files changed, 2669 insertions, 1258 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 714c14c47ca5..dd188c7996b3 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -9,7 +9,7 @@ config OVERLAY_FS
'lower' filesystem is either hidden or, in the case of directories,
merged with the 'upper' object.
- For more information see Documentation/filesystems/overlayfs.txt
+ For more information see Documentation/filesystems/overlayfs.rst
config OVERLAY_FS_REDIRECT_DIR
bool "Overlayfs: turn on redirect directory feature by default"
@@ -38,7 +38,7 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
If backward compatibility is not an issue, then it is safe and
recommended to say N here.
- For more information, see Documentation/filesystems/overlayfs.txt
+ For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say Y.
@@ -103,7 +103,7 @@ config OVERLAY_FS_XINO_AUTO
If compatibility with applications that expect 32bit inodes is not an
issue, then it is safe and recommended to say Y here.
- For more information, see Documentation/filesystems/overlayfs.txt
+ For more information, see Documentation/filesystems/overlayfs.rst
If unsure, say N.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 9fc47c2e078d..f436d8847f08 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/fileattr.h>
#include <linux/splice.h>
#include <linux/xattr.h>
#include <linux/security.h>
@@ -36,11 +37,19 @@ static int ovl_ccup_get(char *buf, const struct kernel_param *param)
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
-int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+static bool ovl_must_copy_xattr(const char *name)
{
+ return !strcmp(name, XATTR_POSIX_ACL_ACCESS) ||
+ !strcmp(name, XATTR_POSIX_ACL_DEFAULT) ||
+ !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
+}
+
+int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct dentry *new)
+{
+ struct dentry *old = oldpath->dentry;
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
- int uninitialized_var(error);
+ int error = 0;
size_t slen;
if (!(old->d_inode->i_opflags & IOP_XATTR) ||
@@ -54,7 +63,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
return list_size;
}
- buf = kzalloc(list_size, GFP_KERNEL);
+ buf = kvzalloc(list_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -74,12 +83,20 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
}
list_size -= slen;
- if (ovl_is_private_xattr(name))
+ if (ovl_is_private_xattr(sb, name))
continue;
+
+ error = security_inode_copy_up_xattr(name);
+ if (error < 0 && error != -EOPNOTSUPP)
+ break;
+ if (error == 1) {
+ error = 0;
+ continue; /* Discard */
+ }
retry:
- size = vfs_getxattr(old, name, value, value_size);
+ size = ovl_do_getxattr(oldpath, name, value, value_size);
if (size == -ERANGE)
- size = vfs_getxattr(old, name, NULL, 0);
+ size = ovl_do_getxattr(oldpath, name, NULL, 0);
if (size < 0) {
error = size;
@@ -89,37 +106,98 @@ retry:
if (size > value_size) {
void *new;
- new = krealloc(value, size, GFP_KERNEL);
+ new = kvmalloc(size, GFP_KERNEL);
if (!new) {
error = -ENOMEM;
break;
}
+ kvfree(value);
value = new;
value_size = size;
goto retry;
}
- error = security_inode_copy_up_xattr(name);
- if (error < 0 && error != -EOPNOTSUPP)
- break;
- if (error == 1) {
+ error = ovl_do_setxattr(OVL_FS(sb), new, name, value, size, 0);
+ if (error) {
+ if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
+ break;
+
+ /* Ignore failure to copy unknown xattrs */
error = 0;
- continue; /* Discard */
}
- error = vfs_setxattr(new, name, value, size, 0);
- if (error)
- break;
}
- kfree(value);
+ kvfree(value);
out:
- kfree(buf);
+ kvfree(buf);
return error;
}
-static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
+static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
+ const struct path *new)
+{
+ struct fileattr oldfa = { .flags_valid = true };
+ struct fileattr newfa = { .flags_valid = true };
+ int err;
+
+ err = ovl_real_fileattr_get(old, &oldfa);
+ if (err) {
+ /* Ntfs-3g returns -EINVAL for "no fileattr support" */
+ if (err == -ENOTTY || err == -EINVAL)
+ return 0;
+ pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n",
+ old->dentry, err);
+ return err;
+ }
+
+ /*
+ * We cannot set immutable and append-only flags on upper inode,
+ * because we would not be able to link upper inode to upper dir
+ * nor set overlay private xattr on upper inode.
+ * Store these flags in overlay.protattr xattr instead.
+ */
+ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+ err = ovl_set_protattr(inode, new->dentry, &oldfa);
+ if (err == -EPERM)
+ pr_warn_once("copying fileattr: no xattr on upper\n");
+ else if (err)
+ return err;
+ }
+
+ /* Don't bother copying flags if none are set */
+ if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK))
+ return 0;
+
+ err = ovl_real_fileattr_get(new, &newfa);
+ if (err) {
+ /*
+ * Returning an error if upper doesn't support fileattr will
+ * result in a regression, so revert to the old behavior.
+ */
+ if (err == -ENOTTY || err == -EINVAL) {
+ pr_warn_once("copying fileattr: no support on upper\n");
+ return 0;
+ }
+ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
+ new->dentry, err);
+ return err;
+ }
+
+ BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL);
+ newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK;
+ newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK);
+
+ BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+ newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK;
+ newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK);
+
+ return ovl_real_fileattr_set(new, &newfa);
+}
+
+static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
+ struct file *new_file, loff_t len)
{
+ struct path datapath;
struct file *old_file;
- struct file *new_file;
loff_t old_pos = 0;
loff_t new_pos = 0;
loff_t cloned;
@@ -128,28 +206,22 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
bool skip_hole = false;
int error = 0;
- if (len == 0)
- return 0;
+ ovl_path_lowerdata(dentry, &datapath);
+ if (WARN_ON(datapath.dentry == NULL))
+ return -EIO;
- old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
+ old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
- new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
- if (IS_ERR(new_file)) {
- error = PTR_ERR(new_file);
- goto out_fput;
- }
-
/* Try to use clone_file_range to clone up within the same fs */
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
if (cloned == len)
- goto out;
+ goto out_fput;
/* Couldn't clone, so now we try to copy the data */
/* Check if lower fs supports seek operation */
- if (old_file->f_mode & FMODE_LSEEK &&
- old_file->f_op->llseek)
+ if (old_file->f_mode & FMODE_LSEEK)
skip_hole = true;
while (len) {
@@ -205,26 +277,26 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
len -= bytes;
}
-out:
- if (!error)
+ if (!error && ovl_should_sync(ofs))
error = vfs_fsync(new_file, 0);
- fput(new_file);
out_fput:
fput(old_file);
return error;
}
-static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
+static int ovl_set_size(struct ovl_fs *ofs,
+ struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
.ia_valid = ATTR_SIZE,
.ia_size = stat->size,
};
- return notify_change(upperdentry, &attr, NULL);
+ return ovl_do_notify_change(ofs, upperdentry, &attr);
}
-static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
+static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry,
+ struct kstat *stat)
{
struct iattr attr = {
.ia_valid =
@@ -233,10 +305,11 @@ static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
.ia_mtime = stat->mtime,
};
- return notify_change(upperdentry, &attr, NULL);
+ return ovl_do_notify_change(ofs, upperdentry, &attr);
}
-int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
+int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry,
+ struct kstat *stat)
{
int err = 0;
@@ -245,23 +318,24 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
.ia_valid = ATTR_MODE,
.ia_mode = stat->mode,
};
- err = notify_change(upperdentry, &attr, NULL);
+ err = ovl_do_notify_change(ofs, upperdentry, &attr);
}
if (!err) {
struct iattr attr = {
.ia_valid = ATTR_UID | ATTR_GID,
- .ia_uid = stat->uid,
- .ia_gid = stat->gid,
+ .ia_vfsuid = VFSUIDT_INIT(stat->uid),
+ .ia_vfsgid = VFSGIDT_INIT(stat->gid),
};
- err = notify_change(upperdentry, &attr, NULL);
+ err = ovl_do_notify_change(ofs, upperdentry, &attr);
}
if (!err)
- ovl_set_timestamps(upperdentry, stat);
+ ovl_set_timestamps(ofs, upperdentry, stat);
return err;
}
-struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+ bool is_upper)
{
struct ovl_fh *fh;
int fh_type, dwords;
@@ -305,7 +379,8 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
if (is_upper)
fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
fh->fb.len = sizeof(fh->fb) + buflen;
- fh->fb.uuid = *uuid;
+ if (ofs->config.uuid)
+ fh->fb.uuid = *uuid;
return fh;
@@ -314,7 +389,7 @@ out_err:
return ERR_PTR(err);
}
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
struct dentry *upper)
{
const struct ovl_fh *fh = NULL;
@@ -326,7 +401,7 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
* up and a pure upper inode.
*/
if (ovl_can_decode_fh(lower->d_sb)) {
- fh = ovl_encode_real_fh(lower, false);
+ fh = ovl_encode_real_fh(ofs, lower, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
@@ -334,24 +409,26 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
/*
* Do not fail when upper doesn't support xattrs.
*/
- err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
+ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf,
fh ? fh->fb.len : 0, 0);
kfree(fh);
- return err;
+ /* Ignore -EPERM from setting "user.*" on symlink/special */
+ return err == -EPERM ? 0 : err;
}
/* Store file handle of @upper dir in @index dir entry */
-static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
+static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
+ struct dentry *index)
{
const struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(upper, true);
+ fh = ovl_encode_real_fh(ofs, upper, true);
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0);
+ err = ovl_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len);
kfree(fh);
return err;
@@ -365,6 +442,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
struct dentry *upper)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
struct inode *dir = d_inode(indexdir);
struct dentry *index = NULL;
@@ -387,29 +465,29 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
return -EIO;
- err = ovl_get_index_name(origin, &name);
+ err = ovl_get_index_name(ofs, origin, &name);
if (err)
return err;
- temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
+ temp = ovl_create_temp(ofs, indexdir, OVL_CATTR(S_IFDIR | 0));
err = PTR_ERR(temp);
if (IS_ERR(temp))
goto free_name;
- err = ovl_set_upper_fh(upper, temp);
+ err = ovl_set_upper_fh(ofs, upper, temp);
if (err)
goto out;
- index = lookup_one_len(name.name, indexdir, name.len);
+ index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
} else {
- err = ovl_do_rename(dir, temp, dir, index, 0);
+ err = ovl_do_rename(ofs, dir, temp, dir, index, 0);
dput(index);
}
out:
if (err)
- ovl_cleanup(dir, temp);
+ ovl_cleanup(ofs, dir, temp);
dput(temp);
free_name:
kfree(name.name);
@@ -436,6 +514,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
int err;
struct dentry *upper;
struct dentry *upperdir = ovl_dentry_upper(c->parent);
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *udir = d_inode(upperdir);
/* Mark parent "impure" because it may now contain non-pure upper */
@@ -448,16 +527,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
return err;
inode_lock_nested(udir, I_MUTEX_PARENT);
- upper = lookup_one_len(c->dentry->d_name.name, upperdir,
- c->dentry->d_name.len);
+ upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir,
+ c->dentry->d_name.len);
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
- err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
+ err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper);
dput(upper);
if (!err) {
/* Restore timestamps on parent (best effort) */
- ovl_set_timestamps(upperdir, &c->pstat);
+ ovl_set_timestamps(ofs, upperdir, &c->pstat);
ovl_dentry_set_upper_alias(c->dentry);
}
}
@@ -470,32 +549,46 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
return err;
}
-static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+static int ovl_copy_up_data(struct ovl_copy_up_ctx *c, const struct path *temp)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct file *new_file;
int err;
- /*
- * Copy up data first and then xattrs. Writing data after
- * xattrs will remove security.capability xattr automatically.
- */
- if (S_ISREG(c->stat.mode) && !c->metacopy) {
- struct path upperpath, datapath;
+ if (!S_ISREG(c->stat.mode) || c->metacopy || !c->stat.size)
+ return 0;
- ovl_path_upper(c->dentry, &upperpath);
- if (WARN_ON(upperpath.dentry != NULL))
- return -EIO;
- upperpath.dentry = temp;
+ new_file = ovl_path_open(temp, O_LARGEFILE | O_WRONLY);
+ if (IS_ERR(new_file))
+ return PTR_ERR(new_file);
- ovl_path_lowerdata(c->dentry, &datapath);
- err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
- if (err)
- return err;
- }
+ err = ovl_copy_up_file(ofs, c->dentry, new_file, c->stat.size);
+ fput(new_file);
+
+ return err;
+}
- err = ovl_copy_xattr(c->lowerpath.dentry, temp);
+static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct inode *inode = d_inode(c->dentry);
+ struct path upperpath = { .mnt = ovl_upper_mnt(ofs), .dentry = temp };
+ int err;
+
+ err = ovl_copy_xattr(c->dentry->d_sb, &c->lowerpath, temp);
if (err)
return err;
+ if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) {
+ /*
+ * Copy the fileattr inode flags that are the source of already
+ * copied i_flags
+ */
+ err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath);
+ if (err)
+ return err;
+ }
+
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
@@ -504,13 +597,13 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
* hard link.
*/
if (c->origin) {
- err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
+ err = ovl_set_origin(ofs, c->lowerpath.dentry, temp);
if (err)
return err;
}
if (c->metacopy) {
- err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
+ err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY,
NULL, 0, -EOPNOTSUPP);
if (err)
return err;
@@ -518,9 +611,9 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
inode_lock(temp->d_inode);
if (S_ISREG(c->stat.mode))
- err = ovl_set_size(temp, &c->stat);
+ err = ovl_set_size(ofs, temp, &c->stat);
if (!err)
- err = ovl_set_attr(temp, &c->stat);
+ err = ovl_set_attr(ofs, temp, &c->stat);
inode_unlock(temp->d_inode);
return err;
@@ -560,8 +653,10 @@ static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
*/
static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *inode;
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
+ struct path path = { .mnt = ovl_upper_mnt(ofs) };
struct dentry *temp, *upper;
struct ovl_cu_creds cc;
int err;
@@ -572,22 +667,32 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
.link = c->link
};
- err = ovl_lock_rename_workdir(c->workdir, c->destdir);
- if (err)
- return err;
+ /* workdir and destdir could be the same when copying up to indexdir */
+ err = -EIO;
+ if (lock_rename(c->workdir, c->destdir) != NULL)
+ goto unlock;
err = ovl_prep_cu_creds(c->dentry, &cc);
if (err)
goto unlock;
- temp = ovl_create_temp(c->workdir, &cattr);
+ temp = ovl_create_temp(ofs, c->workdir, &cattr);
ovl_revert_cu_creds(&cc);
err = PTR_ERR(temp);
if (IS_ERR(temp))
goto unlock;
- err = ovl_copy_up_inode(c, temp);
+ /*
+ * Copy up data first and then xattrs. Writing data after
+ * xattrs will remove security.capability xattr automatically.
+ */
+ path.dentry = temp;
+ err = ovl_copy_up_data(c, &path);
+ if (err)
+ goto cleanup;
+
+ err = ovl_copy_up_metadata(c, temp);
if (err)
goto cleanup;
@@ -597,12 +702,13 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
goto cleanup;
}
- upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
+ c->destname.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
goto cleanup;
- err = ovl_do_rename(wdir, temp, udir, upper, 0);
+ err = ovl_do_rename(ofs, wdir, temp, udir, upper, 0);
dput(upper);
if (err)
goto cleanup;
@@ -619,7 +725,7 @@ unlock:
return err;
cleanup:
- ovl_cleanup(wdir, temp);
+ ovl_cleanup(ofs, wdir, temp);
dput(temp);
goto unlock;
}
@@ -627,8 +733,10 @@ cleanup:
/* Copyup using O_TMPFILE which does not require cross dir locking */
static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
{
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *udir = d_inode(c->destdir);
struct dentry *temp, *upper;
+ struct file *tmpfile;
struct ovl_cu_creds cc;
int err;
@@ -636,37 +744,43 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
if (err)
return err;
- temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+ tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
ovl_revert_cu_creds(&cc);
- if (IS_ERR(temp))
- return PTR_ERR(temp);
+ if (IS_ERR(tmpfile))
+ return PTR_ERR(tmpfile);
- err = ovl_copy_up_inode(c, temp);
+ temp = tmpfile->f_path.dentry;
+ if (!c->metacopy && c->stat.size) {
+ err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size);
+ if (err)
+ goto out_fput;
+ }
+
+ err = ovl_copy_up_metadata(c, temp);
if (err)
- goto out_dput;
+ goto out_fput;
inode_lock_nested(udir, I_MUTEX_PARENT);
- upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
+ c->destname.len);
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
- err = ovl_do_link(temp, udir, upper);
+ err = ovl_do_link(ofs, temp, udir, upper);
dput(upper);
}
inode_unlock(udir);
if (err)
- goto out_dput;
+ goto out_fput;
if (!c->metacopy)
ovl_set_upperdata(d_inode(c->dentry));
- ovl_inode_update(d_inode(c->dentry), temp);
-
- return 0;
+ ovl_inode_update(d_inode(c->dentry), dget(temp));
-out_dput:
- dput(temp);
+out_fput:
+ fput(tmpfile);
return err;
}
@@ -682,7 +796,7 @@ out_dput:
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
- struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
bool to_index = false;
/*
@@ -704,7 +818,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
- err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+ err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
if (err)
return err;
} else if (WARN_ON(!c->parent)) {
@@ -739,7 +853,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
/* Restore timestamps on parent (best effort) */
inode_lock(udir);
- ovl_set_timestamps(c->destdir, &c->pstat);
+ ovl_set_timestamps(ofs, c->destdir, &c->pstat);
inode_unlock(udir);
ovl_dentry_set_upper_alias(c->dentry);
@@ -768,30 +882,50 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
return true;
}
+static ssize_t ovl_getxattr_value(const struct path *path, char *name, char **value)
+{
+ ssize_t res;
+ char *buf;
+
+ res = ovl_do_getxattr(path, name, NULL, 0);
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ res = 0;
+
+ if (res > 0) {
+ buf = kzalloc(res, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ res = ovl_do_getxattr(path, name, buf, res);
+ if (res < 0)
+ kfree(buf);
+ else
+ *value = buf;
+ }
+ return res;
+}
+
/* Copy up data of an inode which was copied up metadata only in the past. */
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{
- struct path upperpath, datapath;
+ struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct path upperpath;
int err;
char *capability = NULL;
- ssize_t uninitialized_var(cap_size);
+ ssize_t cap_size;
ovl_path_upper(c->dentry, &upperpath);
if (WARN_ON(upperpath.dentry == NULL))
return -EIO;
- ovl_path_lowerdata(c->dentry, &datapath);
- if (WARN_ON(datapath.dentry == NULL))
- return -EIO;
-
if (c->stat.size) {
- err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
- &capability, 0);
- if (err < 0 && err != -ENODATA)
+ err = cap_size = ovl_getxattr_value(&upperpath, XATTR_NAME_CAPS,
+ &capability);
+ if (cap_size < 0)
goto out;
}
- err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ err = ovl_copy_up_data(c, &upperpath);
if (err)
goto out_free;
@@ -800,14 +934,14 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
* don't want that to happen for normal copy-up operation.
*/
if (capability) {
- err = ovl_do_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
+ err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS,
capability, cap_size, 0);
if (err)
goto out_free;
}
- err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
+ err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
if (err)
goto out_free;
@@ -882,10 +1016,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return err;
}
-int ovl_copy_up_flags(struct dentry *dentry, int flags)
+static int ovl_copy_up_flags(struct dentry *dentry, int flags)
{
int err = 0;
- const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
+ const struct cred *old_cred;
bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
/*
@@ -896,6 +1030,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
if (WARN_ON(disconnected && d_is_dir(dentry)))
return -EIO;
+ old_cred = ovl_override_creds(dentry->d_sb);
while (!err) {
struct dentry *next;
struct dentry *parent = NULL;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 8e57d5372b8f..6b03457f72bb 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -23,15 +23,15 @@ MODULE_PARM_DESC(redirect_max,
static int ovl_set_redirect(struct dentry *dentry, bool samedir);
-int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry)
{
int err;
dget(wdentry);
if (d_is_dir(wdentry))
- err = ovl_do_rmdir(wdir, wdentry);
+ err = ovl_do_rmdir(ofs, wdir, wdentry);
else
- err = ovl_do_unlink(wdir, wdentry);
+ err = ovl_do_unlink(ofs, wdir, wdentry);
dput(wdentry);
if (err) {
@@ -42,7 +42,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
return err;
}
-static struct dentry *ovl_lookup_temp(struct dentry *workdir)
+struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
{
struct dentry *temp;
char name[20];
@@ -51,7 +51,7 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir)
/* counter is allowed to wrap, since temp dentries are ephemeral */
snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
- temp = lookup_one_len(name, workdir, strlen(name));
+ temp = ovl_lookup_upper(ofs, name, workdir, strlen(name));
if (!IS_ERR(temp) && temp->d_inode) {
pr_err("workdir/%s already exists\n", name);
dput(temp);
@@ -62,35 +62,59 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir)
}
/* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
{
int err;
struct dentry *whiteout;
+ struct dentry *workdir = ofs->workdir;
struct inode *wdir = workdir->d_inode;
- whiteout = ovl_lookup_temp(workdir);
- if (IS_ERR(whiteout))
- return whiteout;
+ if (!ofs->whiteout) {
+ whiteout = ovl_lookup_temp(ofs, workdir);
+ if (IS_ERR(whiteout))
+ goto out;
- err = ovl_do_whiteout(wdir, whiteout);
- if (err) {
- dput(whiteout);
- whiteout = ERR_PTR(err);
+ err = ovl_do_whiteout(ofs, wdir, whiteout);
+ if (err) {
+ dput(whiteout);
+ whiteout = ERR_PTR(err);
+ goto out;
+ }
+ ofs->whiteout = whiteout;
}
+ if (ofs->share_whiteout) {
+ whiteout = ovl_lookup_temp(ofs, workdir);
+ if (IS_ERR(whiteout))
+ goto out;
+
+ err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
+ if (!err)
+ goto out;
+
+ if (err != -EMLINK) {
+ pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
+ ofs->whiteout->d_inode->i_nlink, err);
+ ofs->share_whiteout = false;
+ }
+ dput(whiteout);
+ }
+ whiteout = ofs->whiteout;
+ ofs->whiteout = NULL;
+out:
return whiteout;
}
/* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
struct dentry *dentry)
{
- struct inode *wdir = workdir->d_inode;
+ struct inode *wdir = ofs->workdir->d_inode;
struct dentry *whiteout;
int err;
int flags = 0;
- whiteout = ovl_whiteout(workdir);
+ whiteout = ovl_whiteout(ofs);
err = PTR_ERR(whiteout);
if (IS_ERR(whiteout))
return err;
@@ -98,28 +122,28 @@ int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
if (d_is_dir(dentry))
flags = RENAME_EXCHANGE;
- err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
+ err = ovl_do_rename(ofs, wdir, whiteout, dir, dentry, flags);
if (err)
goto kill_whiteout;
if (flags)
- ovl_cleanup(wdir, dentry);
+ ovl_cleanup(ofs, wdir, dentry);
out:
dput(whiteout);
return err;
kill_whiteout:
- ovl_cleanup(wdir, whiteout);
+ ovl_cleanup(ofs, wdir, whiteout);
goto out;
}
-static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
- umode_t mode)
+int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
+ struct dentry **newdentry, umode_t mode)
{
int err;
struct dentry *d, *dentry = *newdentry;
- err = ovl_do_mkdir(dir, dentry, mode);
+ err = ovl_do_mkdir(ofs, dir, dentry, mode);
if (err)
return err;
@@ -131,8 +155,8 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
* to it unhashed and negative. If that happens, try to
* lookup a new hashed and positive dentry.
*/
- d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
- dentry->d_name.len);
+ d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent,
+ dentry->d_name.len);
if (IS_ERR(d)) {
pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
dentry, err);
@@ -144,8 +168,8 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
return 0;
}
-struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
- struct ovl_cattr *attr)
+struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
+ struct dentry *newdentry, struct ovl_cattr *attr)
{
int err;
@@ -157,28 +181,28 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
goto out;
if (attr->hardlink) {
- err = ovl_do_link(attr->hardlink, dir, newdentry);
+ err = ovl_do_link(ofs, attr->hardlink, dir, newdentry);
} else {
switch (attr->mode & S_IFMT) {
case S_IFREG:
- err = ovl_do_create(dir, newdentry, attr->mode);
+ err = ovl_do_create(ofs, dir, newdentry, attr->mode);
break;
case S_IFDIR:
/* mkdir is special... */
- err = ovl_mkdir_real(dir, &newdentry, attr->mode);
+ err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode);
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- err = ovl_do_mknod(dir, newdentry, attr->mode,
+ err = ovl_do_mknod(ofs, dir, newdentry, attr->mode,
attr->rdev);
break;
case S_IFLNK:
- err = ovl_do_symlink(dir, newdentry, attr->link);
+ err = ovl_do_symlink(ofs, dir, newdentry, attr->link);
break;
default:
@@ -200,18 +224,20 @@ out:
return newdentry;
}
-struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr)
+struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
+ struct ovl_cattr *attr)
{
- return ovl_create_real(d_inode(workdir), ovl_lookup_temp(workdir),
- attr);
+ return ovl_create_real(ofs, d_inode(workdir),
+ ovl_lookup_temp(ofs, workdir), attr);
}
static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
int xerr)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
int err;
- err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
+ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
if (!err)
ovl_dentry_set_opaque(dentry);
@@ -243,6 +269,9 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
ovl_dir_modified(dentry->d_parent, false);
ovl_dentry_set_upper_alias(dentry);
+ ovl_dentry_update_reval(dentry, newdentry,
+ DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
+
if (!hardlink) {
/*
* ovl_obtain_alias() can be called after ovl_create_real()
@@ -259,6 +288,8 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
inode = ovl_get_inode(dentry->d_sb, &oip);
if (IS_ERR(inode))
return PTR_ERR(inode);
+ if (inode == oip.newinode)
+ ovl_set_flag(OVL_UPPERDATA, inode);
} else {
WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
dput(newdentry);
@@ -291,6 +322,7 @@ static bool ovl_type_origin(struct dentry *dentry)
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct ovl_cattr *attr)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct inode *udir = upperdir->d_inode;
struct dentry *newdentry;
@@ -300,16 +332,16 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
attr->mode &= ~current_umask();
inode_lock_nested(udir, I_MUTEX_PARENT);
- newdentry = ovl_create_real(udir,
- lookup_one_len(dentry->d_name.name,
- upperdir,
- dentry->d_name.len),
+ newdentry = ovl_create_real(ofs, udir,
+ ovl_lookup_upper(ofs, dentry->d_name.name,
+ upperdir, dentry->d_name.len),
attr);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_unlock;
- if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
+ if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
+ !ovl_allow_offline_changes(ofs)) {
/* Setting opaque here is just an optimization, allow to fail */
ovl_set_opaque(dentry, newdentry);
}
@@ -322,7 +354,7 @@ out_unlock:
return err;
out_cleanup:
- ovl_cleanup(udir, newdentry);
+ ovl_cleanup(ofs, udir, newdentry);
dput(newdentry);
goto out_unlock;
}
@@ -330,6 +362,7 @@ out_cleanup:
static struct dentry *ovl_clear_empty(struct dentry *dentry,
struct list_head *list)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
struct inode *wdir = workdir->d_inode;
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
@@ -360,12 +393,12 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (upper->d_parent->d_inode != udir)
goto out_unlock;
- opaquedir = ovl_create_temp(workdir, OVL_CATTR(stat.mode));
+ opaquedir = ovl_create_temp(ofs, workdir, OVL_CATTR(stat.mode));
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
goto out_unlock;
- err = ovl_copy_xattr(upper, opaquedir);
+ err = ovl_copy_xattr(dentry->d_sb, &upperpath, opaquedir);
if (err)
goto out_cleanup;
@@ -374,17 +407,17 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
goto out_cleanup;
inode_lock(opaquedir->d_inode);
- err = ovl_set_attr(opaquedir, &stat);
+ err = ovl_set_attr(ofs, opaquedir, &stat);
inode_unlock(opaquedir->d_inode);
if (err)
goto out_cleanup;
- err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
+ err = ovl_do_rename(ofs, wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
if (err)
goto out_cleanup;
- ovl_cleanup_whiteouts(upper, list);
- ovl_cleanup(wdir, upper);
+ ovl_cleanup_whiteouts(ofs, upper, list);
+ ovl_cleanup(ofs, wdir, upper);
unlock_rename(workdir, upperdir);
/* dentry's upper doesn't match now, get rid of it */
@@ -393,7 +426,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
return opaquedir;
out_cleanup:
- ovl_cleanup(wdir, opaquedir);
+ ovl_cleanup(ofs, wdir, opaquedir);
dput(opaquedir);
out_unlock:
unlock_rename(workdir, upperdir);
@@ -401,8 +434,8 @@ out:
return ERR_PTR(err);
}
-static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
- const struct posix_acl *acl)
+static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry,
+ const char *name, const struct posix_acl *acl)
{
void *buffer;
size_t size;
@@ -420,7 +453,7 @@ static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
if (err < 0)
goto out_free;
- err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
+ err = ovl_do_setxattr(ofs, upperdentry, name, buffer, size, XATTR_CREATE);
out_free:
kfree(buffer);
return err;
@@ -429,6 +462,7 @@ out_free:
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
struct ovl_cattr *cattr)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
struct inode *wdir = workdir->d_inode;
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
@@ -453,8 +487,8 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out;
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
goto out_unlock;
@@ -463,7 +497,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
goto out_dput;
- newdentry = ovl_create_temp(workdir, cattr);
+ newdentry = ovl_create_temp(ofs, workdir, cattr);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_dput;
@@ -479,19 +513,19 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
.ia_mode = cattr->mode,
};
inode_lock(newdentry->d_inode);
- err = notify_change(newdentry, &attr, NULL);
+ err = ovl_do_notify_change(ofs, newdentry, &attr);
inode_unlock(newdentry->d_inode);
if (err)
goto out_cleanup;
}
if (!hardlink) {
- err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
- acl);
+ err = ovl_set_upper_acl(ofs, newdentry,
+ XATTR_NAME_POSIX_ACL_ACCESS, acl);
if (err)
goto out_cleanup;
- err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
- default_acl);
+ err = ovl_set_upper_acl(ofs, newdentry,
+ XATTR_NAME_POSIX_ACL_DEFAULT, default_acl);
if (err)
goto out_cleanup;
}
@@ -501,20 +535,22 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out_cleanup;
- err = ovl_do_rename(wdir, newdentry, udir, upper,
+ err = ovl_do_rename(ofs, wdir, newdentry, udir, upper,
RENAME_EXCHANGE);
if (err)
goto out_cleanup;
- ovl_cleanup(wdir, upper);
+ ovl_cleanup(ofs, wdir, upper);
} else {
- err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+ err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 0);
if (err)
goto out_cleanup;
}
err = ovl_instantiate(dentry, inode, newdentry, hardlink);
- if (err)
- goto out_cleanup;
+ if (err) {
+ ovl_cleanup(ofs, udir, newdentry);
+ dput(newdentry);
+ }
out_dput:
dput(upper);
out_unlock:
@@ -527,7 +563,7 @@ out:
return err;
out_cleanup:
- ovl_cleanup(wdir, newdentry);
+ ovl_cleanup(ofs, wdir, newdentry);
dput(newdentry);
goto out_dput;
}
@@ -607,7 +643,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
inode->i_state |= I_CREATING;
spin_unlock(&inode->i_lock);
- inode_init_owner(inode, dentry->d_parent->d_inode, mode);
+ inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
attr.mode = inode->i_mode;
err = ovl_create_or_link(dentry, inode, &attr, false);
@@ -621,19 +657,20 @@ out:
return err;
}
-static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
{
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
}
-static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
}
-static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
- dev_t rdev)
+static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t rdev)
{
/* Don't allow creation of "whiteout" on overlay */
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
@@ -642,8 +679,8 @@ static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
return ovl_create_object(dentry, mode, rdev, NULL);
}
-static int ovl_symlink(struct inode *dir, struct dentry *dentry,
- const char *link)
+static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ struct dentry *dentry, const char *link)
{
return ovl_create_object(dentry, S_IFLNK, 0, link);
}
@@ -712,6 +749,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
static int ovl_remove_and_whiteout(struct dentry *dentry,
struct list_head *list)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct dentry *upper;
@@ -732,8 +770,8 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
if (err)
goto out_dput;
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
goto out_unlock;
@@ -745,7 +783,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
goto out_dput_upper;
}
- err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+ err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
if (err)
goto out_d_drop;
@@ -765,6 +803,7 @@ out:
static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
struct list_head *list)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct inode *dir = upperdir->d_inode;
struct dentry *upper;
@@ -779,8 +818,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
}
inode_lock_nested(dir, I_MUTEX_PARENT);
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
goto out_unlock;
@@ -791,9 +830,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
goto out_dput_upper;
if (is_dir)
- err = vfs_rmdir(dir, upper);
+ err = ovl_do_rmdir(ofs, dir, upper);
else
- err = vfs_unlink(dir, upper, NULL);
+ err = ovl_do_unlink(ofs, dir, upper);
ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
/*
@@ -819,11 +858,32 @@ static bool ovl_pure_upper(struct dentry *dentry)
!ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
}
+static void ovl_drop_nlink(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ struct dentry *alias;
+
+ /* Try to find another, hashed alias */
+ spin_lock(&inode->i_lock);
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+ if (alias != dentry && !d_unhashed(alias))
+ break;
+ }
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * Changes to underlying layers may cause i_nlink to lose sync with
+ * reality. In this case prevent the link count from going to zero
+ * prematurely.
+ */
+ if (inode->i_nlink > !!alias)
+ drop_nlink(inode);
+}
+
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
int err;
const struct cred *old_cred;
- struct dentry *upperdentry;
bool lower_positive = ovl_lower_positive(dentry);
LIST_HEAD(list);
@@ -856,7 +916,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (is_dir)
clear_nlink(dentry->d_inode);
else
- drop_nlink(dentry->d_inode);
+ ovl_drop_nlink(dentry);
}
ovl_nlink_end(dentry);
@@ -866,9 +926,8 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
* Note: we fail to update ctime if there was no copy-up, only a
* whiteout
*/
- upperdentry = ovl_dentry_upper(dentry);
- if (upperdentry)
- ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
+ if (ovl_dentry_upper(dentry))
+ ovl_copyattr(d_inode(dentry));
out_drop_write:
ovl_drop_write(dentry);
@@ -940,8 +999,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
buflen -= thislen;
memcpy(&buf[buflen], name, thislen);
- tmp = dget_dlock(d->d_parent);
spin_unlock(&d->d_lock);
+ tmp = dget_parent(d);
dput(d);
d = tmp;
@@ -990,6 +1049,7 @@ static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
static int ovl_set_redirect(struct dentry *dentry, bool samedir)
{
int err;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
const char *redirect = ovl_dentry_get_redirect(dentry);
bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
@@ -1000,7 +1060,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
if (IS_ERR(redirect))
return PTR_ERR(redirect);
- err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+ err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry),
OVL_XATTR_REDIRECT,
redirect, strlen(redirect), -EXDEV);
if (!err) {
@@ -1017,9 +1077,9 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
return err;
}
-static int ovl_rename(struct inode *olddir, struct dentry *old,
- struct inode *newdir, struct dentry *new,
- unsigned int flags)
+static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
+ struct dentry *old, struct inode *newdir,
+ struct dentry *new, unsigned int flags)
{
int err;
struct dentry *old_upperdir;
@@ -1037,6 +1097,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
bool samedir = olddir == newdir;
struct dentry *opaquedir = NULL;
const struct cred *old_cred = NULL;
+ struct ovl_fs *ofs = OVL_FS(old->d_sb);
LIST_HEAD(list);
err = -EINVAL;
@@ -1131,8 +1192,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
trap = lock_rename(new_upperdir, old_upperdir);
- olddentry = lookup_one_len(old->d_name.name, old_upperdir,
- old->d_name.len);
+ olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
+ old->d_name.len);
err = PTR_ERR(olddentry);
if (IS_ERR(olddentry))
goto out_unlock;
@@ -1141,8 +1202,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (!ovl_matches_upper(old, olddentry))
goto out_dput_old;
- newdentry = lookup_one_len(new->d_name.name, new_upperdir,
- new->d_name.len);
+ newdentry = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir,
+ new->d_name.len);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_dput_old;
@@ -1160,9 +1221,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_dput;
}
} else {
- if (!d_is_negative(newdentry) &&
- (!new_opaque || !ovl_is_whiteout(newdentry)))
- goto out_dput;
+ if (!d_is_negative(newdentry)) {
+ if (!new_opaque || !ovl_is_whiteout(newdentry))
+ goto out_dput;
+ } else {
+ if (flags & RENAME_EXCHANGE)
+ goto out_dput;
+ }
}
if (olddentry == trap)
@@ -1189,19 +1254,19 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (err)
goto out_dput;
- err = ovl_do_rename(old_upperdir->d_inode, olddentry,
+ err = ovl_do_rename(ofs, old_upperdir->d_inode, olddentry,
new_upperdir->d_inode, newdentry, flags);
if (err)
goto out_dput;
if (cleanup_whiteout)
- ovl_cleanup(old_upperdir->d_inode, newdentry);
+ ovl_cleanup(ofs, old_upperdir->d_inode, newdentry);
if (overwrite && d_inode(new)) {
if (new_is_dir)
clear_nlink(d_inode(new));
else
- drop_nlink(d_inode(new));
+ ovl_drop_nlink(new);
}
ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
@@ -1210,9 +1275,9 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
(d_inode(new) && ovl_type_origin(new)));
/* copy ctime: */
- ovl_copyattr(d_inode(olddentry), d_inode(old));
+ ovl_copyattr(d_inode(old));
if (d_inode(new) && ovl_dentry_upper(new))
- ovl_copyattr(d_inode(newdentry), d_inode(new));
+ ovl_copyattr(d_inode(new));
out_dput:
dput(newdentry);
@@ -1248,4 +1313,6 @@ const struct inode_operations ovl_dir_inode_operations = {
.listxattr = ovl_listxattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
+ .fileattr_get = ovl_fileattr_get,
+ .fileattr_set = ovl_fileattr_set,
};
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 6f54d70cef27..e065a5b9a442 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -204,14 +204,15 @@ static int ovl_check_encode_origin(struct dentry *dentry)
* ovl_connect_layer() will try to make origin's layer "connected" by
* copying up a "connectable" ancestor.
*/
- if (d_is_dir(dentry) && ofs->upper_mnt)
+ if (d_is_dir(dentry) && ovl_upper_mnt(ofs))
return ovl_connect_layer(dentry);
/* Lower file handle for indexed and non-upper dir/non-dir */
return 1;
}
-static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
+static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+ u32 *fid, int buflen)
{
struct ovl_fh *fh = NULL;
int err, enc_lower;
@@ -226,17 +227,14 @@ static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
goto fail;
/* Encode an upper or lower file handle */
- fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
+ fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) :
ovl_dentry_upper(dentry), !enc_lower);
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = -EOVERFLOW;
len = OVL_FH_LEN(fh);
- if (len > buflen)
- goto fail;
-
- memcpy(fid, fh, len);
+ if (len <= buflen)
+ memcpy(fid, fh, len);
err = len;
out:
@@ -244,32 +242,34 @@ out:
return err;
fail:
- pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
- dentry, err, buflen, fh ? (int)fh->fb.len : 0,
- fh ? fh->fb.type : 0);
+ pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n",
+ dentry, err);
goto out;
}
static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
struct dentry *dentry;
- int bytes = *max_len << 2;
+ int bytes, buflen = *max_len << 2;
/* TODO: encode connectable file handles */
if (parent)
return FILEID_INVALID;
dentry = d_find_any_alias(inode);
- if (WARN_ON(!dentry))
+ if (!dentry)
return FILEID_INVALID;
- bytes = ovl_dentry_to_fid(dentry, fid, bytes);
+ bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
dput(dentry);
if (bytes <= 0)
return FILEID_INVALID;
*max_len = bytes >> 2;
+ if (bytes > buflen)
+ return FILEID_INVALID;
return OVL_FILEID_V1;
}
@@ -308,29 +308,35 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
ovl_set_flag(OVL_UPPERDATA, inode);
dentry = d_find_any_alias(inode);
- if (!dentry) {
- dentry = d_alloc_anon(inode->i_sb);
- if (!dentry)
- goto nomem;
- oe = ovl_alloc_entry(lower ? 1 : 0);
- if (!oe)
- goto nomem;
-
- if (lower) {
- oe->lowerstack->dentry = dget(lower);
- oe->lowerstack->layer = lowerpath->layer;
- }
- dentry->d_fsdata = oe;
- if (upper_alias)
- ovl_dentry_set_upper_alias(dentry);
+ if (dentry)
+ goto out_iput;
+
+ dentry = d_alloc_anon(inode->i_sb);
+ if (unlikely(!dentry))
+ goto nomem;
+ oe = ovl_alloc_entry(lower ? 1 : 0);
+ if (!oe)
+ goto nomem;
+
+ if (lower) {
+ oe->lowerstack->dentry = dget(lower);
+ oe->lowerstack->layer = lowerpath->layer;
}
+ dentry->d_fsdata = oe;
+ if (upper_alias)
+ ovl_dentry_set_upper_alias(dentry);
+
+ ovl_dentry_update_reval(dentry, upper,
+ DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
return d_instantiate_anon(dentry, inode);
nomem:
- iput(inode);
dput(dentry);
- return ERR_PTR(-ENOMEM);
+ dentry = ERR_PTR(-ENOMEM);
+out_iput:
+ iput(inode);
+ return dentry;
}
/* Get the upper or lower dentry in stach whose on layer @idx */
@@ -385,7 +391,13 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
* pointer because we hold no lock on the real dentry.
*/
take_dentry_name_snapshot(&name, real);
+ /*
+ * No mnt_userns handling here: it's an internal lookup. Could skip
+ * permission checking altogether, but for now just use non-mnt_userns
+ * transformed ids.
+ */
this = lookup_one_len(name.name.name, connected, name.name.len);
+ release_dentry_name_snapshot(&name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
goto fail;
@@ -400,7 +412,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
}
out:
- release_dentry_name_snapshot(&name);
dput(parent);
inode_unlock(dir);
return this;
@@ -472,7 +483,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
if (IS_ERR_OR_NULL(this))
return this;
- if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
+ if (ovl_dentry_real_at(this, layer->idx) != real) {
dput(this);
this = ERR_PTR(-EIO);
}
@@ -673,10 +684,10 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
struct dentry *dentry;
struct dentry *upper;
- if (!ofs->upper_mnt)
+ if (!ovl_upper_mnt(ofs))
return ERR_PTR(-EACCES);
- upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+ upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
if (IS_ERR_OR_NULL(upper))
return upper;
@@ -748,7 +759,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
goto out_err;
}
if (index) {
- err = ovl_verify_origin(index, origin.dentry, false);
+ err = ovl_verify_origin(ofs, index, origin.dentry, false);
if (err)
goto out_err;
}
@@ -777,6 +788,9 @@ static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
if (fh_type != OVL_FILEID_V0)
return ERR_PTR(-EINVAL);
+ if (buflen <= OVL_FH_WIRE_OFFSET)
+ return ERR_PTR(-EINVAL);
+
fh = kzalloc(buflen, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 87c362f65448..a1a22f58ba18 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -10,12 +10,14 @@
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/splice.h>
+#include <linux/security.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include "overlayfs.h"
struct ovl_aio_req {
struct kiocb iocb;
+ refcount_t ref;
struct kiocb *orig_iocb;
struct fd fd;
};
@@ -32,17 +34,36 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
return 'm';
}
+/* No atime modification nor notify on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
+
static struct file *ovl_open_realfile(const struct file *file,
- struct inode *realinode)
+ const struct path *realpath)
{
+ struct inode *realinode = d_inode(realpath->dentry);
struct inode *inode = file_inode(file);
+ struct user_namespace *real_mnt_userns;
struct file *realfile;
const struct cred *old_cred;
- int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
+ int flags = file->f_flags | OVL_OPEN_FLAGS;
+ int acc_mode = ACC_MODE(flags);
+ int err;
+
+ if (flags & O_APPEND)
+ acc_mode |= MAY_APPEND;
old_cred = ovl_override_creds(inode->i_sb);
- realfile = open_with_fake_path(&file->f_path, flags, realinode,
- current_cred());
+ real_mnt_userns = mnt_user_ns(realpath->mnt);
+ err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode);
+ if (err) {
+ realfile = ERR_PTR(err);
+ } else {
+ if (!inode_owner_or_capable(real_mnt_userns, realinode))
+ flags &= ~O_NOATIME;
+
+ realfile = open_with_fake_path(&file->f_path, flags, realinode,
+ current_cred());
+ }
revert_creds(old_cred);
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
@@ -59,23 +80,13 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
struct inode *inode = file_inode(file);
int err;
- /* No atime modificaton on underlying */
- flags |= O_NOATIME | FMODE_NONOTIFY;
-
- /* If some flag changed that cannot be changed then something's amiss */
- if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
- return -EIO;
-
flags &= OVL_SETFL_MASK;
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
return -EPERM;
- if (flags & O_DIRECT) {
- if (!file->f_mapping->a_ops ||
- !file->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
+ if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
+ return -EINVAL;
if (file->f_op->check_flags) {
err = file->f_op->check_flags(flags);
@@ -93,27 +104,27 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
bool allow_meta)
{
- struct inode *inode = file_inode(file);
- struct inode *realinode;
+ struct dentry *dentry = file_dentry(file);
+ struct path realpath;
real->flags = 0;
real->file = file->private_data;
if (allow_meta)
- realinode = ovl_inode_real(inode);
+ ovl_path_real(dentry, &realpath);
else
- realinode = ovl_inode_realdata(inode);
+ ovl_path_realdata(dentry, &realpath);
/* Has it been copied up since we'd opened it? */
- if (unlikely(file_inode(real->file) != realinode)) {
+ if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
real->flags = FDPUT_FPUT;
- real->file = ovl_open_realfile(file, realinode);
+ real->file = ovl_open_realfile(file, &realpath);
return PTR_ERR_OR_ZERO(real->file);
}
/* Did the flags change since open? */
- if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
+ if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
return ovl_change_flags(real->file, file->f_flags);
return 0;
@@ -121,22 +132,32 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
static int ovl_real_fdget(const struct file *file, struct fd *real)
{
+ if (d_is_dir(file_dentry(file))) {
+ real->flags = 0;
+ real->file = ovl_dir_real_file(file, false);
+
+ return PTR_ERR_OR_ZERO(real->file);
+ }
+
return ovl_real_fdget_meta(file, real, false);
}
static int ovl_open(struct inode *inode, struct file *file)
{
+ struct dentry *dentry = file_dentry(file);
struct file *realfile;
+ struct path realpath;
int err;
- err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
+ err = ovl_maybe_copy_up(dentry, file->f_flags);
if (err)
return err;
/* No longer need these flags, so don't pass them on to underlying fs */
file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
- realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
+ ovl_path_realdata(dentry, &realpath);
+ realfile = ovl_open_realfile(file, &realpath);
if (IS_ERR(realfile))
return PTR_ERR(realfile);
@@ -219,9 +240,8 @@ static void ovl_file_accessed(struct file *file)
touch_atime(&file->f_path);
}
-static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
+static rwf_t ovl_iocb_to_rwf(int ifl)
{
- int ifl = iocb->ki_flags;
rwf_t flags = 0;
if (ifl & IOCB_NOWAIT)
@@ -236,6 +256,14 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
return flags;
}
+static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
+{
+ if (refcount_dec_and_test(&aio_req->ref)) {
+ fdput(aio_req->fd);
+ kmem_cache_free(ovl_aio_request_cachep, aio_req);
+ }
+}
+
static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
{
struct kiocb *iocb = &aio_req->iocb;
@@ -248,22 +276,21 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
__sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
SB_FREEZE_WRITE);
file_end_write(iocb->ki_filp);
- ovl_copyattr(ovl_inode_real(inode), inode);
+ ovl_copyattr(inode);
}
orig_iocb->ki_pos = iocb->ki_pos;
- fdput(aio_req->fd);
- kmem_cache_free(ovl_aio_request_cachep, aio_req);
+ ovl_aio_put(aio_req);
}
-static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
{
struct ovl_aio_req *aio_req = container_of(iocb,
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
ovl_aio_cleanup_handler(aio_req);
- orig_iocb->ki_complete(orig_iocb, res, res2);
+ orig_iocb->ki_complete(orig_iocb, res);
}
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
@@ -280,10 +307,15 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
if (ret)
return ret;
+ ret = -EINVAL;
+ if (iocb->ki_flags & IOCB_DIRECT &&
+ !(real.file->f_mode & FMODE_CAN_ODIRECT))
+ goto out_fdput;
+
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ ovl_iocb_to_rwf(iocb->ki_flags));
} else {
struct ovl_aio_req *aio_req;
@@ -297,14 +329,16 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+ ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred);
ovl_file_accessed(file);
-
+out_fdput:
fdput(real);
return ret;
@@ -317,13 +351,14 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
struct fd real;
const struct cred *old_cred;
ssize_t ret;
+ int ifl = iocb->ki_flags;
if (!iov_iter_count(iter))
return 0;
inode_lock(inode);
/* Update mode */
- ovl_copyattr(ovl_inode_real(inode), inode);
+ ovl_copyattr(inode);
ret = file_remove_privs(file);
if (ret)
goto out_unlock;
@@ -332,14 +367,22 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (ret)
goto out_unlock;
+ ret = -EINVAL;
+ if (iocb->ki_flags & IOCB_DIRECT &&
+ !(real.file->f_mode & FMODE_CAN_ODIRECT))
+ goto out_fdput;
+
+ if (!ovl_should_sync(OVL_FS(inode->i_sb)))
+ ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
+
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ ovl_iocb_to_rwf(ifl));
file_end_write(real.file);
/* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
+ ovl_copyattr(inode);
} else {
struct ovl_aio_req *aio_req;
@@ -356,13 +399,17 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
real.flags = 0;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+ ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred);
+out_fdput:
fdput(real);
out_unlock:
@@ -371,45 +418,47 @@ out_unlock:
return ret;
}
-static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
+/*
+ * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
+ * due to lock order inversion between pipe->mutex in iter_file_splice_write()
+ * and file_start_write(real.file) in ovl_write_iter().
+ *
+ * So do everything ovl_write_iter() does and call iter_file_splice_write() on
+ * the real file.
+ */
+static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
{
- ssize_t ret;
struct fd real;
const struct cred *old_cred;
+ struct inode *inode = file_inode(out);
+ ssize_t ret;
- ret = ovl_real_fdget(in, &real);
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(inode);
+ ret = file_remove_privs(out);
if (ret)
- return ret;
-
- old_cred = ovl_override_creds(file_inode(in)->i_sb);
- ret = generic_file_splice_read(real.file, ppos, pipe, len, flags);
- revert_creds(old_cred);
-
- ovl_file_accessed(in);
- fdput(real);
- return ret;
-}
-
-static ssize_t
-ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
-{
- struct fd real;
- const struct cred *old_cred;
- ssize_t ret;
+ goto out_unlock;
ret = ovl_real_fdget(out, &real);
if (ret)
- return ret;
+ goto out_unlock;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ file_start_write(real.file);
- old_cred = ovl_override_creds(file_inode(out)->i_sb);
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
- revert_creds(old_cred);
- ovl_file_accessed(out);
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(inode);
+ revert_creds(old_cred);
fdput(real);
+
+out_unlock:
+ inode_unlock(inode);
+
return ret;
}
@@ -419,6 +468,10 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
const struct cred *old_cred;
int ret;
+ ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
+ if (ret <= 0)
+ return ret;
+
ret = ovl_real_fdget_meta(file, &real, !datasync);
if (ret)
return ret;
@@ -447,20 +500,11 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
if (WARN_ON(file != vma->vm_file))
return -EIO;
- vma->vm_file = get_file(realfile);
+ vma_set_file(vma, realfile);
old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = call_mmap(vma->vm_file, vma);
revert_creds(old_cred);
-
- if (ret) {
- /* Drop reference count from new vm_file value */
- fput(realfile);
- } else {
- /* Drop reference count from previous vm_file value */
- fput(file);
- }
-
ovl_file_accessed(file);
return ret;
@@ -482,7 +526,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len
revert_creds(old_cred);
/* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
+ ovl_copyattr(inode);
fdput(real);
@@ -508,166 +552,6 @@ static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
return ret;
}
-static long ovl_real_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct fd real;
- const struct cred *old_cred;
- long ret;
-
- ret = ovl_real_fdget(file, &real);
- if (ret)
- return ret;
-
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_ioctl(real.file, cmd, arg);
- revert_creds(old_cred);
-
- fdput(real);
-
- return ret;
-}
-
-static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int iflags)
-{
- long ret;
- struct inode *inode = file_inode(file);
- unsigned int old_iflags;
-
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- inode_lock(inode);
-
- /* Check the capability before cred override */
- ret = -EPERM;
- old_iflags = READ_ONCE(inode->i_flags);
- if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
- !capable(CAP_LINUX_IMMUTABLE))
- goto unlock;
-
- ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
- if (ret)
- goto unlock;
-
- ret = ovl_real_ioctl(file, cmd, arg);
-
- ovl_copyflags(ovl_inode_real(inode), inode);
-unlock:
- inode_unlock(inode);
-
- mnt_drop_write_file(file);
-
- return ret;
-
-}
-
-static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
-{
- unsigned int iflags = 0;
-
- if (flags & FS_SYNC_FL)
- iflags |= S_SYNC;
- if (flags & FS_APPEND_FL)
- iflags |= S_APPEND;
- if (flags & FS_IMMUTABLE_FL)
- iflags |= S_IMMUTABLE;
- if (flags & FS_NOATIME_FL)
- iflags |= S_NOATIME;
-
- return iflags;
-}
-
-static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- unsigned int flags;
-
- if (get_user(flags, (int __user *) arg))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsflags_to_iflags(flags));
-}
-
-static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
-{
- unsigned int iflags = 0;
-
- if (xflags & FS_XFLAG_SYNC)
- iflags |= S_SYNC;
- if (xflags & FS_XFLAG_APPEND)
- iflags |= S_APPEND;
- if (xflags & FS_XFLAG_IMMUTABLE)
- iflags |= S_IMMUTABLE;
- if (xflags & FS_XFLAG_NOATIME)
- iflags |= S_NOATIME;
-
- return iflags;
-}
-
-static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct fsxattr fa;
-
- memset(&fa, 0, sizeof(fa));
- if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsxflags_to_iflags(fa.fsx_xflags));
-}
-
-static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- long ret;
-
- switch (cmd) {
- case FS_IOC_GETFLAGS:
- case FS_IOC_FSGETXATTR:
- ret = ovl_real_ioctl(file, cmd, arg);
- break;
-
- case FS_IOC_SETFLAGS:
- ret = ovl_ioctl_set_fsflags(file, cmd, arg);
- break;
-
- case FS_IOC_FSSETXATTR:
- ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
- break;
-
- default:
- ret = -ENOTTY;
- }
-
- return ret;
-}
-
-static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- switch (cmd) {
- case FS_IOC32_GETFLAGS:
- cmd = FS_IOC_GETFLAGS;
- break;
-
- case FS_IOC32_SETFLAGS:
- cmd = FS_IOC_SETFLAGS;
- break;
-
- default:
- return -ENOIOCTLCMD;
- }
-
- return ovl_ioctl(file, cmd, arg);
-}
-
enum ovl_copyop {
OVL_COPY,
OVL_CLONE,
@@ -714,7 +598,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
revert_creds(old_cred);
/* Update size */
- ovl_copyattr(ovl_inode_real(inode_out), inode_out);
+ ovl_copyattr(inode_out);
fdput(real_in);
fdput(real_out);
@@ -757,6 +641,26 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
remap_flags, op);
}
+static int ovl_flush(struct file *file, fl_owner_t id)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ int err;
+
+ err = ovl_real_fdget(file, &real);
+ if (err)
+ return err;
+
+ if (real.file->f_op->flush) {
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ err = real.file->f_op->flush(real.file, id);
+ revert_creds(old_cred);
+ }
+ fdput(real);
+
+ return err;
+}
+
const struct file_operations ovl_file_operations = {
.open = ovl_open,
.release = ovl_release,
@@ -767,9 +671,8 @@ const struct file_operations ovl_file_operations = {
.mmap = ovl_mmap,
.fallocate = ovl_fallocate,
.fadvise = ovl_fadvise,
- .unlocked_ioctl = ovl_ioctl,
- .compat_ioctl = ovl_compat_ioctl,
- .splice_read = ovl_splice_read,
+ .flush = ovl_flush,
+ .splice_read = generic_file_splice_read,
.splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 79e8994e3bc1..9e61511de7a7 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -10,17 +10,23 @@
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/ratelimit.h>
+#include <linux/fiemap.h>
+#include <linux/fileattr.h>
+#include <linux/security.h>
+#include <linux/namei.h>
#include "overlayfs.h"
-int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
int err;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
bool full_copy_up = false;
struct dentry *upperdentry;
const struct cred *old_cred;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(&init_user_ns, dentry, attr);
if (err)
return err;
@@ -29,12 +35,6 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
goto out;
if (attr->ia_valid & ATTR_SIZE) {
- struct inode *realinode = d_inode(ovl_dentry_real(dentry));
-
- err = -ETXTBSY;
- if (atomic_read(&realinode->i_writecount) < 0)
- goto out_drop_write;
-
/* Truncate should trigger data copy up as well */
full_copy_up = true;
}
@@ -58,12 +58,30 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
attr->ia_valid &= ~ATTR_MODE;
+ /*
+ * We might have to translate ovl file into real file object
+ * once use cases emerge. For now, simply don't let underlying
+ * filesystem rely on attr->ia_file.
+ */
+ attr->ia_valid &= ~ATTR_FILE;
+
+ /*
+ * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN
+ * set. Overlayfs does not pass O_TRUNC flag to underlying
+ * filesystem during open, so do not pass ATTR_OPEN either. This
+ * disables optimization in fuse which assumes open(O_TRUNC)
+ * already set file size to 0. But we never passed O_TRUNC to
+ * fuse. So by clearing ATTR_OPEN, fuse will be forced to send
+ * setattr request to server.
+ */
+ attr->ia_valid &= ~ATTR_OPEN;
+
inode_lock(upperdentry->d_inode);
old_cred = ovl_override_creds(dentry->d_sb);
- err = notify_change(upperdentry, attr, NULL);
+ err = ovl_do_notify_change(ofs, upperdentry, attr);
revert_creds(old_cred);
if (!err)
- ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
+ ovl_copyattr(dentry->d_inode);
inode_unlock(upperdentry->d_inode);
if (winode)
@@ -75,10 +93,11 @@ out:
return err;
}
-static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
+static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
{
bool samefs = ovl_same_fs(dentry->d_sb);
unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+ unsigned int xinoshift = 64 - xinobits;
if (samefs) {
/*
@@ -87,24 +106,24 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
* which is friendly to du -x.
*/
stat->dev = dentry->d_sb->s_dev;
- return 0;
+ return;
} else if (xinobits) {
- unsigned int shift = 64 - xinobits;
/*
* All inode numbers of underlying fs should not be using the
* high xinobits, so we use high xinobits to partition the
* overlay st_ino address space. The high bits holds the fsid
- * (upper fsid is 0). This way overlay inode numbers are unique
- * and all inodes use overlay st_dev. Inode numbers are also
- * persistent for a given layer configuration.
+ * (upper fsid is 0). The lowest xinobit is reserved for mapping
+ * the non-persistent inode numbers range in case of overflow.
+ * This way all overlay inode numbers are unique and use the
+ * overlay st_dev.
*/
- if (stat->ino >> shift) {
+ if (likely(!(stat->ino >> xinoshift))) {
+ stat->ino |= ((u64)fsid) << (xinoshift + 1);
+ stat->dev = dentry->d_sb->s_dev;
+ return;
+ } else if (ovl_xino_warn(dentry->d_sb)) {
pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
dentry, stat->ino, xinobits);
- } else {
- stat->ino |= ((u64)fsid) << shift;
- stat->dev = dentry->d_sb->s_dev;
- return 0;
}
}
@@ -130,18 +149,17 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
*/
stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev;
}
-
- return 0;
}
-int ovl_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
enum ovl_path_type type;
struct path realpath;
const struct cred *old_cred;
- bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
+ struct inode *inode = d_inode(dentry);
+ bool is_dir = S_ISDIR(inode->i_mode);
int fsid = 0;
int err;
bool metacopy_blocks = false;
@@ -154,6 +172,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (err)
goto out;
+ /* Report the effective immutable/append-only STATX flags */
+ generic_fill_statx_attr(inode, stat);
+
/*
* For non-dir or same fs, we use st_ino of the copy up origin.
* This guaranties constant st_dev/st_ino across copy up.
@@ -230,9 +251,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
}
}
- err = ovl_map_dev_ino(dentry, stat, fsid);
- if (err)
- goto out;
+ ovl_map_dev_ino(dentry, stat, fsid);
/*
* It's probably not worth it to count subdirs to get the
@@ -257,15 +276,18 @@ out:
return err;
}
-int ovl_permission(struct inode *inode, int mask)
+int ovl_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
{
struct inode *upperinode = ovl_inode_upper(inode);
- struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
+ struct inode *realinode;
+ struct path realpath;
const struct cred *old_cred;
int err;
/* Careful in RCU walk mode */
- if (!realinode) {
+ ovl_i_path_real(inode, &realpath);
+ if (!realpath.dentry) {
WARN_ON(!(mask & MAY_NOT_BLOCK));
return -ECHILD;
}
@@ -274,10 +296,11 @@ int ovl_permission(struct inode *inode, int mask)
* Check overlay inode with the creds of task and underlying inode
* with creds of mounter
*/
- err = generic_permission(inode, mask);
+ err = generic_permission(&init_user_ns, inode, mask);
if (err)
return err;
+ realinode = d_inode(realpath.dentry);
old_cred = ovl_override_creds(inode->i_sb);
if (!upperinode &&
!special_file(realinode->i_mode) && mask & MAY_WRITE) {
@@ -285,7 +308,7 @@ int ovl_permission(struct inode *inode, int mask)
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
}
- err = inode_permission(realinode, mask);
+ err = inode_permission(mnt_user_ns(realpath.mnt), realinode, mask);
revert_creds(old_cred);
return err;
@@ -307,18 +330,26 @@ static const char *ovl_get_link(struct dentry *dentry,
return p;
}
-bool ovl_is_private_xattr(const char *name)
+bool ovl_is_private_xattr(struct super_block *sb, const char *name)
{
- return strncmp(name, OVL_XATTR_PREFIX,
- sizeof(OVL_XATTR_PREFIX) - 1) == 0;
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ if (ofs->config.userxattr)
+ return strncmp(name, OVL_XATTR_USER_PREFIX,
+ sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
+ else
+ return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
+ sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
}
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
int err;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdentry = ovl_i_dentry_upper(inode);
struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
+ struct path realpath;
const struct cred *old_cred;
err = ovl_want_write(dentry);
@@ -326,7 +357,10 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
goto out;
if (!value && !upperdentry) {
- err = vfs_getxattr(realdentry, name, NULL, 0);
+ ovl_path_lower(dentry, &realpath);
+ old_cred = ovl_override_creds(dentry->d_sb);
+ err = vfs_getxattr(mnt_user_ns(realpath.mnt), realdentry, name, NULL, 0);
+ revert_creds(old_cred);
if (err < 0)
goto out_drop_write;
}
@@ -340,16 +374,17 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
}
old_cred = ovl_override_creds(dentry->d_sb);
- if (value)
- err = vfs_setxattr(realdentry, name, value, size, flags);
- else {
+ if (value) {
+ err = ovl_do_setxattr(ofs, realdentry, name, value, size,
+ flags);
+ } else {
WARN_ON(flags != XATTR_REPLACE);
- err = vfs_removexattr(realdentry, name);
+ err = ovl_do_removexattr(ofs, realdentry, name);
}
revert_creds(old_cred);
/* copy c/mtime */
- ovl_copyattr(d_inode(realdentry), inode);
+ ovl_copyattr(inode);
out_drop_write:
ovl_drop_write(dentry);
@@ -362,24 +397,27 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
{
ssize_t res;
const struct cred *old_cred;
- struct dentry *realdentry =
- ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
+ struct path realpath;
+ ovl_i_path_real(inode, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
- res = vfs_getxattr(realdentry, name, value, size);
+ res = vfs_getxattr(mnt_user_ns(realpath.mnt), realpath.dentry, name, value, size);
revert_creds(old_cred);
return res;
}
-static bool ovl_can_list(const char *s)
+static bool ovl_can_list(struct super_block *sb, const char *s)
{
- /* List all non-trusted xatts */
+ /* Never list private (.overlay) */
+ if (ovl_is_private_xattr(sb, s))
+ return false;
+
+ /* List all non-trusted xattrs */
if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
return true;
- /* Never list trusted.overlay, list other trusted for superuser only */
- return !ovl_is_private_xattr(s) &&
- ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+ /* List other trusted xattrs for superuser only */
+ return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
@@ -405,7 +443,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return -EIO;
len -= slen;
- if (!ovl_can_list(s)) {
+ if (!ovl_can_list(dentry->d_sb, s)) {
res -= slen;
memmove(s, s + slen, len);
} else {
@@ -416,28 +454,107 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return res;
}
-struct posix_acl *ovl_get_acl(struct inode *inode, int type)
+#ifdef CONFIG_FS_POSIX_ACL
+/*
+ * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
+ * of the POSIX ACLs retrieved from the lower layer to this function to not
+ * alter the POSIX ACLs for the underlying filesystem.
+ */
+static void ovl_idmap_posix_acl(struct inode *realinode,
+ struct user_namespace *mnt_userns,
+ struct posix_acl *acl)
+{
+ struct user_namespace *fs_userns = i_user_ns(realinode);
+
+ for (unsigned int i = 0; i < acl->a_count; i++) {
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+
+ struct posix_acl_entry *e = &acl->a_entries[i];
+ switch (e->e_tag) {
+ case ACL_USER:
+ vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid);
+ e->e_uid = vfsuid_into_kuid(vfsuid);
+ break;
+ case ACL_GROUP:
+ vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid);
+ e->e_gid = vfsgid_into_kgid(vfsgid);
+ break;
+ }
+ }
+}
+
+/*
+ * When the relevant layer is an idmapped mount we need to take the idmapping
+ * of the layer into account and translate any ACL_{GROUP,USER} values
+ * according to the idmapped mount.
+ *
+ * We cannot alter the ACLs returned from the relevant layer as that would
+ * alter the cached values filesystem wide for the lower filesystem. Instead we
+ * can clone the ACLs and then apply the relevant idmapping of the layer.
+ *
+ * This is obviously only relevant when idmapped layers are used.
+ */
+struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
{
struct inode *realinode = ovl_inode_real(inode);
- const struct cred *old_cred;
- struct posix_acl *acl;
+ struct posix_acl *acl, *clone;
+ struct path realpath;
- if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
+ if (!IS_POSIXACL(realinode))
return NULL;
- old_cred = ovl_override_creds(inode->i_sb);
- acl = get_acl(realinode, type);
- revert_creds(old_cred);
+ /* Careful in RCU walk mode */
+ ovl_i_path_real(inode, &realpath);
+ if (!realpath.dentry) {
+ WARN_ON(!rcu);
+ return ERR_PTR(-ECHILD);
+ }
+
+ if (rcu) {
+ acl = get_cached_acl_rcu(realinode, type);
+ } else {
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ acl = get_acl(realinode, type);
+ revert_creds(old_cred);
+ }
+ /*
+ * If there are no POSIX ACLs, or we encountered an error,
+ * or the layer isn't idmapped we don't need to do anything.
+ */
+ if (!is_idmapped_mnt(realpath.mnt) || IS_ERR_OR_NULL(acl))
+ return acl;
+
+ /*
+ * We only get here if the layer is idmapped. So drop out of RCU path
+ * walk so we can clone the ACLs. There's no need to release the ACLs
+ * since get_cached_acl_rcu() doesn't take a reference on the ACLs.
+ */
+ if (rcu)
+ return ERR_PTR(-ECHILD);
- return acl;
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ if (!clone)
+ clone = ERR_PTR(-ENOMEM);
+ else
+ ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone);
+ /*
+ * Since we're not in RCU path walk we always need to release the
+ * original ACLs.
+ */
+ posix_acl_release(acl);
+ return clone;
}
+#endif
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
{
if (flags & S_ATIME) {
struct ovl_fs *ofs = inode->i_sb->s_fs_info;
struct path upperpath = {
- .mnt = ofs->upper_mnt,
+ .mnt = ovl_upper_mnt(ofs),
.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
};
@@ -453,18 +570,147 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
int err;
- struct inode *realinode = ovl_inode_real(inode);
+ struct inode *realinode = ovl_inode_realdata(inode);
const struct cred *old_cred;
if (!realinode->i_op->fiemap)
return -EOPNOTSUPP;
old_cred = ovl_override_creds(inode->i_sb);
+ err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
+ revert_creds(old_cred);
- if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
- filemap_write_and_wait(realinode->i_mapping);
+ return err;
+}
- err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
+/*
+ * Work around the fact that security_file_ioctl() takes a file argument.
+ * Introducing security_inode_fileattr_get/set() hooks would solve this issue
+ * properly.
+ */
+static int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa,
+ bool set)
+{
+ struct file *file;
+ unsigned int cmd;
+ int err;
+
+ file = dentry_open(realpath, O_RDONLY, current_cred());
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ if (set)
+ cmd = fa->fsx_valid ? FS_IOC_FSSETXATTR : FS_IOC_SETFLAGS;
+ else
+ cmd = fa->fsx_valid ? FS_IOC_FSGETXATTR : FS_IOC_GETFLAGS;
+
+ err = security_file_ioctl(file, cmd, 0);
+ fput(file);
+
+ return err;
+}
+
+int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa)
+{
+ int err;
+
+ err = ovl_security_fileattr(realpath, fa, true);
+ if (err)
+ return err;
+
+ return vfs_fileattr_set(mnt_user_ns(realpath->mnt), realpath->dentry, fa);
+}
+
+int ovl_fileattr_set(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct path upperpath;
+ const struct cred *old_cred;
+ unsigned int flags;
+ int err;
+
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out;
+
+ err = ovl_copy_up(dentry);
+ if (!err) {
+ ovl_path_real(dentry, &upperpath);
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ /*
+ * Store immutable/append-only flags in xattr and clear them
+ * in upper fileattr (in case they were set by older kernel)
+ * so children of "ovl-immutable" directories and lower aliases of
+ * "ovl-immutable" hardlinks could be copied up.
+ * Clear xattr when flags are cleared.
+ */
+ err = ovl_set_protattr(inode, upperpath.dentry, fa);
+ if (!err)
+ err = ovl_real_fileattr_set(&upperpath, fa);
+ revert_creds(old_cred);
+
+ /*
+ * Merge real inode flags with inode flags read from
+ * overlay.protattr xattr
+ */
+ flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK;
+
+ BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK);
+ flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+ inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK);
+
+ /* Update ctime */
+ ovl_copyattr(inode);
+ }
+ ovl_drop_write(dentry);
+out:
+ return err;
+}
+
+/* Convert inode protection flags to fileattr flags */
+static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa)
+{
+ BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL);
+ BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+
+ if (inode->i_flags & S_APPEND) {
+ fa->flags |= FS_APPEND_FL;
+ fa->fsx_xflags |= FS_XFLAG_APPEND;
+ }
+ if (inode->i_flags & S_IMMUTABLE) {
+ fa->flags |= FS_IMMUTABLE_FL;
+ fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+ }
+}
+
+int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa)
+{
+ int err;
+
+ err = ovl_security_fileattr(realpath, fa, false);
+ if (err)
+ return err;
+
+ err = vfs_fileattr_get(realpath->dentry, fa);
+ if (err == -ENOIOCTLCMD)
+ err = -ENOTTY;
+ return err;
+}
+
+int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct path realpath;
+ const struct cred *old_cred;
+ int err;
+
+ ovl_path_real(dentry, &realpath);
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ err = ovl_real_fileattr_get(&realpath, fa);
+ ovl_fileattr_prot_flags(inode, fa);
revert_creds(old_cred);
return err;
@@ -478,6 +724,8 @@ static const struct inode_operations ovl_file_inode_operations = {
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
.fiemap = ovl_fiemap,
+ .fileattr_get = ovl_fileattr_get,
+ .fileattr_set = ovl_fileattr_set,
};
static const struct inode_operations ovl_symlink_inode_operations = {
@@ -504,11 +752,11 @@ static const struct address_space_operations ovl_aops = {
/*
* It is possible to stack overlayfs instance on top of another
- * overlayfs instance as lower layer. We need to annonate the
+ * overlayfs instance as lower layer. We need to annotate the
* stackable i_mutex locks according to stack level of the super
* block instance. An overlayfs instance can never be in stack
* depth 0 (there is always a real fs below it). An overlayfs
- * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
+ * inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
*
* For example, here is a snip from /proc/lockdep_chains after
* dir_iterate of nested overlayfs:
@@ -561,27 +809,76 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
#endif
}
-static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
- unsigned long ino, int fsid)
+static void ovl_next_ino(struct inode *inode)
+{
+ struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+
+ inode->i_ino = atomic_long_inc_return(&ofs->last_ino);
+ if (unlikely(!inode->i_ino))
+ inode->i_ino = atomic_long_inc_return(&ofs->last_ino);
+}
+
+static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
{
int xinobits = ovl_xino_bits(inode->i_sb);
+ unsigned int xinoshift = 64 - xinobits;
/*
* When d_ino is consistent with st_ino (samefs or i_ino has enough
* bits to encode layer), set the same value used for st_ino to i_ino,
* so inode number exposed via /proc/locks and a like will be
* consistent with d_ino and st_ino values. An i_ino value inconsistent
- * with d_ino also causes nfsd readdirplus to fail. When called from
- * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
- * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
+ * with d_ino also causes nfsd readdirplus to fail.
*/
- if (ovl_same_dev(inode->i_sb)) {
- inode->i_ino = ino;
- if (xinobits && fsid && !(ino >> (64 - xinobits)))
- inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
- } else {
- inode->i_ino = get_next_ino();
+ inode->i_ino = ino;
+ if (ovl_same_fs(inode->i_sb)) {
+ return;
+ } else if (xinobits && likely(!(ino >> xinoshift))) {
+ inode->i_ino |= (unsigned long)fsid << (xinoshift + 1);
+ return;
+ }
+
+ /*
+ * For directory inodes on non-samefs with xino disabled or xino
+ * overflow, we allocate a non-persistent inode number, to be used for
+ * resolving st_ino collisions in ovl_map_dev_ino().
+ *
+ * To avoid ino collision with legitimate xino values from upper
+ * layer (fsid 0), use the lowest xinobit to map the
+ * non-persistent inode numbers to the unified st_ino address space.
+ */
+ if (S_ISDIR(inode->i_mode)) {
+ ovl_next_ino(inode);
+ if (xinobits) {
+ inode->i_ino &= ~0UL >> xinobits;
+ inode->i_ino |= 1UL << xinoshift;
+ }
+ }
+}
+
+void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip,
+ unsigned long ino, int fsid)
+{
+ struct inode *realinode;
+ struct ovl_inode *oi = OVL_I(inode);
+
+ if (oip->upperdentry)
+ oi->__upperdentry = oip->upperdentry;
+ if (oip->lowerpath && oip->lowerpath->dentry) {
+ oi->lowerpath.dentry = dget(oip->lowerpath->dentry);
+ oi->lowerpath.layer = oip->lowerpath->layer;
}
+ if (oip->lowerdata)
+ oi->lowerdata = igrab(d_inode(oip->lowerdata));
+
+ realinode = ovl_inode_real(inode);
+ ovl_copyattr(inode);
+ ovl_copyflags(realinode, inode);
+ ovl_map_ino(inode, ino, fsid);
+}
+
+static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
+{
inode->i_mode = mode;
inode->i_flags |= S_NOCMTIME;
#ifdef CONFIG_FS_POSIX_ACL
@@ -625,7 +922,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
* For the first, copy up case, the union nlink does not change, whether the
* operation succeeds or fails, but the upper inode nlink may change.
* Therefore, before copy up, we store the union nlink value relative to the
- * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ * lower inode nlink in the index inode xattr .overlay.nlink.
*
* For the second, upper hardlink case, the union nlink should be incremented
* or decremented IFF the operation succeeds, aligned with nlink change of the
@@ -660,8 +957,8 @@ static int ovl_set_nlink_common(struct dentry *dentry,
if (WARN_ON(len >= sizeof(buf)))
return -EIO;
- return ovl_do_setxattr(ovl_dentry_upper(dentry),
- OVL_XATTR_NLINK, buf, len, 0);
+ return ovl_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry),
+ OVL_XATTR_NLINK, buf, len);
}
int ovl_set_nlink_upper(struct dentry *dentry)
@@ -674,7 +971,7 @@ int ovl_set_nlink_lower(struct dentry *dentry)
return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
}
-unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback)
{
@@ -686,7 +983,8 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry,
if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
return fallback;
- err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
+ err = ovl_getxattr_upper(ofs, upperdentry, OVL_XATTR_NLINK,
+ &buf, sizeof(buf) - 1);
if (err < 0)
goto fail;
@@ -719,7 +1017,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
inode = new_inode(sb);
if (inode)
- ovl_fill_inode(inode, mode, rdev, 0, 0);
+ ovl_fill_inode(inode, mode, rdev);
return inode;
}
@@ -843,7 +1141,7 @@ struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
* Does overlay inode need to be hashed by lower inode?
*/
static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
- struct dentry *lower, struct dentry *index)
+ struct dentry *lower, bool index)
{
struct ovl_fs *ofs = sb->s_fs_info;
@@ -856,7 +1154,7 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
return true;
/* Yes, if won't be copied up */
- if (!ofs->upper_mnt)
+ if (!ovl_upper_mnt(ofs))
return true;
/* No, if lower hardlink is or will be broken on copy up */
@@ -884,15 +1182,20 @@ static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip)
{
+ struct ovl_fs *ofs = OVL_FS(sb);
struct dentry *upperdentry = oip->upperdentry;
struct ovl_path *lowerpath = oip->lowerpath;
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
+ struct path realpath = {
+ .dentry = upperdentry ?: lowerdentry,
+ .mnt = upperdentry ? ovl_upper_mnt(ofs) : lowerpath->layer->mnt,
+ };
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
oip->index);
- int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
- bool is_dir, metacopy = false;
+ int fsid = bylower ? lowerpath->layer->fsid : 0;
+ bool is_dir;
unsigned long ino = 0;
int err = oip->newinode ? -EEXIST : -ENOMEM;
@@ -931,7 +1234,8 @@ struct inode *ovl_get_inode(struct super_block *sb,
/* Recalculate nlink for non-dir due to indexing */
if (!is_dir)
- nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
+ nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry,
+ nlink);
set_nlink(inode, nlink);
ino = key->i_ino;
} else {
@@ -941,25 +1245,18 @@ struct inode *ovl_get_inode(struct super_block *sb,
err = -ENOMEM;
goto out_err;
}
+ ino = realinode->i_ino;
+ fsid = lowerpath->layer->fsid;
}
- ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
- ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
+ ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+ ovl_inode_init(inode, oip, ino, fsid);
- if (upperdentry && ovl_is_impuredir(upperdentry))
+ if (upperdentry && ovl_is_impuredir(sb, upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
if (oip->index)
ovl_set_flag(OVL_INDEX, inode);
- if (upperdentry) {
- err = ovl_check_metacopy_xattr(upperdentry);
- if (err < 0)
- goto out_err;
- metacopy = err;
- if (!metacopy)
- ovl_set_flag(OVL_UPPERDATA, inode);
- }
-
OVL_I(inode)->redirect = oip->redirect;
if (bylower)
@@ -968,11 +1265,15 @@ struct inode *ovl_get_inode(struct super_block *sb,
/* Check for non-merge dir that may have whiteouts */
if (is_dir) {
if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
- ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
+ ovl_path_check_origin_xattr(ofs, &realpath)) {
ovl_set_flag(OVL_WHITEOUTS, inode);
}
}
+ /* Check for immutable/append-only inode flags in xattr */
+ if (upperdentry)
+ ovl_check_protattr(inode, upperdentry);
+
if (inode->i_state & I_NEW)
unlock_new_inode(inode);
out:
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index ed9e129fae04..0fd1d5fdfc72 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -16,6 +16,7 @@
struct ovl_lookup_data {
struct super_block *sb;
+ struct vfsmount *mnt;
struct qstr name;
bool is_dir;
bool opaque;
@@ -25,13 +26,14 @@ struct ovl_lookup_data {
bool metacopy;
};
-static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
+static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
size_t prelen, const char *post)
{
int res;
char *buf;
+ struct ovl_fs *ofs = OVL_FS(d->sb);
- buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
+ buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
if (IS_ERR_OR_NULL(buf))
return PTR_ERR(buf);
@@ -40,7 +42,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
* One of the ancestor path elements in an absolute path
* lookup in ovl_lookup_layer() could have been opaque and
* that will stop further lookup in lower layers (d->stop=true)
- * But we have found an absolute redirect in decendant path
+ * But we have found an absolute redirect in descendant path
* element and that should force continue lookup in lower
* layers (reset d->stop).
*/
@@ -104,12 +106,13 @@ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
return 0;
}
-static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
+static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
+ enum ovl_xattr ox)
{
int res, err;
struct ovl_fh *fh = NULL;
- res = vfs_getxattr(dentry, name, NULL, 0);
+ res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
@@ -123,7 +126,7 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, name, fh->buf, res);
+ res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
if (res < 0)
goto fail;
@@ -148,17 +151,22 @@ invalid:
goto out;
}
-struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
- bool connected)
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ struct vfsmount *mnt, bool connected)
{
struct dentry *real;
int bytes;
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return NULL;
+
/*
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
+ * In case of uuid=off option just make sure that stored uuid is null.
*/
- if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
+ if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
+ !uuid_is_null(&fh->fb.uuid))
return NULL;
bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
@@ -186,21 +194,43 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
return real;
}
-static bool ovl_is_opaquedir(struct dentry *dentry)
+static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
{
- return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
+ return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
+}
+
+static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
+ const char *name,
+ struct dentry *base, int len,
+ bool drop_negative)
+{
+ struct dentry *ret = lookup_one_unlocked(mnt_user_ns(d->mnt), name, base, len);
+
+ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+ if (drop_negative && ret->d_lockref.count == 1) {
+ spin_lock(&ret->d_lock);
+ /* Recheck condition under lock */
+ if (d_is_negative(ret) && ret->d_lockref.count == 1)
+ __d_drop(ret);
+ spin_unlock(&ret->d_lock);
+ }
+ dput(ret);
+ ret = ERR_PTR(-ENOENT);
+ }
+ return ret;
}
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
const char *name, unsigned int namelen,
size_t prelen, const char *post,
- struct dentry **ret)
+ struct dentry **ret, bool drop_negative)
{
struct dentry *this;
+ struct path path;
int err;
bool last_element = !post[0];
- this = lookup_positive_unlocked(name, base, namelen);
+ this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -226,12 +256,15 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
d->stop = true;
goto put_and_out;
}
+
+ path.dentry = this;
+ path.mnt = d->mnt;
if (!d_can_lookup(this)) {
if (d->is_dir || !last_element) {
d->stop = true;
goto put_and_out;
}
- err = ovl_check_metacopy_xattr(this);
+ err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path);
if (err < 0)
goto out_err;
@@ -251,14 +284,14 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
if (d->last)
goto out;
- if (ovl_is_opaquedir(this)) {
+ if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
d->stop = true;
if (last_element)
d->opaque = true;
goto out;
}
}
- err = ovl_check_redirect(this, d, prelen, post);
+ err = ovl_check_redirect(&path, d, prelen, post);
if (err)
goto out_err;
out:
@@ -276,7 +309,7 @@ out_err:
}
static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
- struct dentry **ret)
+ struct dentry **ret, bool drop_negative)
{
/* Counting down from the end, since the prefix can change */
size_t rem = d->name.len - 1;
@@ -285,7 +318,7 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
if (d->name.name[0] != '/')
return ovl_lookup_single(base, d, d->name.name, d->name.len,
- 0, "", ret);
+ 0, "", ret, drop_negative);
while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
const char *s = d->name.name + d->name.len - rem;
@@ -298,7 +331,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
return -EIO;
err = ovl_lookup_single(base, d, s, thislen,
- d->name.len - rem, next, &base);
+ d->name.len - rem, next, &base,
+ drop_negative);
dput(dentry);
if (err)
return err;
@@ -331,7 +365,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
ofs->layers[i].fs->bad_uuid)
continue;
- origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
+ origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
connected);
if (origin)
break;
@@ -343,7 +377,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
return PTR_ERR(origin);
if (upperdentry && !ovl_is_whiteout(upperdentry) &&
- ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
+ inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
goto invalid;
if (!*stackp)
@@ -364,13 +398,13 @@ invalid:
upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
dput(origin);
- return -EIO;
+ return -ESTALE;
}
static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
- struct ovl_path **stackp, unsigned int *ctrp)
+ struct ovl_path **stackp)
{
- struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
+ struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
int err;
if (IS_ERR_OR_NULL(fh))
@@ -385,10 +419,6 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
return err;
}
- if (WARN_ON(*ctrp))
- return -EIO;
-
- *ctrp = 1;
return 0;
}
@@ -396,10 +426,10 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
* Verify that @fh matches the file handle stored in xattr @name.
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
-static int ovl_verify_fh(struct dentry *dentry, const char *name,
- const struct ovl_fh *fh)
+static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const struct ovl_fh *fh)
{
- struct ovl_fh *ofh = ovl_get_fh(dentry, name);
+ struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
int err = 0;
if (!ofh)
@@ -423,23 +453,24 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name,
*
* Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
*/
-int ovl_verify_set_fh(struct dentry *dentry, const char *name,
- struct dentry *real, bool is_upper, bool set)
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, struct dentry *real, bool is_upper,
+ bool set)
{
struct inode *inode;
struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(real, is_upper);
+ fh = ovl_encode_real_fh(ofs, real, is_upper);
err = PTR_ERR(fh);
if (IS_ERR(fh)) {
fh = NULL;
goto fail;
}
- err = ovl_verify_fh(dentry, name, fh);
+ err = ovl_verify_fh(ofs, dentry, ox, fh);
if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0);
+ err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
if (err)
goto fail;
@@ -464,11 +495,11 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
if (!d_is_dir(index))
return dget(index);
- fh = ovl_get_fh(index, OVL_XATTR_UPPER);
+ fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
if (IS_ERR_OR_NULL(fh))
return ERR_CAST(fh);
- upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+ upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
kfree(fh);
if (IS_ERR_OR_NULL(upper))
@@ -484,12 +515,6 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
return upper;
}
-/* Is this a leftover from create/whiteout of directory index entry? */
-static bool ovl_is_temp_index(struct dentry *index)
-{
- return index->d_name.name[0] == '#';
-}
-
/*
* Verify that an index entry name matches the origin file handle stored in
* OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
@@ -507,11 +532,6 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
if (!d_inode(index))
return 0;
- /* Cleanup leftover from index create/cleanup attempt */
- err = -ESTALE;
- if (ovl_is_temp_index(index))
- goto fail;
-
err = -EINVAL;
if (index->d_name.len < sizeof(struct ovl_fb)*2)
goto fail;
@@ -568,7 +588,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
goto fail;
}
- err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
+ err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
dput(upper);
if (err)
goto fail;
@@ -579,7 +599,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
if (err)
goto fail;
- if (ovl_get_nlink(origin.dentry, index, 0) == 0)
+ if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
goto orphan;
}
@@ -628,15 +648,16 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
* If the index dentry for a copy up origin inode is positive, but points
* to an inode different than the upper inode, then either the upper inode
* has been copied up and not indexed or it was indexed, but since then
- * index dir was cleared. Either way, that index cannot be used to indentify
+ * index dir was cleared. Either way, that index cannot be used to identify
* the overlay inode.
*/
-int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+ struct qstr *name)
{
struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(origin, false);
+ fh = ovl_encode_real_fh(ofs, origin, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
@@ -685,11 +706,12 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
bool is_dir = d_is_dir(origin);
int err;
- err = ovl_get_index_name(origin, &name);
+ err = ovl_get_index_name(ofs, origin, &name);
if (err)
return ERR_PTR(err);
- index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_one_positive_unlocked(ovl_upper_mnt_userns(ofs), name.name,
+ ofs->indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
if (err == -ENOENT) {
@@ -715,7 +737,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
index = ERR_PTR(-ESTALE);
goto out;
} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
- ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
+ inode_wrong_type(inode, d_inode(origin)->i_mode)) {
/*
* Index should always be of the same file type as origin
* except for the case of a whiteout index. A whiteout
@@ -735,7 +757,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
}
/* Verify that dir index 'upper' xattr points to upper dir */
- err = ovl_verify_upper(index, upper, false);
+ err = ovl_verify_upper(ofs, index, upper, false);
if (err) {
if (err == -ESTALE) {
pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
@@ -784,19 +806,19 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
}
/* Fix missing 'origin' xattr */
-static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
- struct dentry *upper)
+static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
+ struct dentry *lower, struct dentry *upper)
{
int err;
- if (ovl_check_origin_xattr(upper))
+ if (ovl_check_origin_xattr(ofs, upper))
return 0;
err = ovl_want_write(dentry);
if (err)
return err;
- err = ovl_set_origin(dentry, lower, upper);
+ err = ovl_set_origin(ofs, lower, upper);
if (!err)
err = ovl_set_impure(dentry->d_parent, upper->d_parent);
@@ -823,7 +845,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct dentry *this;
unsigned int i;
int err;
- bool metacopy = false;
+ bool uppermetacopy = false;
struct ovl_lookup_data d = {
.sb = dentry->d_sb,
.name = dentry->d_name,
@@ -841,18 +863,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
old_cred = ovl_override_creds(dentry->d_sb);
upperdir = ovl_dentry_upper(dentry->d_parent);
if (upperdir) {
- err = ovl_lookup_layer(upperdir, &d, &upperdentry);
+ d.mnt = ovl_upper_mnt(ofs);
+ err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
if (err)
goto out;
- if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
+ if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
dput(upperdentry);
err = -EREMOTE;
goto out;
}
if (upperdentry && !d.is_dir) {
- unsigned int origin_ctr = 0;
-
/*
* Lookup copy up origin by decoding origin file handle.
* We may get a disconnected dentry, which is fine,
@@ -863,13 +884,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
- err = ovl_check_origin(ofs, upperdentry, &origin_path,
- &origin_ctr);
+ err = ovl_check_origin(ofs, upperdentry, &origin_path);
if (err)
goto out_put_upper;
if (d.metacopy)
- metacopy = true;
+ uppermetacopy = true;
}
if (d.redirect) {
@@ -899,19 +919,27 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
else
d.last = lower.layer->idx == roe->numlower;
- err = ovl_lookup_layer(lower.dentry, &d, &this);
+ d.mnt = lower.layer->mnt;
+ err = ovl_lookup_layer(lower.dentry, &d, &this, false);
if (err)
goto out_put;
if (!this)
continue;
+ if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
+ dput(this);
+ err = -EPERM;
+ pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
+ goto out_put;
+ }
+
/*
* If no origin fh is stored in upper of a merge dir, store fh
* of lower dir and set upper parent "impure".
*/
if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
- err = ovl_fix_origin(dentry, this, upperdentry);
+ err = ovl_fix_origin(ofs, dentry, this, upperdentry);
if (err) {
dput(this);
goto out_put;
@@ -930,7 +958,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (upperdentry && !ctr &&
((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
(!d.is_dir && ofs->config.index && origin_path))) {
- err = ovl_verify_origin(upperdentry, this, false);
+ err = ovl_verify_origin(ofs, upperdentry, this, false);
if (err) {
dput(this);
if (d.is_dir)
@@ -940,21 +968,21 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
origin = this;
}
- if (d.metacopy)
- metacopy = true;
- /*
- * Do not store intermediate metacopy dentries in chain,
- * except top most lower metacopy dentry
- */
if (d.metacopy && ctr) {
+ /*
+ * Do not store intermediate metacopy dentries in
+ * lower chain, except top most lower metacopy dentry.
+ * Continue the loop so that if there is an absolute
+ * redirect on this dentry, poe can be reset to roe.
+ */
dput(this);
- continue;
+ this = NULL;
+ } else {
+ stack[ctr].dentry = this;
+ stack[ctr].layer = lower.layer;
+ ctr++;
}
- stack[ctr].dentry = this;
- stack[ctr].layer = lower.layer;
- ctr++;
-
/*
* Following redirects can have security consequences: it's like
* a symlink into the lower layer without the permission checks.
@@ -982,22 +1010,19 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
}
- if (metacopy) {
- /*
- * Found a metacopy dentry but did not find corresponding
- * data dentry
- */
- if (d.metacopy) {
- err = -EIO;
- goto out_put;
- }
-
- err = -EPERM;
- if (!ofs->config.metacopy) {
- pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n",
- dentry);
- goto out_put;
- }
+ /*
+ * For regular non-metacopy upper dentries, there is no lower
+ * path based lookup, hence ctr will be zero. If a dentry is found
+ * using ORIGIN xattr on upper, install it in stack.
+ *
+ * For metacopy dentry, path based lookup will find lower dentries.
+ * Just make sure a corresponding data dentry has been found.
+ */
+ if (d.metacopy || (uppermetacopy && !ctr)) {
+ pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
+ dentry);
+ err = -EIO;
+ goto out_put;
} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
if (WARN_ON(stack != NULL)) {
err = -EIO;
@@ -1005,25 +1030,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
stack = origin_path;
ctr = 1;
+ origin = origin_path->dentry;
origin_path = NULL;
}
/*
- * Lookup index by lower inode and verify it matches upper inode.
- * We only trust dir index if we verified that lower dir matches
- * origin, otherwise dir index entries may be inconsistent and we
- * ignore them.
+ * Always look up index if there is no upperdentry.
+ *
+ * For the case of upperdentry, we have set origin by now if it
+ * needed to be set. There are basically three cases.
+ *
+ * For directories, lookup index by lower inode and verify it matches
+ * upper inode. We only trust dir index if we verified that lower dir
+ * matches origin, otherwise dir index entries may be inconsistent
+ * and we ignore them.
*
- * For non-dir upper metacopy dentry, we already set "origin" if we
- * verified that lower matched upper origin. If upper origin was
- * not present (because lower layer did not support fh encode/decode),
- * or indexing is not enabled, do not set "origin" and skip looking up
- * index. This case should be handled in same way as a non-dir upper
- * without ORIGIN is handled.
+ * For regular upper, we already set origin if upper had ORIGIN
+ * xattr. There is no verification though as there is no path
+ * based dentry lookup in lower in this case.
+ *
+ * For metacopy upper, we set a verified origin already if index
+ * is enabled and if upper had an ORIGIN xattr.
*
- * Always lookup index of non-dir non-metacopy and non-upper.
*/
- if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
+ if (!upperdentry && ctr)
origin = stack[0].dentry;
if (origin && ovl_indexdir(dentry->d_sb) &&
@@ -1050,13 +1080,21 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (upperdentry)
ovl_dentry_set_upper_alias(dentry);
else if (index) {
- upperdentry = dget(index);
- upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
+ struct path upperpath = {
+ .dentry = upperdentry = dget(index),
+ .mnt = ovl_upper_mnt(ofs),
+ };
+
+ upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
if (IS_ERR(upperredirect)) {
err = PTR_ERR(upperredirect);
upperredirect = NULL;
goto out_free_oe;
}
+ err = ovl_check_metacopy_xattr(ofs, &upperpath);
+ if (err < 0)
+ goto out_free_oe;
+ uppermetacopy = err;
}
if (upperdentry || ctr) {
@@ -1074,8 +1112,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_free_oe;
+ if (upperdentry && !uppermetacopy)
+ ovl_set_flag(OVL_UPPERDATA, inode);
}
+ ovl_dentry_update_reval(dentry, upperdentry,
+ DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
+
revert_creds(old_cred);
if (origin_path) {
dput(origin_path->dentry);
@@ -1133,8 +1176,8 @@ bool ovl_lower_positive(struct dentry *dentry)
struct dentry *this;
struct dentry *lowerdir = poe->lowerstack[i].dentry;
- this = lookup_positive_unlocked(name->name, lowerdir,
- name->len);
+ this = lookup_one_positive_unlocked(mnt_user_ns(poe->lowerstack[i].layer->mnt),
+ name->name, lowerdir, name->len);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
case -ENOENT:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 3d3f2b8bdae5..eee8f08d32b6 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/uuid.h>
#include <linux/fs.h>
+#include <linux/namei.h>
#include "ovl_entry.h"
#undef pr_fmt
@@ -22,14 +23,20 @@ enum ovl_path_type {
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN)
-#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
-#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
-#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
-#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
-#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
-#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
-#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
-#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
+#define OVL_XATTR_NAMESPACE "overlay."
+#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
+
+enum ovl_xattr {
+ OVL_XATTR_OPAQUE,
+ OVL_XATTR_REDIRECT,
+ OVL_XATTR_ORIGIN,
+ OVL_XATTR_IMPURE,
+ OVL_XATTR_NLINK,
+ OVL_XATTR_UPPER,
+ OVL_XATTR_METACOPY,
+ OVL_XATTR_PROTATTR,
+};
enum ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
@@ -48,6 +55,12 @@ enum ovl_entry_flag {
OVL_E_CONNECTED,
};
+enum {
+ OVL_XINO_OFF,
+ OVL_XINO_AUTO,
+ OVL_XINO_ON,
+};
+
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
@@ -87,7 +100,7 @@ struct ovl_fb {
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
uuid_t uuid; /* uuid of filesystem */
- u32 fid[0]; /* file identifier should be 32bit aligned in-memory */
+ u32 fid[]; /* file identifier should be 32bit aligned in-memory */
} __packed;
/* In-memory and on-wire format for overlay file handle */
@@ -104,90 +117,184 @@ struct ovl_fh {
#define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \
offsetof(struct ovl_fb, fid))
-static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
+extern const char *const ovl_xattr_table[][2];
+static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
+{
+ return ovl_xattr_table[ox][ofs->config.userxattr];
+}
+
+/*
+ * When changing ownership of an upper object map the intended ownership
+ * according to the upper layer's idmapping. When an upper mount idmaps files
+ * that are stored on-disk as owned by id 1001 to id 1000 this means stat on
+ * this object will report it as being owned by id 1000 when calling stat via
+ * the upper mount.
+ * In order to change ownership of an object so stat reports id 1000 when
+ * called on an idmapped upper mount the value written to disk - i.e., the
+ * value stored in ia_*id - must be 1001. The mount mapping helper will thus take
+ * care to map 1000 to 1001.
+ * The mnt idmapping helpers are nops if the upper layer isn't idmapped.
+ */
+static inline int ovl_do_notify_change(struct ovl_fs *ofs,
+ struct dentry *upperdentry,
+ struct iattr *attr)
+{
+ return notify_change(ovl_upper_mnt_userns(ofs), upperdentry, attr, NULL);
+}
+
+static inline int ovl_do_rmdir(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry)
{
- int err = vfs_rmdir(dir, dentry);
+ int err = vfs_rmdir(ovl_upper_mnt_userns(ofs), dir, dentry);
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
return err;
}
-static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
+static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir,
+ struct dentry *dentry)
{
- int err = vfs_unlink(dir, dentry, NULL);
+ int err = vfs_unlink(ovl_upper_mnt_userns(ofs), dir, dentry, NULL);
pr_debug("unlink(%pd2) = %i\n", dentry, err);
return err;
}
-static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *new_dentry)
+static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry,
+ struct inode *dir, struct dentry *new_dentry)
{
- int err = vfs_link(old_dentry, dir, new_dentry, NULL);
+ int err = vfs_link(old_dentry, ovl_upper_mnt_userns(ofs), dir, new_dentry, NULL);
pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
return err;
}
-static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
+static inline int ovl_do_create(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry,
umode_t mode)
{
- int err = vfs_create(dir, dentry, mode, true);
+ int err = vfs_create(ovl_upper_mnt_userns(ofs), dir, dentry, mode, true);
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
-static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
+static inline int ovl_do_mkdir(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry,
umode_t mode)
{
- int err = vfs_mkdir(dir, dentry, mode);
+ int err = vfs_mkdir(ovl_upper_mnt_userns(ofs), dir, dentry, mode);
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
-static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
+static inline int ovl_do_mknod(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t dev)
{
- int err = vfs_mknod(dir, dentry, mode, dev);
+ int err = vfs_mknod(ovl_upper_mnt_userns(ofs), dir, dentry, mode, dev);
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
return err;
}
-static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
+static inline int ovl_do_symlink(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry,
const char *oldname)
{
- int err = vfs_symlink(dir, dentry, oldname);
+ int err = vfs_symlink(ovl_upper_mnt_userns(ofs), dir, dentry, oldname);
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
return err;
}
-static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name,
+ void *value, size_t size)
{
- int err = vfs_setxattr(dentry, name, value, size, flags);
- pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
+ int err, len;
+
+ WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb);
+
+ err = vfs_getxattr(mnt_user_ns(path->mnt), path->dentry,
+ name, value, size);
+ len = (value && err > 0) ? err : 0;
+
+ pr_debug("getxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n",
+ path->dentry, name, min(len, 48), value, size, err);
+ return err;
+}
+
+static inline ssize_t ovl_getxattr_upper(struct ovl_fs *ofs,
+ struct dentry *upperdentry,
+ enum ovl_xattr ox, void *value,
+ size_t size)
+{
+ struct path upperpath = {
+ .dentry = upperdentry,
+ .mnt = ovl_upper_mnt(ofs),
+ };
+
+ return ovl_do_getxattr(&upperpath, ovl_xattr(ofs, ox), value, size);
+}
+
+static inline ssize_t ovl_path_getxattr(struct ovl_fs *ofs,
+ const struct path *path,
+ enum ovl_xattr ox, void *value,
+ size_t size)
+{
+ return ovl_do_getxattr(path, ovl_xattr(ofs, ox), value, size);
+}
+
+static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name,
+ value, size, flags);
+
+ pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
dentry, name, min((int)size, 48), value, size, flags, err);
return err;
}
-static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
+static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, const void *value,
+ size_t size)
{
- int err = vfs_removexattr(dentry, name);
+ return ovl_do_setxattr(ofs, dentry, ovl_xattr(ofs, ox), value, size, 0);
+}
+
+static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
+ const char *name)
+{
+ int err = vfs_removexattr(ovl_upper_mnt_userns(ofs), dentry, name);
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
return err;
}
-static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
- struct inode *newdir, struct dentry *newdentry,
- unsigned int flags)
+static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox)
+{
+ return ovl_do_removexattr(ofs, dentry, ovl_xattr(ofs, ox));
+}
+
+static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir,
+ struct dentry *olddentry, struct inode *newdir,
+ struct dentry *newdentry, unsigned int flags)
{
int err;
+ struct renamedata rd = {
+ .old_mnt_userns = ovl_upper_mnt_userns(ofs),
+ .old_dir = olddir,
+ .old_dentry = olddentry,
+ .new_mnt_userns = ovl_upper_mnt_userns(ofs),
+ .new_dir = newdir,
+ .new_dentry = newdentry,
+ .flags = flags,
+ };
pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
- err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
+ err = vfs_rename(&rd);
if (err) {
pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
olddentry, newdentry, err);
@@ -195,20 +302,31 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
return err;
}
-static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
+static inline int ovl_do_whiteout(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *dentry)
{
- int err = vfs_whiteout(dir, dentry);
+ int err = vfs_whiteout(ovl_upper_mnt_userns(ofs), dir, dentry);
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
}
-static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
+static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
+ struct dentry *dentry, umode_t mode)
{
- struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
- int err = PTR_ERR_OR_ZERO(ret);
+ struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
+ struct file *file = vfs_tmpfile_open(ovl_upper_mnt_userns(ofs), &path, mode,
+ O_LARGEFILE | O_WRONLY, current_cred());
+ int err = PTR_ERR_OR_ZERO(file);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
- return ret;
+ return file;
+}
+
+static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs,
+ const char *name,
+ struct dentry *base, int len)
+{
+ return lookup_one(ovl_upper_mnt_userns(ofs), name, base, len);
}
static inline bool ovl_open_flags_need_copy_up(int flags)
@@ -219,6 +337,18 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
}
+static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs)
+{
+ /*
+ * To avoid regressions in existing setups with overlay lower offline
+ * changes, we allow lower changes only if none of the new features
+ * are used.
+ */
+ return (!ofs->config.index && !ofs->config.metacopy &&
+ !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON);
+}
+
+
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
@@ -230,15 +360,20 @@ bool ovl_index_all(struct super_block *sb);
bool ovl_verify_lower(struct super_block *sb);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
bool ovl_dentry_remote(struct dentry *dentry);
+void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry,
+ unsigned int mask);
bool ovl_dentry_weird(struct dentry *dentry);
enum ovl_path_type ovl_path_type(struct dentry *dentry);
void ovl_path_upper(struct dentry *dentry, struct path *path);
void ovl_path_lower(struct dentry *dentry, struct path *path);
void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
+void ovl_i_path_real(struct inode *inode, struct path *path);
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
+enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
+const struct ovl_layer *ovl_i_layer_lower(struct inode *inode);
const struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_i_dentry_upper(struct inode *inode);
@@ -264,25 +399,32 @@ void ovl_set_upperdata(struct inode *inode);
bool ovl_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
-void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry, struct dentry *lowerdata);
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dir_modified(struct dentry *dentry, bool impurity);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
-struct file *ovl_path_open(struct path *path, int flags);
+struct file *ovl_path_open(const struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
bool ovl_already_copied_up(struct dentry *dentry, int flags);
-bool ovl_check_origin_xattr(struct dentry *dentry);
-bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
- const char *name, const void *value, size_t size,
+bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
+ enum ovl_xattr ox);
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
+
+static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs,
+ struct dentry *upperdentry)
+{
+ struct path upperpath = {
+ .dentry = upperdentry,
+ .mnt = ovl_upper_mnt(ofs),
+ };
+ return ovl_path_check_origin_xattr(ofs, &upperpath);
+}
+
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
+ enum ovl_xattr ox, const void *value, size_t size,
int xerr);
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
-void ovl_set_flag(unsigned long flag, struct inode *inode);
-void ovl_clear_flag(unsigned long flag, struct inode *inode);
-bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
bool ovl_is_inuse(struct dentry *dentry);
@@ -290,15 +432,46 @@ bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry);
void ovl_nlink_end(struct dentry *dentry);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
-int ovl_check_metacopy_xattr(struct dentry *dentry);
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path);
bool ovl_is_metacopy_dentry(struct dentry *dentry);
-char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
-ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
- size_t padding);
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
+int ovl_sync_status(struct ovl_fs *ofs);
+
+static inline void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+ set_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline void ovl_clear_flag(unsigned long flag, struct inode *inode)
+{
+ clear_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+ return test_bit(flag, &OVL_I(inode)->flags);
+}
+
+static inline bool ovl_is_impuredir(struct super_block *sb,
+ struct dentry *upperdentry)
+{
+ struct ovl_fs *ofs = OVL_FS(sb);
+ struct path upperpath = {
+ .dentry = upperdentry,
+ .mnt = ovl_upper_mnt(ofs),
+ };
-static inline bool ovl_is_impuredir(struct dentry *dentry)
+ return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE);
+}
+
+/*
+ * With xino=auto, we do best effort to keep all inodes on same st_dev and
+ * d_ino consistent with st_ino.
+ * With xino=on, we do the same effort but we warn if we failed.
+ */
+static inline bool ovl_xino_warn(struct super_block *sb)
{
- return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+ return OVL_FS(sb)->config.xino == OVL_XINO_ON;
}
/* All layers on same fs? */
@@ -339,18 +512,23 @@ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
{
+ if (fh_len < sizeof(struct ovl_fh))
+ return -EINVAL;
+
return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
}
-struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
- bool connected);
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ struct vfsmount *mnt, bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ovl_path **stackp);
-int ovl_verify_set_fh(struct dentry *dentry, const char *name,
- struct dentry *real, bool is_upper, bool set);
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
+ enum ovl_xattr ox, struct dentry *real, bool is_upper,
+ bool set);
struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
-int ovl_get_index_name(struct dentry *origin, struct qstr *name);
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+ struct qstr *name);
struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool verify);
@@ -359,57 +537,82 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
-static inline int ovl_verify_origin(struct dentry *upper,
+static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool set)
{
- return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
+ return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin,
+ false, set);
}
-static inline int ovl_verify_upper(struct dentry *index,
- struct dentry *upper, bool set)
+static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index,
+ struct dentry *upper, bool set)
{
- return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
+ return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set);
}
/* readdir.c */
extern const struct file_operations ovl_dir_operations;
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper);
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
-void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
+void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
+ struct list_head *list);
void ovl_cache_free(struct list_head *list);
void ovl_dir_cache_free(struct inode *inode);
-int ovl_check_d_type_supported(struct path *realpath);
-void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
- struct dentry *dentry, int level);
+int ovl_check_d_type_supported(const struct path *realpath);
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
+ struct vfsmount *mnt, struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct ovl_fs *ofs);
+/*
+ * Can we iterate real dir directly?
+ *
+ * Non-merge dir may contain whiteouts from a time it was a merge upper, before
+ * lower dir was removed under it and possibly before it was rotated from upper
+ * to lower layer.
+ */
+static inline bool ovl_dir_is_real(struct dentry *dir)
+{
+ return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
+}
+
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
int ovl_set_nlink_lower(struct dentry *dentry);
-unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
-int ovl_setattr(struct dentry *dentry, struct iattr *attr);
-int ovl_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-int ovl_permission(struct inode *inode, int mask);
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr);
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int flags);
+int ovl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask);
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags);
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
-struct posix_acl *ovl_get_acl(struct inode *inode, int type);
+
+#ifdef CONFIG_FS_POSIX_ACL
+struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu);
+#else
+#define ovl_get_acl NULL
+#endif
+
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
-bool ovl_is_private_xattr(const char *name);
+bool ovl_is_private_xattr(struct super_block *sb, const char *name);
struct ovl_inode_params {
struct inode *newinode;
struct dentry *upperdentry;
struct ovl_path *lowerpath;
- struct dentry *index;
+ bool index;
unsigned int numlower;
char *redirect;
struct dentry *lowerdata;
};
+void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip,
+ unsigned long ino, int fsid);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
bool is_upper);
@@ -417,27 +620,37 @@ bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip);
-static inline void ovl_copyattr(struct inode *from, struct inode *to)
-{
- to->i_uid = from->i_uid;
- to->i_gid = from->i_gid;
- to->i_mode = from->i_mode;
- to->i_atime = from->i_atime;
- to->i_mtime = from->i_mtime;
- to->i_ctime = from->i_ctime;
- i_size_write(to, i_size_read(from));
-}
+void ovl_copyattr(struct inode *to);
+
+/* vfs inode flags copied from real to ovl inode */
+#define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE)
+/* vfs inode flags read from overlay.protattr xattr to ovl inode */
+#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE)
+
+/*
+ * fileattr flags copied from lower to upper inode on copy up.
+ * We cannot copy up immutable/append-only flags, because that would prevent
+ * linking temp inode to upper dir, so we store them in xattr instead.
+ */
+#define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL)
+#define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME)
+#define OVL_PROT_FS_FLAGS_MASK (FS_APPEND_FL | FS_IMMUTABLE_FL)
+#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE)
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper);
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+ struct fileattr *fa);
static inline void ovl_copyflags(struct inode *from, struct inode *to)
{
- unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
+ unsigned int mask = OVL_COPY_I_FLAGS_MASK;
inode_set_flags(to, from->i_flags & mask, mask);
}
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
struct dentry *dentry);
struct ovl_cattr {
dev_t rdev;
@@ -448,25 +661,35 @@ struct ovl_cattr {
#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
-struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
+int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
+ struct dentry **newdentry, umode_t mode);
+struct dentry *ovl_create_real(struct ovl_fs *ofs,
+ struct inode *dir, struct dentry *newdentry,
+ struct ovl_cattr *attr);
+int ovl_cleanup(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry);
+struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir);
+struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
struct ovl_cattr *attr);
-int ovl_cleanup(struct inode *dir, struct dentry *dentry);
-struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
/* file.c */
extern const struct file_operations ovl_file_operations;
int __init ovl_aio_request_cache_init(void);
void ovl_aio_request_cache_destroy(void);
+int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
+int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
+int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int ovl_fileattr_set(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct fileattr *fa);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_with_data(struct dentry *dentry);
-int ovl_copy_up_flags(struct dentry *dentry, int flags);
int ovl_maybe_copy_up(struct dentry *dentry, int flags);
-int ovl_copy_xattr(struct dentry *old, struct dentry *new);
-int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new);
+int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+ bool is_upper);
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
struct dentry *upper);
/* export.c */
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 89015ea822e7..e1af8f660698 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,9 +14,12 @@ struct ovl_config {
bool redirect_follow;
const char *redirect_mode;
bool index;
+ bool uuid;
bool nfs_export;
int xino;
bool metacopy;
+ bool userxattr;
+ bool ovl_volatile;
};
struct ovl_sb {
@@ -46,7 +49,6 @@ struct ovl_path {
/* private information held for overlayfs's superblock */
struct ovl_fs {
- struct vfsmount *upper_mnt;
unsigned int numlayer;
/* Number of unique fs among layers including upper fs */
unsigned int numfs;
@@ -68,20 +70,41 @@ struct ovl_fs {
/* Did we take the inuse lock? */
bool upperdir_locked;
bool workdir_locked;
+ bool share_whiteout;
/* Traps in ovl inode cache */
- struct inode *upperdir_trap;
struct inode *workbasedir_trap;
struct inode *workdir_trap;
struct inode *indexdir_trap;
/* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
int xino_mode;
+ /* For allocation of non-persistent inode numbers */
+ atomic_long_t last_ino;
+ /* Whiteout dentry cache */
+ struct dentry *whiteout;
+ /* r/o snapshot of upperdir sb's errseq, only taken on volatile mounts */
+ errseq_t errseq;
};
+static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
+{
+ return ofs->layers[0].mnt;
+}
+
+static inline struct user_namespace *ovl_upper_mnt_userns(struct ovl_fs *ofs)
+{
+ return mnt_user_ns(ovl_upper_mnt(ofs));
+}
+
static inline struct ovl_fs *OVL_FS(struct super_block *sb)
{
return (struct ovl_fs *)sb->s_fs_info;
}
+static inline bool ovl_should_sync(struct ovl_fs *ofs)
+{
+ return !ofs->config.ovl_volatile;
+}
+
/* private information held for every overlayfs dentry */
struct ovl_entry {
union {
@@ -111,7 +134,7 @@ struct ovl_inode {
unsigned long flags;
struct inode vfs_inode;
struct dentry *__upperdentry;
- struct inode *lower;
+ struct ovl_path lowerpath;
/* synchronize copy up and more */
struct mutex lock;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 40ac9ce2465a..2b210640036c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -170,7 +170,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
return p;
}
-static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
const char *name, int len, u64 ino,
unsigned int d_type)
{
@@ -179,22 +179,22 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
struct ovl_cache_entry *p;
if (ovl_cache_entry_find_link(name, len, &newp, &parent))
- return 0;
+ return true;
p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
- return -ENOMEM;
+ return false;
}
list_add_tail(&p->l_node, rdd->list);
rb_link_node(&p->node, parent, newp);
rb_insert_color(&p->node, rdd->root);
- return 0;
+ return true;
}
-static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
+static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
const char *name, int namelen,
loff_t offset, u64 ino, unsigned int d_type)
{
@@ -211,7 +211,7 @@ static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
list_add_tail(&p->l_node, &rdd->middle);
}
- return rdd->err;
+ return rdd->err == 0;
}
void ovl_cache_free(struct list_head *list)
@@ -250,7 +250,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
}
}
-static int ovl_fill_merge(struct dir_context *ctx, const char *name,
+static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -264,11 +264,11 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name,
return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
}
-static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
+static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
{
int err;
struct ovl_cache_entry *p;
- struct dentry *dentry;
+ struct dentry *dentry, *dir = path->dentry;
const struct cred *old_cred;
old_cred = ovl_override_creds(rdd->dentry->d_sb);
@@ -278,7 +278,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
while (rdd->first_maybe_whiteout) {
p = rdd->first_maybe_whiteout;
rdd->first_maybe_whiteout = p->next_maybe_whiteout;
- dentry = lookup_one_len(p->name, dir, p->len);
+ dentry = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
if (!IS_ERR(dentry)) {
p->is_whiteout = ovl_is_whiteout(dentry);
dput(dentry);
@@ -291,13 +291,13 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
return err;
}
-static inline int ovl_dir_read(struct path *realpath,
+static inline int ovl_dir_read(const struct path *realpath,
struct ovl_readdir_data *rdd)
{
struct file *realfile;
int err;
- realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
+ realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
if (IS_ERR(realfile))
return PTR_ERR(realfile);
@@ -312,25 +312,13 @@ static inline int ovl_dir_read(struct path *realpath,
} while (!err && rdd->count);
if (!err && rdd->first_maybe_whiteout && rdd->dentry)
- err = ovl_check_whiteouts(realpath->dentry, rdd);
+ err = ovl_check_whiteouts(realpath, rdd);
fput(realfile);
return err;
}
-/*
- * Can we iterate real dir directly?
- *
- * Non-merge dir may contain whiteouts from a time it was a merge upper, before
- * lower dir was removed under it and possibly before it was rotated from upper
- * to lower layer.
- */
-static bool ovl_dir_is_real(struct dentry *dir)
-{
- return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir));
-}
-
static void ovl_dir_reset(struct file *file)
{
struct ovl_dir_file *od = file->private_data;
@@ -438,15 +426,23 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
/* Map inode number to lower fs unique range */
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
- const char *name, int namelen)
+ const char *name, int namelen, bool warn)
{
- if (ino >> (64 - xinobits)) {
- pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
- namelen, name, ino, xinobits);
+ unsigned int xinoshift = 64 - xinobits;
+
+ if (unlikely(ino >> xinoshift)) {
+ if (warn) {
+ pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+ namelen, name, ino, xinobits);
+ }
return ino;
}
- return ino | ((u64)fsid) << (64 - xinobits);
+ /*
+ * The lowest xinobit is reserved for mapping the non-persistent inode
+ * numbers range, but this range is only exposed via st_ino, not here.
+ */
+ return ino | ((u64)fsid) << (xinoshift + 1);
}
/*
@@ -459,7 +455,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
* copy up origin, call vfs_getattr() on the overlay entry to make
* sure that d_ino will be consistent with st_ino from stat(2).
*/
-static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
+static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p)
{
struct dentry *dir = path->dentry;
@@ -483,8 +479,10 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
goto get;
}
}
- this = lookup_one_len(p->name, dir, p->len);
+ this = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
+ /* Mark a stale entry */
+ p->is_whiteout = true;
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -515,7 +513,8 @@ get:
} else if (xinobits && !OVL_TYPE_UPPER(type)) {
ino = ovl_remap_lower_ino(ino, xinobits,
ovl_layer_lower(this)->fsid,
- p->name, p->len);
+ p->name, p->len,
+ ovl_xino_warn(dir->d_sb));
}
out:
@@ -529,7 +528,7 @@ fail:
goto out;
}
-static int ovl_fill_plain(struct dir_context *ctx, const char *name,
+static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -541,14 +540,14 @@ static int ovl_fill_plain(struct dir_context *ctx, const char *name,
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
if (p == NULL) {
rdd->err = -ENOMEM;
- return -ENOMEM;
+ return false;
}
list_add_tail(&p->l_node, rdd->list);
- return 0;
+ return true;
}
-static int ovl_dir_read_impure(struct path *path, struct list_head *list,
+static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
struct rb_root *root)
{
int err;
@@ -593,10 +592,11 @@ static int ovl_dir_read_impure(struct path *path, struct list_head *list,
return 0;
}
-static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
+static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
{
int res;
struct dentry *dentry = path->dentry;
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct ovl_dir_cache *cache;
cache = ovl_dir_cache(d_inode(dentry));
@@ -623,8 +623,8 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
* Removing the "impure" xattr is best effort.
*/
if (!ovl_want_write(dentry)) {
- ovl_do_removexattr(ovl_dentry_upper(dentry),
- OVL_XATTR_IMPURE);
+ ovl_removexattr(ofs, ovl_dentry_upper(dentry),
+ OVL_XATTR_IMPURE);
ovl_drop_write(dentry);
}
ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
@@ -645,9 +645,10 @@ struct ovl_readdir_translate {
u64 parent_ino;
int fsid;
int xinobits;
+ bool xinowarn;
};
-static int ovl_fill_real(struct dir_context *ctx, const char *name,
+static bool ovl_fill_real(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -665,7 +666,7 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
ino = p->ino;
} else if (rdt->xinobits) {
ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
- name, namelen);
+ name, namelen, rdt->xinowarn);
}
return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
@@ -696,6 +697,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
.ctx.actor = ovl_fill_real,
.orig_ctx = ctx,
.xinobits = ovl_xino_bits(dir->d_sb),
+ .xinowarn = ovl_xino_warn(dir->d_sb),
};
if (rdt.xinobits && lower_layer)
@@ -732,8 +734,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
struct ovl_dir_file *od = file->private_data;
struct dentry *dentry = file->f_path.dentry;
struct ovl_cache_entry *p;
+ const struct cred *old_cred;
int err;
+ old_cred = ovl_override_creds(dentry->d_sb);
if (!ctx->pos)
ovl_dir_reset(file);
@@ -747,17 +751,20 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
(ovl_same_fs(dentry->d_sb) &&
(ovl_is_impure_dir(file) ||
OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
- return ovl_iterate_real(file, ctx);
+ err = ovl_iterate_real(file, ctx);
+ } else {
+ err = iterate_dir(od->realfile, ctx);
}
- return iterate_dir(od->realfile, ctx);
+ goto out;
}
if (!od->cache) {
struct ovl_dir_cache *cache;
cache = ovl_cache_get(dentry);
+ err = PTR_ERR(cache);
if (IS_ERR(cache))
- return PTR_ERR(cache);
+ goto out;
od->cache = cache;
ovl_seek_cursor(od, ctx->pos);
@@ -769,15 +776,21 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
if (!p->ino) {
err = ovl_cache_update_ino(&file->f_path, p);
if (err)
- return err;
+ goto out;
}
+ }
+ /* ovl_cache_update_ino() sets is_whiteout on stale entry */
+ if (!p->is_whiteout) {
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
break;
}
od->cursor = p->l_node.next;
ctx->pos++;
}
- return 0;
+ err = 0;
+out:
+ revert_creds(old_cred);
+ return err;
}
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
@@ -820,47 +833,77 @@ out_unlock:
return res;
}
-static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
- int datasync)
+static struct file *ovl_dir_open_realfile(const struct file *file,
+ const struct path *realpath)
{
+ struct file *res;
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
+ revert_creds(old_cred);
+
+ return res;
+}
+
+/*
+ * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
+ * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
+ *
+ * TODO: use same abstract type for file->private_data of dir and file so
+ * upperfile could be cached for files as well.
+ */
+struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
+{
+
struct ovl_dir_file *od = file->private_data;
struct dentry *dentry = file->f_path.dentry;
- struct file *realfile = od->realfile;
+ struct file *old, *realfile = od->realfile;
- /* Nothing to sync for lower */
if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
- return 0;
+ return want_upper ? NULL : realfile;
/*
* Need to check if we started out being a lower dir, but got copied up
*/
if (!od->is_upper) {
- struct inode *inode = file_inode(file);
-
realfile = READ_ONCE(od->upperfile);
if (!realfile) {
struct path upperpath;
ovl_path_upper(dentry, &upperpath);
- realfile = ovl_path_open(&upperpath, O_RDONLY);
-
- inode_lock(inode);
- if (!od->upperfile) {
- if (IS_ERR(realfile)) {
- inode_unlock(inode);
- return PTR_ERR(realfile);
- }
- smp_store_release(&od->upperfile, realfile);
- } else {
- /* somebody has beaten us to it */
- if (!IS_ERR(realfile))
- fput(realfile);
- realfile = od->upperfile;
+ realfile = ovl_dir_open_realfile(file, &upperpath);
+ if (IS_ERR(realfile))
+ return realfile;
+
+ old = cmpxchg_release(&od->upperfile, NULL, realfile);
+ if (old) {
+ fput(realfile);
+ realfile = old;
}
- inode_unlock(inode);
}
}
+ return realfile;
+}
+
+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct file *realfile;
+ int err;
+
+ err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb));
+ if (err <= 0)
+ return err;
+
+ realfile = ovl_dir_real_file(file, true);
+ err = PTR_ERR_OR_ZERO(realfile);
+
+ /* Nothing to sync for lower */
+ if (!realfile || err)
+ return err;
+
return vfs_fsync_range(realfile, start, end, datasync);
}
@@ -893,7 +936,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
return -ENOMEM;
type = ovl_path_real(file->f_path.dentry, &realpath);
- realfile = ovl_path_open(&realpath, file->f_flags);
+ realfile = ovl_dir_open_realfile(file, &realpath);
if (IS_ERR(realfile)) {
kfree(od);
return PTR_ERR(realfile);
@@ -958,7 +1001,8 @@ del_entry:
return err;
}
-void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
+void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
+ struct list_head *list)
{
struct ovl_cache_entry *p;
@@ -969,7 +1013,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
if (WARN_ON(!p->is_whiteout || !p->is_upper))
continue;
- dentry = lookup_one_len(p->name, upper, p->len);
+ dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
if (IS_ERR(dentry)) {
pr_err("lookup '%s/%.*s' failed (%i)\n",
upper->d_name.name, p->len, p->name,
@@ -977,13 +1021,13 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
continue;
}
if (dentry->d_inode)
- ovl_cleanup(upper->d_inode, dentry);
+ ovl_cleanup(ofs, upper->d_inode, dentry);
dput(dentry);
}
inode_unlock(upper->d_inode);
}
-static int ovl_check_d_type(struct dir_context *ctx, const char *name,
+static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
int namelen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -992,19 +1036,19 @@ static int ovl_check_d_type(struct dir_context *ctx, const char *name,
/* Even if d_type is not supported, DT_DIR is returned for . and .. */
if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
- return 0;
+ return true;
if (d_type != DT_UNKNOWN)
rdd->d_type_supported = true;
- return 0;
+ return true;
}
/*
* Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
* if error is encountered.
*/
-int ovl_check_d_type_supported(struct path *realpath)
+int ovl_check_d_type_supported(const struct path *realpath)
{
int err;
struct ovl_readdir_data rdd = {
@@ -1019,7 +1063,10 @@ int ovl_check_d_type_supported(struct path *realpath)
return rdd.d_type_supported;
}
-static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+#define OVL_INCOMPATDIR_NAME "incompat"
+
+static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
+ int level)
{
int err;
struct inode *dir = path->dentry->d_inode;
@@ -1033,6 +1080,19 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level)
.root = &root,
.is_lowest = false,
};
+ bool incompat = false;
+
+ /*
+ * The "work/incompat" directory is treated specially - if it is not
+ * empty, instead of printing a generic error and mounting read-only,
+ * we will error about incompat features and fail the mount.
+ *
+ * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
+ * starts with '#'.
+ */
+ if (level == 2 &&
+ !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
+ incompat = true;
err = ovl_dir_read(path, &rdd);
if (err)
@@ -1047,38 +1107,48 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level)
continue;
if (p->len == 2 && p->name[1] == '.')
continue;
+ } else if (incompat) {
+ pr_err("overlay with incompat feature '%s' cannot be mounted\n",
+ p->name);
+ err = -EINVAL;
+ break;
}
- dentry = lookup_one_len(p->name, path->dentry, p->len);
+ dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
if (IS_ERR(dentry))
continue;
if (dentry->d_inode)
- ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+ err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
dput(dentry);
+ if (err)
+ break;
}
inode_unlock(dir);
out:
ovl_cache_free(&list);
+ return err;
}
-void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
- struct dentry *dentry, int level)
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
+ struct vfsmount *mnt, struct dentry *dentry, int level)
{
int err;
if (!d_is_dir(dentry) || level > 1) {
- ovl_cleanup(dir, dentry);
- return;
+ return ovl_cleanup(ofs, dir, dentry);
}
- err = ovl_do_rmdir(dir, dentry);
+ err = ovl_do_rmdir(ofs, dir, dentry);
if (err) {
struct path path = { .mnt = mnt, .dentry = dentry };
inode_unlock(dir);
- ovl_workdir_cleanup_recurse(&path, level + 1);
+ err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
inode_lock_nested(dir, I_MUTEX_PARENT);
- ovl_cleanup(dir, dentry);
+ if (!err)
+ err = ovl_cleanup(ofs, dir, dentry);
}
+
+ return err;
}
int ovl_indexdir_cleanup(struct ovl_fs *ofs)
@@ -1087,7 +1157,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
struct dentry *indexdir = ofs->indexdir;
struct dentry *index = NULL;
struct inode *dir = indexdir->d_inode;
- struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir };
+ struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
LIST_HEAD(list);
struct rb_root root = RB_ROOT;
struct ovl_cache_entry *p;
@@ -1111,18 +1181,25 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
if (p->len == 2 && p->name[1] == '.')
continue;
}
- index = lookup_one_len(p->name, indexdir, p->len);
+ index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
index = NULL;
break;
}
+ /* Cleanup leftover from index create/cleanup attempt */
+ if (index->d_name.name[0] == '#') {
+ err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
+ if (err)
+ break;
+ goto next;
+ }
err = ovl_verify_index(ofs, index);
if (!err) {
goto next;
} else if (err == -ESTALE) {
/* Cleanup stale index entries */
- err = ovl_cleanup(dir, index);
+ err = ovl_cleanup(ofs, dir, index);
} else if (err != -ENOENT) {
/*
* Abort mount to avoid corrupting the index if
@@ -1135,10 +1212,10 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
* Whiteout orphan index to block future open by
* handle after overlay nlink dropped to zero.
*/
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(ofs, dir, index);
} else {
/* Cleanup orphan index entries */
- err = ovl_cleanup(dir, index);
+ err = ovl_cleanup(ofs, dir, index);
}
if (err)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ac967f1cb6e5..a29a8afe9b26 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include <linux/exportfs.h>
+#include <linux/file.h>
#include "overlayfs.h"
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
@@ -79,7 +80,7 @@ static void ovl_dentry_release(struct dentry *dentry)
static struct dentry *ovl_d_real(struct dentry *dentry,
const struct inode *inode)
{
- struct dentry *real;
+ struct dentry *real = NULL, *lower;
/* It's an overlay file */
if (inode && d_inode(dentry) == inode)
@@ -98,9 +99,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
return real;
- real = ovl_dentry_lowerdata(dentry);
- if (!real)
+ lower = ovl_dentry_lowerdata(dentry);
+ if (!lower)
goto bug;
+ real = lower;
/* Handle recursion */
real = d_real(real, inode);
@@ -108,58 +110,61 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
if (!inode || inode == d_inode(real))
return real;
bug:
- WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
- inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
+ WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
+ __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
+ inode ? inode->i_ino : 0, real,
+ real && d_inode(real) ? d_inode(real)->i_ino : 0);
return dentry;
}
-static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
{
- struct ovl_entry *oe = dentry->d_fsdata;
- unsigned int i;
int ret = 1;
- for (i = 0; i < oe->numlower; i++) {
- struct dentry *d = oe->lowerstack[i].dentry;
-
- if (d->d_flags & DCACHE_OP_REVALIDATE) {
- ret = d->d_op->d_revalidate(d, flags);
- if (ret < 0)
- return ret;
- if (!ret) {
- if (!(flags & LOOKUP_RCU))
- d_invalidate(d);
- return -ESTALE;
- }
+ if (weak) {
+ if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
+ ret = d->d_op->d_weak_revalidate(d, flags);
+ } else if (d->d_flags & DCACHE_OP_REVALIDATE) {
+ ret = d->d_op->d_revalidate(d, flags);
+ if (!ret) {
+ if (!(flags & LOOKUP_RCU))
+ d_invalidate(d);
+ ret = -ESTALE;
}
}
- return 1;
+ return ret;
}
-static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
+static int ovl_dentry_revalidate_common(struct dentry *dentry,
+ unsigned int flags, bool weak)
{
struct ovl_entry *oe = dentry->d_fsdata;
+ struct dentry *upper;
unsigned int i;
int ret = 1;
- for (i = 0; i < oe->numlower; i++) {
- struct dentry *d = oe->lowerstack[i].dentry;
+ upper = ovl_dentry_upper(dentry);
+ if (upper)
+ ret = ovl_revalidate_real(upper, flags, weak);
- if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
- ret = d->d_op->d_weak_revalidate(d, flags);
- if (ret <= 0)
- break;
- }
+ for (i = 0; ret > 0 && i < oe->numlower; i++) {
+ ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
+ weak);
}
return ret;
}
-static const struct dentry_operations ovl_dentry_operations = {
- .d_release = ovl_dentry_release,
- .d_real = ovl_d_real,
-};
+static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return ovl_dentry_revalidate_common(dentry, flags, false);
+}
+
+static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return ovl_dentry_revalidate_common(dentry, flags, true);
+}
-static const struct dentry_operations ovl_reval_dentry_operations = {
+static const struct dentry_operations ovl_dentry_operations = {
.d_release = ovl_dentry_release,
.d_real = ovl_d_real,
.d_revalidate = ovl_dentry_revalidate,
@@ -170,7 +175,7 @@ static struct kmem_cache *ovl_inode_cachep;
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
- struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+ struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);
if (!oi)
return NULL;
@@ -180,7 +185,8 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
oi->version = 0;
oi->flags = 0;
oi->__upperdentry = NULL;
- oi->lower = NULL;
+ oi->lowerpath.dentry = NULL;
+ oi->lowerpath.layer = NULL;
oi->lowerdata = NULL;
mutex_init(&oi->lock);
@@ -201,7 +207,7 @@ static void ovl_destroy_inode(struct inode *inode)
struct ovl_inode *oi = OVL_I(inode);
dput(oi->__upperdentry);
- iput(oi->lower);
+ dput(oi->lowerpath.dentry);
if (S_ISDIR(inode->i_mode))
ovl_dir_cache_free(inode);
else
@@ -210,24 +216,28 @@ static void ovl_destroy_inode(struct inode *inode)
static void ovl_free_fs(struct ovl_fs *ofs)
{
+ struct vfsmount **mounts;
unsigned i;
iput(ofs->workbasedir_trap);
iput(ofs->indexdir_trap);
iput(ofs->workdir_trap);
- iput(ofs->upperdir_trap);
+ dput(ofs->whiteout);
dput(ofs->indexdir);
dput(ofs->workdir);
if (ofs->workdir_locked)
ovl_inuse_unlock(ofs->workbasedir);
dput(ofs->workbasedir);
if (ofs->upperdir_locked)
- ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
- mntput(ofs->upper_mnt);
- for (i = 1; i < ofs->numlayer; i++) {
+ ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
+
+ /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */
+ mounts = (struct vfsmount **) ofs->layers;
+ for (i = 0; i < ofs->numlayer; i++) {
iput(ofs->layers[i].trap);
- mntput(ofs->layers[i].mnt);
+ mounts[i] = ofs->layers[i].mnt;
}
+ kern_unmount_array(mounts, ofs->numlayer);
kfree(ofs->layers);
for (i = 0; i < ofs->numfs; i++)
free_anon_bdev(ofs->fs[i].pseudo_dev);
@@ -256,12 +266,23 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
struct super_block *upper_sb;
int ret;
- if (!ofs->upper_mnt)
- return 0;
+ ret = ovl_sync_status(ofs);
+ /*
+ * We always have to set the error, because the return value isn't
+ * checked in syncfs; the error is instead returned indirectly via
+ * the sb's writeback errseq, which VFS inspects after this call.
+ */
+ if (ret < 0) {
+ errseq_set(&sb->s_wb_err, -EIO);
+ return -EIO;
+ }
+
+ if (!ret)
+ return ret;
/*
- * If this is a sync(2) call or an emergency sync, all the super blocks
- * will be iterated, including upper_sb, so no need to do anything.
+ * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
+ * All the super blocks will be iterated, including upper_sb.
*
* If this is a syncfs(2) call, then we do need to call
* sync_filesystem() on upper_sb, but enough if we do it when being
@@ -270,7 +291,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
if (!wait)
return 0;
- upper_sb = ofs->upper_mnt->mnt_sb;
+ upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
down_read(&upper_sb->s_umount);
ret = sync_filesystem(upper_sb);
@@ -281,7 +302,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
/**
* ovl_statfs
- * @sb: The overlayfs super block
+ * @dentry: The dentry to query
* @buf: The struct kstatfs to fill in with stats
*
* Get the filesystem statistics. As writes always target the upper layer
@@ -308,7 +329,7 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
/* Will this overlay be forced to mount/remount ro? */
static bool ovl_force_readonly(struct ovl_fs *ofs)
{
- return (!ofs->upper_mnt || !ofs->workdir);
+ return (!ovl_upper_mnt(ofs) || !ofs->workdir);
}
static const char *ovl_redirect_mode_def(void)
@@ -316,12 +337,6 @@ static const char *ovl_redirect_mode_def(void)
return ovl_redirect_dir_def ? "on" : "off";
}
-enum {
- OVL_XINO_OFF,
- OVL_XINO_AUTO,
- OVL_XINO_ON,
-};
-
static const char * const ovl_xino_str[] = {
"off",
"auto",
@@ -335,6 +350,8 @@ static inline int ovl_xino_def(void)
/**
* ovl_show_options
+ * @m: the seq_file handle
+ * @dentry: The dentry to query
*
* Prints the mount options for a given superblock.
* Returns zero; does not fail.
@@ -355,6 +372,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
if (ofs->config.index != ovl_index_def)
seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+ if (!ofs->config.uuid)
+ seq_puts(m, ",uuid=off");
if (ofs->config.nfs_export != ovl_nfs_export_def)
seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
"on" : "off");
@@ -363,17 +382,32 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s",
ofs->config.metacopy ? "on" : "off");
+ if (ofs->config.ovl_volatile)
+ seq_puts(m, ",volatile");
+ if (ofs->config.userxattr)
+ seq_puts(m, ",userxattr");
return 0;
}
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
struct ovl_fs *ofs = sb->s_fs_info;
+ struct super_block *upper_sb;
+ int ret = 0;
if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
return -EROFS;
- return 0;
+ if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
+ upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+ if (ovl_should_sync(ofs)) {
+ down_read(&upper_sb->s_umount);
+ ret = sync_filesystem(upper_sb);
+ up_read(&upper_sb->s_umount);
+ }
+ }
+
+ return ret;
}
static const struct super_operations ovl_super_operations = {
@@ -396,13 +430,17 @@ enum {
OPT_REDIRECT_DIR,
OPT_INDEX_ON,
OPT_INDEX_OFF,
+ OPT_UUID_ON,
+ OPT_UUID_OFF,
OPT_NFS_EXPORT_ON,
+ OPT_USERXATTR,
OPT_NFS_EXPORT_OFF,
OPT_XINO_ON,
OPT_XINO_OFF,
OPT_XINO_AUTO,
OPT_METACOPY_ON,
OPT_METACOPY_OFF,
+ OPT_VOLATILE,
OPT_ERR,
};
@@ -414,6 +452,9 @@ static const match_table_t ovl_tokens = {
{OPT_REDIRECT_DIR, "redirect_dir=%s"},
{OPT_INDEX_ON, "index=on"},
{OPT_INDEX_OFF, "index=off"},
+ {OPT_USERXATTR, "userxattr"},
+ {OPT_UUID_ON, "uuid=on"},
+ {OPT_UUID_OFF, "uuid=off"},
{OPT_NFS_EXPORT_ON, "nfs_export=on"},
{OPT_NFS_EXPORT_OFF, "nfs_export=off"},
{OPT_XINO_ON, "xino=on"},
@@ -421,6 +462,7 @@ static const match_table_t ovl_tokens = {
{OPT_XINO_AUTO, "xino=auto"},
{OPT_METACOPY_ON, "metacopy=on"},
{OPT_METACOPY_OFF, "metacopy=off"},
+ {OPT_VOLATILE, "volatile"},
{OPT_ERR, NULL}
};
@@ -475,6 +517,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
char *p;
int err;
bool metacopy_opt = false, redirect_opt = false;
+ bool nfs_export_opt = false, index_opt = false;
config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
if (!config->redirect_mode)
@@ -524,18 +567,30 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
case OPT_INDEX_ON:
config->index = true;
+ index_opt = true;
break;
case OPT_INDEX_OFF:
config->index = false;
+ index_opt = true;
+ break;
+
+ case OPT_UUID_ON:
+ config->uuid = true;
+ break;
+
+ case OPT_UUID_OFF:
+ config->uuid = false;
break;
case OPT_NFS_EXPORT_ON:
config->nfs_export = true;
+ nfs_export_opt = true;
break;
case OPT_NFS_EXPORT_OFF:
config->nfs_export = false;
+ nfs_export_opt = true;
break;
case OPT_XINO_ON:
@@ -557,6 +612,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
case OPT_METACOPY_OFF:
config->metacopy = false;
+ metacopy_opt = true;
+ break;
+
+ case OPT_VOLATILE:
+ config->ovl_volatile = true;
+ break;
+
+ case OPT_USERXATTR:
+ config->userxattr = true;
break;
default:
@@ -566,12 +630,24 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
}
}
- /* Workdir is useless in non-upper mount */
- if (!config->upperdir && config->workdir) {
- pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
- config->workdir);
- kfree(config->workdir);
- config->workdir = NULL;
+ /* Workdir/index are useless in non-upper mount */
+ if (!config->upperdir) {
+ if (config->workdir) {
+ pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+ config->workdir);
+ kfree(config->workdir);
+ config->workdir = NULL;
+ }
+ if (config->index && index_opt) {
+ pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
+ index_opt = false;
+ }
+ config->index = false;
+ }
+
+ if (!config->upperdir && config->ovl_volatile) {
+ pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
+ config->ovl_volatile = false;
}
err = ovl_parse_redirect_mode(config, config->redirect_mode);
@@ -606,6 +682,72 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
}
}
+ /* Resolve nfs_export -> index dependency */
+ if (config->nfs_export && !config->index) {
+ if (!config->upperdir && config->redirect_follow) {
+ pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+ config->nfs_export = false;
+ } else if (nfs_export_opt && index_opt) {
+ pr_err("conflicting options: nfs_export=on,index=off\n");
+ return -EINVAL;
+ } else if (index_opt) {
+ /*
+ * There was an explicit index=off that resulted
+ * in this conflict.
+ */
+ pr_info("disabling nfs_export due to index=off\n");
+ config->nfs_export = false;
+ } else {
+ /* Automatically enable index otherwise. */
+ config->index = true;
+ }
+ }
+
+ /* Resolve nfs_export -> !metacopy dependency */
+ if (config->nfs_export && config->metacopy) {
+ if (nfs_export_opt && metacopy_opt) {
+ pr_err("conflicting options: nfs_export=on,metacopy=on\n");
+ return -EINVAL;
+ }
+ if (metacopy_opt) {
+ /*
+ * There was an explicit metacopy=on that resulted
+ * in this conflict.
+ */
+ pr_info("disabling nfs_export due to metacopy=on\n");
+ config->nfs_export = false;
+ } else {
+ /*
+ * There was an explicit nfs_export=on that resulted
+ * in this conflict.
+ */
+ pr_info("disabling metacopy due to nfs_export=on\n");
+ config->metacopy = false;
+ }
+ }
+
+
+ /* Resolve userxattr -> !redirect && !metacopy dependency */
+ if (config->userxattr) {
+ if (config->redirect_follow && redirect_opt) {
+ pr_err("conflicting options: userxattr,redirect_dir=%s\n",
+ config->redirect_mode);
+ return -EINVAL;
+ }
+ if (config->metacopy && metacopy_opt) {
+ pr_err("conflicting options: userxattr,metacopy=on\n");
+ return -EINVAL;
+ }
+ /*
+ * Silently disable default setting of redirect and metacopy.
+ * This shall be the default in the future as well: these
+ * options must be explicitly enabled if used together with
+ * userxattr.
+ */
+ config->redirect_dir = config->redirect_follow = false;
+ config->metacopy = false;
+ }
+
return 0;
}
@@ -616,17 +758,14 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
const char *name, bool persist)
{
struct inode *dir = ofs->workbasedir->d_inode;
- struct vfsmount *mnt = ofs->upper_mnt;
+ struct vfsmount *mnt = ovl_upper_mnt(ofs);
struct dentry *work;
int err;
bool retried = false;
- bool locked = false;
inode_lock_nested(dir, I_MUTEX_PARENT);
- locked = true;
-
retry:
- work = lookup_one_len(name, ofs->workbasedir, strlen(name));
+ work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));
if (!IS_ERR(work)) {
struct iattr attr = {
@@ -643,15 +782,23 @@ retry:
goto out_unlock;
retried = true;
- ovl_workdir_cleanup(dir, mnt, work, 0);
+ err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
dput(work);
+ if (err == -EINVAL) {
+ work = ERR_PTR(err);
+ goto out_unlock;
+ }
goto retry;
}
- work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
- err = PTR_ERR(work);
- if (IS_ERR(work))
- goto out_err;
+ err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
+ if (err)
+ goto out_dput;
+
+ /* Weird filesystem returning with hashed negative (kernfs)? */
+ err = -EINVAL;
+ if (d_really_is_negative(work))
+ goto out_dput;
/*
* Try to remove POSIX ACL xattrs from workdir. We are good if:
@@ -666,17 +813,19 @@ retry:
* allowed as upper are limited to "normal" ones, where checking
* for the above two errors is sufficient.
*/
- err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+ err = ovl_do_removexattr(ofs, work,
+ XATTR_NAME_POSIX_ACL_DEFAULT);
if (err && err != -ENODATA && err != -EOPNOTSUPP)
goto out_dput;
- err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+ err = ovl_do_removexattr(ofs, work,
+ XATTR_NAME_POSIX_ACL_ACCESS);
if (err && err != -ENODATA && err != -EOPNOTSUPP)
goto out_dput;
/* Clear any inherited mode bits */
inode_lock(work->d_inode);
- err = notify_change(work, &attr, NULL);
+ err = ovl_do_notify_change(ofs, work, &attr);
inode_unlock(work->d_inode);
if (err)
goto out_dput;
@@ -685,9 +834,7 @@ retry:
goto out_err;
}
out_unlock:
- if (locked)
- inode_unlock(dir);
-
+ inode_unlock(dir);
return work;
out_dput:
@@ -751,19 +898,18 @@ static int ovl_mount_dir(const char *name, struct path *path)
ovl_unescape(tmp);
err = ovl_mount_dir_noesc(tmp, path);
- if (!err)
- if (ovl_dentry_remote(path->dentry)) {
- pr_err("filesystem on '%s' not supported as upperdir\n",
- tmp);
- path_put_init(path);
- err = -EINVAL;
- }
+ if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
+ pr_err("filesystem on '%s' not supported as upperdir\n",
+ tmp);
+ path_put_init(path);
+ err = -EINVAL;
+ }
kfree(tmp);
}
return err;
}
-static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
+static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
const char *name)
{
struct kstatfs statfs;
@@ -778,24 +924,21 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
}
static int ovl_lower_dir(const char *name, struct path *path,
- struct ovl_fs *ofs, int *stack_depth, bool *remote)
+ struct ovl_fs *ofs, int *stack_depth)
{
int fh_type;
int err;
err = ovl_mount_dir_noesc(name, path);
if (err)
- goto out;
+ return err;
err = ovl_check_namelen(path, ofs, name);
if (err)
- goto out_put;
+ return err;
*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
- if (ovl_dentry_remote(path->dentry))
- *remote = true;
-
/*
* The inodes index feature and NFS export need to encode and decode
* file handles, so they require that all layers support them.
@@ -808,17 +951,22 @@ static int ovl_lower_dir(const char *name, struct path *path,
pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
name);
}
+ /*
+ * Decoding origin file handle is required for persistent st_ino.
+ * Without persistent st_ino, xino=auto falls back to xino=off.
+ */
+ if (ofs->config.xino == OVL_XINO_AUTO &&
+ ofs->config.upperdir && !fh_type) {
+ ofs->config.xino = OVL_XINO_OFF;
+ pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
+ name);
+ }
/* Check if lower fs has 32bit inode numbers */
if (fh_type != FILEID_INO32_GEN)
ofs->xino_mode = -1;
return 0;
-
-out_put:
- path_put_init(path);
-out:
- return err;
}
/* Workdir should not be subdir of upperdir and vice versa */
@@ -863,6 +1011,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -874,7 +1023,20 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
/* Check that everything is OK before copy-up */
if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ /* The above comment can be understood in two ways:
+ *
+ * 1. We just want to check whether the basic POSIX ACL format
+ * is ok. For example, if the header is correct and the size
+ * is sane.
+ * 2. We want to know whether the ACL_{GROUP,USER} entries can
+ * be mapped according to the underlying filesystem.
+ *
+ * Currently, we only check 1. If we wanted to check 2. we
+ * would need to pass the mnt_userns and the fs_userns of the
+ * underlying filesystem. But frankly, I think checking 1. is
+ * enough to start the copy-up.
+ */
+ acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
}
@@ -888,7 +1050,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
goto out_acl_release;
}
err = -EPERM;
- if (!inode_owner_or_capable(inode))
+ if (!inode_owner_or_capable(&init_user_ns, inode))
goto out_acl_release;
posix_acl_release(acl);
@@ -900,18 +1062,15 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
if (unlikely(inode->i_mode & S_ISGID) &&
handler->flags == ACL_TYPE_ACCESS &&
!in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
+ !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
- err = ovl_setattr(dentry, &iattr);
+ err = ovl_setattr(&init_user_ns, dentry, &iattr);
if (err)
return err;
}
err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
- if (!err)
- ovl_copyattr(ovl_inode_real(inode), inode);
-
return err;
out_acl_release:
@@ -927,6 +1086,7 @@ static int ovl_own_xattr_get(const struct xattr_handler *handler,
}
static int ovl_own_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -942,6 +1102,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
}
static int ovl_other_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -965,8 +1126,14 @@ ovl_posix_acl_default_xattr_handler = {
.set = ovl_posix_acl_xattr_set,
};
-static const struct xattr_handler ovl_own_xattr_handler = {
- .prefix = OVL_XATTR_PREFIX,
+static const struct xattr_handler ovl_own_trusted_xattr_handler = {
+ .prefix = OVL_XATTR_TRUSTED_PREFIX,
+ .get = ovl_own_xattr_get,
+ .set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_own_user_xattr_handler = {
+ .prefix = OVL_XATTR_USER_PREFIX,
.get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
@@ -977,12 +1144,22 @@ static const struct xattr_handler ovl_other_xattr_handler = {
.set = ovl_other_xattr_set,
};
-static const struct xattr_handler *ovl_xattr_handlers[] = {
+static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
&ovl_posix_acl_access_xattr_handler,
&ovl_posix_acl_default_xattr_handler,
#endif
- &ovl_own_xattr_handler,
+ &ovl_own_trusted_xattr_handler,
+ &ovl_other_xattr_handler,
+ NULL
+};
+
+static const struct xattr_handler *ovl_user_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+ &ovl_posix_acl_access_xattr_handler,
+ &ovl_posix_acl_default_xattr_handler,
+#endif
+ &ovl_own_user_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
@@ -1025,7 +1202,7 @@ static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
}
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
- struct path *upperpath)
+ struct ovl_layer *upper_layer, struct path *upperpath)
{
struct vfsmount *upper_mnt;
int err;
@@ -1034,8 +1211,8 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
if (err)
goto out;
- /* Upper fs should not be r/o */
- if (sb_rdonly(upperpath->mnt->mnt_sb)) {
+ /* Upperdir path should not be r/o */
+ if (__mnt_is_readonly(upperpath->mnt)) {
pr_err("upper fs is r/o, try multi-lower layers mount\n");
err = -EINVAL;
goto out;
@@ -1045,7 +1222,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
if (err)
goto out;
- err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
+ err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
"upperdir");
if (err)
goto out;
@@ -1059,9 +1236,23 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
/* Don't inherit atime flags */
upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
- ofs->upper_mnt = upper_mnt;
+ upper_layer->mnt = upper_mnt;
+ upper_layer->idx = 0;
+ upper_layer->fsid = 0;
+
+ /*
+ * Inherit SB_NOSEC flag from upperdir.
+ *
+ * This optimization changes behavior when a security related attribute
+ * (suid/sgid/security.*) is changed on an underlying layer. This is
+ * okay because we don't yet have guarantees in that case, but it will
+ * need careful treatment once we want to honour changes to underlying
+ * filesystems.
+ */
+ if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
+ sb->s_flags |= SB_NOSEC;
- if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
+ if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
ofs->upperdir_locked = true;
} else {
err = ovl_report_in_use(ofs, "upperdir");
@@ -1074,11 +1265,115 @@ out:
return err;
}
-static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
- struct path *workpath)
+/*
+ * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
+ * negative values if error is encountered.
+ */
+static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
{
- struct vfsmount *mnt = ofs->upper_mnt;
+ struct dentry *workdir = ofs->workdir;
+ struct inode *dir = d_inode(workdir);
struct dentry *temp;
+ struct dentry *dest;
+ struct dentry *whiteout;
+ struct name_snapshot name;
+ int err;
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+
+ temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
+ err = PTR_ERR(temp);
+ if (IS_ERR(temp))
+ goto out_unlock;
+
+ dest = ovl_lookup_temp(ofs, workdir);
+ err = PTR_ERR(dest);
+ if (IS_ERR(dest)) {
+ dput(temp);
+ goto out_unlock;
+ }
+
+ /* Name is inline and stable - using snapshot as a copy helper */
+ take_dentry_name_snapshot(&name, temp);
+ err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
+ if (err) {
+ if (err == -EINVAL)
+ err = 0;
+ goto cleanup_temp;
+ }
+
+ whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
+ err = PTR_ERR(whiteout);
+ if (IS_ERR(whiteout))
+ goto cleanup_temp;
+
+ err = ovl_is_whiteout(whiteout);
+
+ /* Best effort cleanup of whiteout and temp file */
+ if (err)
+ ovl_cleanup(ofs, dir, whiteout);
+ dput(whiteout);
+
+cleanup_temp:
+ ovl_cleanup(ofs, dir, temp);
+ release_dentry_name_snapshot(&name);
+ dput(temp);
+ dput(dest);
+
+out_unlock:
+ inode_unlock(dir);
+
+ return err;
+}
+
+static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
+ struct dentry *parent,
+ const char *name, umode_t mode)
+{
+ size_t len = strlen(name);
+ struct dentry *child;
+
+ inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+ child = ovl_lookup_upper(ofs, name, parent, len);
+ if (!IS_ERR(child) && !child->d_inode)
+ child = ovl_create_real(ofs, parent->d_inode, child,
+ OVL_CATTR(mode));
+ inode_unlock(parent->d_inode);
+ dput(parent);
+
+ return child;
+}
+
+/*
+ * Creates $workdir/work/incompat/volatile/dirty file if it is not already
+ * present.
+ */
+static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
+{
+ unsigned int ctr;
+ struct dentry *d = dget(ofs->workbasedir);
+ static const char *const volatile_path[] = {
+ OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
+ };
+ const char *const *name = volatile_path;
+
+ for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
+ d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+ }
+ dput(d);
+ return 0;
+}
+
+static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
+ const struct path *workpath)
+{
+ struct vfsmount *mnt = ovl_upper_mnt(ofs);
+ struct dentry *workdir;
+ struct file *tmpfile;
+ bool rename_whiteout;
+ bool d_type;
int fh_type;
int err;
@@ -1086,10 +1381,13 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
if (err)
return err;
- ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
- if (!ofs->workdir)
+ workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
+ err = PTR_ERR(workdir);
+ if (IS_ERR_OR_NULL(workdir))
goto out;
+ ofs->workdir = workdir;
+
err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
if (err)
goto out;
@@ -1104,33 +1402,77 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
if (err < 0)
goto out;
- /*
- * We allowed this configuration and don't want to break users over
- * kernel upgrade. So warn instead of erroring out.
- */
- if (!err)
+ d_type = err;
+ if (!d_type)
pr_warn("upper fs needs to support d_type.\n");
/* Check if upper/work fs supports O_TMPFILE */
- temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
- ofs->tmpfile = !IS_ERR(temp);
+ tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
+ ofs->tmpfile = !IS_ERR(tmpfile);
if (ofs->tmpfile)
- dput(temp);
+ fput(tmpfile);
else
pr_warn("upper fs does not support tmpfile.\n");
+
+ /* Check if upper/work fs supports RENAME_WHITEOUT */
+ err = ovl_check_rename_whiteout(ofs);
+ if (err < 0)
+ goto out;
+
+ rename_whiteout = err;
+ if (!rename_whiteout)
+ pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
+
/*
- * Check if upper/work fs supports trusted.overlay.* xattr
+ * Check if upper/work fs supports (trusted|user).overlay.* xattr
*/
- err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
+ err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
if (err) {
+ pr_warn("failed to set xattr on upper\n");
ofs->noxattr = true;
- ofs->config.index = false;
- ofs->config.metacopy = false;
- pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
+ if (ofs->config.index || ofs->config.metacopy) {
+ ofs->config.index = false;
+ ofs->config.metacopy = false;
+ pr_warn("...falling back to index=off,metacopy=off.\n");
+ }
+ /*
+ * xattr support is required for persistent st_ino.
+ * Without persistent st_ino, xino=auto falls back to xino=off.
+ */
+ if (ofs->config.xino == OVL_XINO_AUTO) {
+ ofs->config.xino = OVL_XINO_OFF;
+ pr_warn("...falling back to xino=off.\n");
+ }
+ if (err == -EPERM && !ofs->config.userxattr)
+ pr_info("try mounting with 'userxattr' option\n");
err = 0;
} else {
- vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
+ ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
+ }
+
+ /*
+ * We allowed sub-optimal upper fs configuration and don't want to break
+ * users over kernel upgrade, but we never allowed remote upper fs, so
+ * we can enforce strict requirements for remote upper fs.
+ */
+ if (ovl_dentry_remote(ofs->workdir) &&
+ (!d_type || !rename_whiteout || ofs->noxattr)) {
+ pr_err("upper fs missing required features.\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * For volatile mount, create an incompat/volatile/dirty file to keep
+ * track of it.
+ */
+ if (ofs->config.ovl_volatile) {
+ err = ovl_create_volatile_dirty(ofs);
+ if (err < 0) {
+ pr_err("Failed to create volatile/dirty file.\n");
+ goto out;
+ }
}
/* Check if upper/work fs supports file handles */
@@ -1155,7 +1497,7 @@ out:
}
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
- struct path *upperpath)
+ const struct path *upperpath)
{
int err;
struct path workpath = { };
@@ -1198,9 +1540,10 @@ out:
}
static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
- struct ovl_entry *oe, struct path *upperpath)
+ struct ovl_entry *oe, const struct path *upperpath)
{
- struct vfsmount *mnt = ofs->upper_mnt;
+ struct vfsmount *mnt = ovl_upper_mnt(ofs);
+ struct dentry *indexdir;
int err;
err = mnt_want_write(mnt);
@@ -1208,15 +1551,25 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
return err;
/* Verify lower root is upper root origin */
- err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
- true);
+ err = ovl_verify_origin(ofs, upperpath->dentry,
+ oe->lowerstack[0].dentry, true);
if (err) {
pr_err("failed to verify upper root origin\n");
goto out;
}
- ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
- if (ofs->indexdir) {
+ /* index dir will also act as workdir */
+ iput(ofs->workdir_trap);
+ ofs->workdir_trap = NULL;
+ dput(ofs->workdir);
+ ofs->workdir = NULL;
+ indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
+ if (IS_ERR(indexdir)) {
+ err = PTR_ERR(indexdir);
+ } else if (indexdir) {
+ ofs->indexdir = indexdir;
+ ofs->workdir = dget(indexdir);
+
err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
"indexdir");
if (err)
@@ -1224,19 +1577,21 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
/*
* Verify upper root is exclusively associated with index dir.
- * Older kernels stored upper fh in "trusted.overlay.origin"
+ * Older kernels stored upper fh in ".overlay.origin"
* xattr. If that xattr exists, verify that it is a match to
* upper dir file handle. In any case, verify or set xattr
- * "trusted.overlay.upper" to indicate that index may have
+ * ".overlay.upper" to indicate that index may have
* directory entries.
*/
- if (ovl_check_origin_xattr(ofs->indexdir)) {
- err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
+ if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
+ err = ovl_verify_set_fh(ofs, ofs->indexdir,
+ OVL_XATTR_ORIGIN,
upperpath->dentry, true, false);
if (err)
pr_err("failed to verify index dir 'origin' xattr\n");
}
- err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
+ err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
+ true);
if (err)
pr_err("failed to verify index dir 'upper' xattr\n");
@@ -1256,9 +1611,20 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
unsigned int i;
- if (!ofs->config.nfs_export && !ofs->upper_mnt)
+ if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
return true;
+ /*
+ * We allow using single lower with null uuid for index and nfs_export
+ * for example to support those features with single lower squashfs.
+ * To avoid regressions in setups of overlay with re-formatted lower
+ * squashfs, do not allow decoding origin with lower null uuid unless
+ * user opted-in to one of the new features that require following the
+ * lower inode of non-dir upper.
+ */
+ if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
+ return false;
+
for (i = 0; i < ofs->numfs; i++) {
/*
* We use uuid to associate an overlay lower file handle with a
@@ -1284,6 +1650,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
dev_t dev;
int err;
bool bad_uuid = false;
+ bool warn = false;
for (i = 0; i < ofs->numfs; i++) {
if (ofs->fs[i].sb == sb)
@@ -1292,13 +1659,20 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
bad_uuid = true;
+ if (ofs->config.xino == OVL_XINO_AUTO) {
+ ofs->config.xino = OVL_XINO_OFF;
+ warn = true;
+ }
if (ofs->config.index || ofs->config.nfs_export) {
ofs->config.index = false;
ofs->config.nfs_export = false;
- pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+ warn = true;
+ }
+ if (warn) {
+ pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
uuid_is_null(&sb->s_uuid) ? "null" :
"conflicting",
- path->dentry);
+ path->dentry, ovl_xino_str[ofs->config.xino]);
}
}
@@ -1316,18 +1690,13 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
}
static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
- struct path *stack, unsigned int numlower)
+ struct path *stack, unsigned int numlower,
+ struct ovl_layer *layers)
{
int err;
unsigned int i;
- struct ovl_layer *layers;
err = -ENOMEM;
- layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
- if (!layers)
- goto out;
- ofs->layers = layers;
-
ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
if (ofs->fs == NULL)
goto out;
@@ -1335,11 +1704,6 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
ofs->numfs++;
- layers[0].mnt = ofs->upper_mnt;
- layers[0].idx = 0;
- layers[0].fsid = 0;
- ofs->numlayer = 1;
-
/*
* All lower layers that share the same fs as upper layer, use the same
* pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower
@@ -1352,8 +1716,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
goto out;
}
- if (ofs->upper_mnt) {
- ofs->fs[0].sb = ofs->upper_mnt->mnt_sb;
+ if (ovl_upper_mnt(ofs)) {
+ ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
ofs->fs[0].is_lower = false;
}
@@ -1366,14 +1730,23 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
if (err < 0)
goto out;
+ /*
+ * Check if lower root conflicts with this overlay's layers before
+ * checking if it is in-use as upperdir/workdir of "another"
+ * mount, because we do not bother to check in ovl_is_inuse() if
+ * the upperdir/workdir is in fact in-use by our
+ * upperdir/workdir.
+ */
err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
if (err)
goto out;
if (ovl_is_inuse(stack[i].dentry)) {
err = ovl_report_in_use(ofs, "lowerdir");
- if (err)
+ if (err) {
+ iput(trap);
goto out;
+ }
}
mnt = clone_private_mount(&stack[i]);
@@ -1401,26 +1774,29 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
/*
* When all layers on same fs, overlay can use real inode numbers.
- * With mount option "xino=on", mounter declares that there are enough
- * free high bits in underlying fs to hold the unique fsid.
+ * With mount option "xino=<on|auto>", mounter declares that there are
+ * enough free high bits in underlying fs to hold the unique fsid.
* If overlayfs does encounter underlying inodes using the high xino
* bits reserved for fsid, it emits a warning and uses the original
- * inode number.
+ * inode number or a non persistent inode number allocated from a
+ * dedicated range.
*/
- if (ofs->numfs - !ofs->upper_mnt == 1) {
+ if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
if (ofs->config.xino == OVL_XINO_ON)
pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
ofs->xino_mode = 0;
} else if (ofs->config.xino == OVL_XINO_OFF) {
ofs->xino_mode = -1;
- } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
+ } else if (ofs->xino_mode < 0) {
/*
* This is a roundup of number of bits needed for encoding
- * fsid, where fsid 0 is reserved for upper fs even with
- * lower only overlay.
+ * fsid, where fsid 0 is reserved for upper fs (even with
+ * lower only overlay), plus 1 extra bit that is reserved for
+ * the non persistent inode number range that is used for
+ * resolving xino lower bits overflow.
*/
- BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
- ofs->xino_mode = ilog2(ofs->numfs - 1) + 1;
+ BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
+ ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
}
if (ofs->xino_mode > 0) {
@@ -1434,45 +1810,26 @@ out:
}
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
- struct ovl_fs *ofs)
+ const char *lower, unsigned int numlower,
+ struct ovl_fs *ofs, struct ovl_layer *layers)
{
int err;
- char *lowertmp, *lower;
struct path *stack = NULL;
- unsigned int stacklen, numlower = 0, i;
- bool remote = false;
+ unsigned int i;
struct ovl_entry *oe;
- err = -ENOMEM;
- lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
- if (!lowertmp)
- goto out_err;
-
- err = -EINVAL;
- stacklen = ovl_split_lowerdirs(lowertmp);
- if (stacklen > OVL_MAX_STACK) {
- pr_err("too many lower directories, limit is %d\n",
- OVL_MAX_STACK);
- goto out_err;
- } else if (!ofs->config.upperdir && stacklen == 1) {
+ if (!ofs->config.upperdir && numlower == 1) {
pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
- goto out_err;
- } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
- ofs->config.redirect_follow) {
- pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
- ofs->config.nfs_export = false;
+ return ERR_PTR(-EINVAL);
}
- err = -ENOMEM;
- stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
+ stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
if (!stack)
- goto out_err;
+ return ERR_PTR(-ENOMEM);
err = -EINVAL;
- lower = lowertmp;
- for (numlower = 0; numlower < stacklen; numlower++) {
- err = ovl_lower_dir(lower, &stack[numlower], ofs,
- &sb->s_stack_depth, &remote);
+ for (i = 0; i < numlower; i++) {
+ err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
if (err)
goto out_err;
@@ -1486,7 +1843,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
goto out_err;
}
- err = ovl_get_layers(sb, ofs, stack, numlower);
+ err = ovl_get_layers(sb, ofs, stack, numlower, layers);
if (err)
goto out_err;
@@ -1500,16 +1857,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
oe->lowerstack[i].layer = &ofs->layers[i+1];
}
- if (remote)
- sb->s_d_op = &ovl_reval_dentry_operations;
- else
- sb->s_d_op = &ovl_dentry_operations;
-
out:
for (i = 0; i < numlower; i++)
path_put(&stack[i]);
kfree(stack);
- kfree(lowertmp);
return oe;
@@ -1524,7 +1875,8 @@ out_err:
* - upper/work dir of any overlayfs instance
*/
static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
- struct dentry *dentry, const char *name)
+ struct dentry *dentry, const char *name,
+ bool is_lower)
{
struct dentry *next = dentry, *parent;
int err = 0;
@@ -1536,7 +1888,7 @@ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
/* Walk back ancestors to root (inclusive) looking for traps */
while (!err && parent != next) {
- if (ovl_lookup_trap_inode(sb, parent)) {
+ if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
err = -ELOOP;
pr_err("overlapping %s path\n", name);
} else if (ovl_is_inuse(parent)) {
@@ -1560,9 +1912,9 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
{
int i, err;
- if (ofs->upper_mnt) {
- err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
- "upperdir");
+ if (ovl_upper_mnt(ofs)) {
+ err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
+ "upperdir", false);
if (err)
return err;
@@ -1573,7 +1925,8 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
* workbasedir. In that case, we already have their traps in
* inode cache and we will catch that case on lookup.
*/
- err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir");
+ err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
+ false);
if (err)
return err;
}
@@ -1581,7 +1934,7 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
for (i = 1; i < ofs->numlayer; i++) {
err = ovl_check_layer(sb, ofs,
ofs->layers[i].mnt->mnt_root,
- "lowerdir");
+ "lowerdir", true);
if (err)
return err;
}
@@ -1589,25 +1942,77 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
return 0;
}
+static struct dentry *ovl_get_root(struct super_block *sb,
+ struct dentry *upperdentry,
+ struct ovl_entry *oe)
+{
+ struct dentry *root;
+ struct ovl_path *lowerpath = &oe->lowerstack[0];
+ unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
+ int fsid = lowerpath->layer->fsid;
+ struct ovl_inode_params oip = {
+ .upperdentry = upperdentry,
+ .lowerpath = lowerpath,
+ };
+
+ root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
+ if (!root)
+ return NULL;
+
+ root->d_fsdata = oe;
+
+ if (upperdentry) {
+ /* Root inode uses upper st_ino/i_ino */
+ ino = d_inode(upperdentry)->i_ino;
+ fsid = 0;
+ ovl_dentry_set_upper_alias(root);
+ if (ovl_is_impuredir(sb, upperdentry))
+ ovl_set_flag(OVL_IMPURE, d_inode(root));
+ }
+
+ /* Root is always merge -> can have whiteouts */
+ ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
+ ovl_dentry_set_flag(OVL_E_CONNECTED, root);
+ ovl_set_upperdata(d_inode(root));
+ ovl_inode_init(d_inode(root), &oip, ino, fsid);
+ ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
+
+ return root;
+}
+
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
struct path upperpath = { };
struct dentry *root_dentry;
struct ovl_entry *oe;
struct ovl_fs *ofs;
+ struct ovl_layer *layers;
struct cred *cred;
+ char *splitlower = NULL;
+ unsigned int numlower;
int err;
+ err = -EIO;
+ if (WARN_ON(sb->s_user_ns != current_user_ns()))
+ goto out;
+
+ sb->s_d_op = &ovl_dentry_operations;
+
err = -ENOMEM;
ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
if (!ofs)
goto out;
+ err = -ENOMEM;
ofs->creator_cred = cred = prepare_creds();
if (!cred)
goto out_err;
+ /* Is there a reason anyone would want not to share whiteouts? */
+ ofs->share_whiteout = true;
+
ofs->config.index = ovl_index_def;
+ ofs->config.uuid = true;
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
ofs->config.metacopy = ovl_metacopy_def;
@@ -1622,9 +2027,32 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
goto out_err;
}
+ err = -ENOMEM;
+ splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
+ if (!splitlower)
+ goto out_err;
+
+ err = -EINVAL;
+ numlower = ovl_split_lowerdirs(splitlower);
+ if (numlower > OVL_MAX_STACK) {
+ pr_err("too many lower directories, limit is %d\n",
+ OVL_MAX_STACK);
+ goto out_err;
+ }
+
+ err = -ENOMEM;
+ layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
+ if (!layers)
+ goto out_err;
+
+ ofs->layers = layers;
+ /* Layer 0 is reserved for upper even if there's no upper */
+ ofs->numlayer = 1;
+
sb->s_stack_depth = 0;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
+ atomic_long_set(&ofs->last_ino, 1);
+ /* Assume underlying fs uses 32bit inodes unless proven otherwise */
if (ofs->config.xino != OVL_XINO_OFF) {
ofs->xino_mode = BITS_PER_LONG - 32;
if (!ofs->xino_mode) {
@@ -1637,15 +2065,28 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ovl_super_operations;
if (ofs->config.upperdir) {
+ struct super_block *upper_sb;
+
+ err = -EINVAL;
if (!ofs->config.workdir) {
pr_err("missing 'workdir'\n");
goto out_err;
}
- err = ovl_get_upper(sb, ofs, &upperpath);
+ err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
if (err)
goto out_err;
+ upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+ if (!ovl_should_sync(ofs)) {
+ ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
+ if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
+ err = -EIO;
+ pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
+ goto out_err;
+ }
+ }
+
err = ovl_get_workdir(sb, ofs, &upperpath);
if (err)
goto out_err;
@@ -1653,31 +2094,31 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ofs->workdir)
sb->s_flags |= SB_RDONLY;
- sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
- sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
-
+ sb->s_stack_depth = upper_sb->s_stack_depth;
+ sb->s_time_gran = upper_sb->s_time_gran;
}
- oe = ovl_get_lowerstack(sb, ofs);
+ oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
err = PTR_ERR(oe);
if (IS_ERR(oe))
goto out_err;
/* If the upper fs is nonexistent, we mark overlayfs r/o too */
- if (!ofs->upper_mnt)
+ if (!ovl_upper_mnt(ofs))
sb->s_flags |= SB_RDONLY;
- if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
+ if (!ofs->config.uuid && ofs->numfs > 1) {
+ pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
+ ofs->config.uuid = true;
+ }
+
+ if (!ovl_force_readonly(ofs) && ofs->config.index) {
err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
if (err)
goto out_free_oe;
/* Force r/o mount with no index dir */
- if (!ofs->indexdir) {
- dput(ofs->workdir);
- ofs->workdir = NULL;
+ if (!ofs->indexdir)
sb->s_flags |= SB_RDONLY;
- }
-
}
err = ovl_check_overlapping_layers(sb, ofs);
@@ -1687,7 +2128,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
/* Show index=off in /proc/mounts for forced r/o mount */
if (!ofs->indexdir) {
ofs->config.index = false;
- if (ofs->upper_mnt && ofs->config.nfs_export) {
+ if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
@@ -1705,30 +2146,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
- sb->s_xattr = ovl_xattr_handlers;
+ sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
+ ovl_trusted_xattr_handlers;
sb->s_fs_info = ofs;
sb->s_flags |= SB_POSIXACL;
+ sb->s_iflags |= SB_I_SKIP_SYNC;
err = -ENOMEM;
- root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
+ root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
if (!root_dentry)
goto out_free_oe;
- root_dentry->d_fsdata = oe;
-
mntput(upperpath.mnt);
- if (upperpath.dentry) {
- ovl_dentry_set_upper_alias(root_dentry);
- if (ovl_is_impuredir(upperpath.dentry))
- ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
- }
-
- /* Root is always merge -> can have whiteouts */
- ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
- ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
- ovl_set_upperdata(d_inode(root_dentry));
- ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
- ovl_dentry_lower(root_dentry), NULL);
+ kfree(splitlower);
sb->s_root = root_dentry;
@@ -1738,6 +2168,7 @@ out_free_oe:
ovl_entry_stack_free(oe);
kfree(oe);
out_err:
+ kfree(splitlower);
path_put(&upperpath);
ovl_free_fs(ofs);
out:
@@ -1753,6 +2184,7 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
static struct file_system_type ovl_fs_type = {
.owner = THIS_MODULE,
.name = "overlay",
+ .fs_flags = FS_USERNS_MOUNT,
.mount = ovl_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 042f7eb4f7f4..81a57a8d80d9 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -10,6 +10,7 @@
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>
+#include <linux/fileattr.h>
#include <linux/uuid.h>
#include <linux/namei.h>
#include <linux/ratelimit.h>
@@ -18,13 +19,13 @@
int ovl_want_write(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- return mnt_want_write(ofs->upper_mnt);
+ return mnt_want_write(ovl_upper_mnt(ofs));
}
void ovl_drop_write(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- mnt_drop_write(ofs->upper_mnt);
+ mnt_drop_write(ovl_upper_mnt(ofs));
}
struct dentry *ovl_workdir(struct dentry *dentry)
@@ -50,6 +51,9 @@ const struct cred *ovl_override_creds(struct super_block *sb)
*/
int ovl_can_decode_fh(struct super_block *sb)
{
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return 0;
+
if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
return 0;
@@ -93,8 +97,24 @@ struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
bool ovl_dentry_remote(struct dentry *dentry)
{
return dentry->d_flags &
- (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE |
- DCACHE_OP_REAL);
+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
+}
+
+void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry,
+ unsigned int mask)
+{
+ struct ovl_entry *oe = OVL_E(dentry);
+ unsigned int i, flags = 0;
+
+ if (upperdentry)
+ flags |= upperdentry->d_flags;
+ for (i = 0; i < oe->numlower; i++)
+ flags |= oe->lowerstack[i].dentry->d_flags;
+
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~mask;
+ dentry->d_flags |= flags & mask;
+ spin_unlock(&dentry->d_lock);
}
bool ovl_dentry_weird(struct dentry *dentry)
@@ -134,7 +154,7 @@ void ovl_path_upper(struct dentry *dentry, struct path *path)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- path->mnt = ofs->upper_mnt;
+ path->mnt = ovl_upper_mnt(ofs);
path->dentry = ovl_dentry_upper(dentry);
}
@@ -174,6 +194,20 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
return type;
}
+enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path)
+{
+ enum ovl_path_type type = ovl_path_type(dentry);
+
+ WARN_ON_ONCE(d_is_dir(dentry));
+
+ if (!OVL_TYPE_UPPER(type) || OVL_TYPE_MERGE(type))
+ ovl_path_lowerdata(dentry, path);
+ else
+ ovl_path_upper(dentry, path);
+
+ return type;
+}
+
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
@@ -195,7 +229,7 @@ const struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
/*
* ovl_dentry_lower() could return either a data dentry or metacopy dentry
- * dependig on what is stored in lowerstack[0]. At times we need to find
+ * depending on what is stored in lowerstack[0]. At times we need to find
* lower dentry which has data (and not metacopy dentry). This helper
* returns the lower data dentry.
*/
@@ -216,6 +250,17 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode)
return ovl_upperdentry_dereference(OVL_I(inode));
}
+void ovl_i_path_real(struct inode *inode, struct path *path)
+{
+ path->dentry = ovl_i_dentry_upper(inode);
+ if (!path->dentry) {
+ path->dentry = OVL_I(inode)->lowerpath.dentry;
+ path->mnt = OVL_I(inode)->lowerpath.layer->mnt;
+ } else {
+ path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb));
+ }
+}
+
struct inode *ovl_inode_upper(struct inode *inode)
{
struct dentry *upperdentry = ovl_i_dentry_upper(inode);
@@ -225,7 +270,9 @@ struct inode *ovl_inode_upper(struct inode *inode)
struct inode *ovl_inode_lower(struct inode *inode)
{
- return OVL_I(inode)->lower;
+ struct dentry *lowerdentry = OVL_I(inode)->lowerpath.dentry;
+
+ return lowerdentry ? d_inode(lowerdentry) : NULL;
}
struct inode *ovl_inode_real(struct inode *inode)
@@ -386,24 +433,6 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
oi->redirect = redirect;
}
-void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry, struct dentry *lowerdata)
-{
- struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
-
- if (upperdentry)
- OVL_I(inode)->__upperdentry = upperdentry;
- if (lowerdentry)
- OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
- if (lowerdata)
- OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata));
-
- ovl_copyattr(realinode, inode);
- ovl_copyflags(realinode, inode);
- if (!inode->i_ino)
- inode->i_ino = realinode->i_ino;
-}
-
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
{
struct inode *upperinode = d_inode(upperdentry);
@@ -416,34 +445,34 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
smp_wmb();
OVL_I(inode)->__upperdentry = upperdentry;
if (inode_unhashed(inode)) {
- if (!inode->i_ino)
- inode->i_ino = upperinode->i_ino;
inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
}
}
-static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
+static void ovl_dir_version_inc(struct dentry *dentry, bool impurity)
{
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(inode));
+ WARN_ON(!d_is_dir(dentry));
/*
- * Version is used by readdir code to keep cache consistent. For merge
- * dirs all changes need to be noted. For non-merge dirs, cache only
- * contains impure (ones which have been copied up and have origins)
- * entries, so only need to note changes to impure entries.
+ * Version is used by readdir code to keep cache consistent.
+ * For merge dirs (or dirs with origin) all changes need to be noted.
+ * For non-merge dirs, cache contains only impure entries (i.e. ones
+ * which have been copied up and have origins), so only need to note
+ * changes to impure entries.
*/
- if (OVL_TYPE_MERGE(ovl_path_type(dentry)) || impurity)
+ if (!ovl_dir_is_real(dentry) || impurity)
OVL_I(inode)->version++;
}
void ovl_dir_modified(struct dentry *dentry, bool impurity)
{
/* Copy mtime/ctime */
- ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
+ ovl_copyattr(d_inode(dentry));
- ovl_dentry_version_inc(dentry, impurity);
+ ovl_dir_version_inc(dentry, impurity);
}
u64 ovl_dentry_version_get(struct dentry *dentry)
@@ -461,9 +490,35 @@ bool ovl_is_whiteout(struct dentry *dentry)
return inode && IS_WHITEOUT(inode);
}
-struct file *ovl_path_open(struct path *path, int flags)
+struct file *ovl_path_open(const struct path *path, int flags)
{
- return dentry_open(path, flags | O_NOATIME, current_cred());
+ struct inode *inode = d_inode(path->dentry);
+ struct user_namespace *real_mnt_userns = mnt_user_ns(path->mnt);
+ int err, acc_mode;
+
+ if (flags & ~(O_ACCMODE | O_LARGEFILE))
+ BUG();
+
+ switch (flags & O_ACCMODE) {
+ case O_RDONLY:
+ acc_mode = MAY_READ;
+ break;
+ case O_WRONLY:
+ acc_mode = MAY_WRITE;
+ break;
+ default:
+ BUG();
+ }
+
+ err = inode_permission(real_mnt_userns, inode, acc_mode | MAY_OPEN);
+ if (err)
+ return ERR_PTR(err);
+
+ /* O_NOATIME is an optimization, don't fail if not permitted */
+ if (inode_owner_or_capable(real_mnt_userns, inode))
+ flags |= O_NOATIME;
+
+ return dentry_open(path, flags, current_cred());
}
/* Caller should hold ovl_inode->lock */
@@ -523,11 +578,11 @@ void ovl_copy_up_end(struct dentry *dentry)
ovl_inode_unlock(d_inode(dentry));
}
-bool ovl_check_origin_xattr(struct dentry *dentry)
+bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
{
int res;
- res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_ORIGIN, NULL, 0);
/* Zero size value means "copied up but origin unknown" */
if (res >= 0)
@@ -536,35 +591,59 @@ bool ovl_check_origin_xattr(struct dentry *dentry)
return false;
}
-bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
+ enum ovl_xattr ox)
{
int res;
char val;
- if (!d_is_dir(dentry))
+ if (!d_is_dir(path->dentry))
return false;
- res = vfs_getxattr(dentry, name, &val, 1);
+ res = ovl_path_getxattr(ofs, path, ox, &val, 1);
if (res == 1 && val == 'y')
return true;
return false;
}
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
- const char *name, const void *value, size_t size,
+#define OVL_XATTR_OPAQUE_POSTFIX "opaque"
+#define OVL_XATTR_REDIRECT_POSTFIX "redirect"
+#define OVL_XATTR_ORIGIN_POSTFIX "origin"
+#define OVL_XATTR_IMPURE_POSTFIX "impure"
+#define OVL_XATTR_NLINK_POSTFIX "nlink"
+#define OVL_XATTR_UPPER_POSTFIX "upper"
+#define OVL_XATTR_METACOPY_POSTFIX "metacopy"
+#define OVL_XATTR_PROTATTR_POSTFIX "protattr"
+
+#define OVL_XATTR_TAB_ENTRY(x) \
+ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
+ [true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
+
+const char *const ovl_xattr_table[][2] = {
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
+ OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
+};
+
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
+ enum ovl_xattr ox, const void *value, size_t size,
int xerr)
{
int err;
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
if (ofs->noxattr)
return xerr;
- err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+ err = ovl_setxattr(ofs, upperdentry, ox, value, size);
if (err == -EOPNOTSUPP) {
- pr_warn("cannot set %s xattr on upper\n", name);
+ pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs, ox));
ofs->noxattr = true;
return xerr;
}
@@ -574,6 +653,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
int err;
if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
@@ -583,27 +663,93 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
* Do not fail when upper doesn't support xattrs.
* Upper inodes won't have origin nor redirect xattr anyway.
*/
- err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
- "y", 1, 0);
+ err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0);
if (!err)
ovl_set_flag(OVL_IMPURE, d_inode(dentry));
return err;
}
-void ovl_set_flag(unsigned long flag, struct inode *inode)
-{
- set_bit(flag, &OVL_I(inode)->flags);
-}
-void ovl_clear_flag(unsigned long flag, struct inode *inode)
+#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper)
{
- clear_bit(flag, &OVL_I(inode)->flags);
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+ char buf[OVL_PROTATTR_MAX+1];
+ int res, n;
+
+ res = ovl_getxattr_upper(ofs, upper, OVL_XATTR_PROTATTR, buf,
+ OVL_PROTATTR_MAX);
+ if (res < 0)
+ return;
+
+ /*
+ * Initialize inode flags from overlay.protattr xattr and upper inode
+ * flags. If upper inode has those fileattr flags set (i.e. from old
+ * kernel), we do not clear them on ovl_get_inode(), but we will clear
+ * them on next fileattr_set().
+ */
+ for (n = 0; n < res; n++) {
+ if (buf[n] == 'a')
+ iflags |= S_APPEND;
+ else if (buf[n] == 'i')
+ iflags |= S_IMMUTABLE;
+ else
+ break;
+ }
+
+ if (!res || n < res) {
+ pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n",
+ upper, res);
+ } else {
+ inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+ }
}
-bool ovl_test_flag(unsigned long flag, struct inode *inode)
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+ struct fileattr *fa)
{
- return test_bit(flag, &OVL_I(inode)->flags);
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+ char buf[OVL_PROTATTR_MAX];
+ int len = 0, err = 0;
+ u32 iflags = 0;
+
+ BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX);
+
+ if (fa->flags & FS_APPEND_FL) {
+ buf[len++] = 'a';
+ iflags |= S_APPEND;
+ }
+ if (fa->flags & FS_IMMUTABLE_FL) {
+ buf[len++] = 'i';
+ iflags |= S_IMMUTABLE;
+ }
+
+ /*
+ * Do not allow setting protection flags when upper doesn't support
+ * xattrs, because we do not set those fileattr flags on upper inode.
+ * Remove xattr if it exists and all protection flags are cleared.
+ */
+ if (len) {
+ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR,
+ buf, len, -EPERM);
+ } else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) {
+ err = ovl_removexattr(ofs, upper, OVL_XATTR_PROTATTR);
+ if (err == -EOPNOTSUPP || err == -ENODATA)
+ err = 0;
+ }
+ if (err)
+ return err;
+
+ inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+
+ /* Mask out the fileattr flags that should not be set in upper inode */
+ fa->flags &= ~OVL_PROT_FS_FLAGS_MASK;
+ fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK;
+
+ return 0;
}
/**
@@ -673,6 +819,7 @@ bool ovl_need_index(struct dentry *dentry)
/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
@@ -682,7 +829,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
struct qstr name = { };
int err;
- err = ovl_get_index_name(lowerdentry, &name);
+ err = ovl_get_index_name(ofs, lowerdentry, &name);
if (err)
goto fail;
@@ -705,16 +852,17 @@ static void ovl_cleanup_index(struct dentry *dentry)
}
inode_lock_nested(dir, I_MUTEX_PARENT);
- index = lookup_one_len(name.name, indexdir, name.len);
+ index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
err = PTR_ERR(index);
if (IS_ERR(index)) {
index = NULL;
} else if (ovl_index_all(dentry->d_sb)) {
/* Whiteout orphan index to block future open by handle */
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
+ dir, index);
} else {
/* Cleanup orphan index entries */
- err = ovl_cleanup(dir, index);
+ err = ovl_cleanup(ofs, dir, index);
}
inode_unlock(dir);
@@ -823,18 +971,25 @@ err:
}
/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
-int ovl_check_metacopy_xattr(struct dentry *dentry)
+int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path)
{
int res;
/* Only regular files can have metacopy xattr */
- if (!S_ISREG(d_inode(dentry)->i_mode))
+ if (!S_ISREG(d_inode(path->dentry)->i_mode))
return 0;
- res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return 0;
+ /*
+ * getxattr on user.* may fail with EACCES in case there's no
+ * read permission on the inode. Not much we can do, other than
+ * tell the caller that this is not a metacopy inode.
+ */
+ if (ofs->config.userxattr && res == -EACCES)
+ return 0;
goto out;
}
@@ -860,49 +1015,26 @@ bool ovl_is_metacopy_dentry(struct dentry *dentry)
return (oe->numlower > 1);
}
-ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value,
- size_t padding)
-{
- ssize_t res;
- char *buf = NULL;
-
- res = vfs_getxattr(dentry, name, NULL, 0);
- if (res < 0) {
- if (res == -ENODATA || res == -EOPNOTSUPP)
- return -ENODATA;
- goto fail;
- }
-
- if (res != 0) {
- buf = kzalloc(res + padding, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- res = vfs_getxattr(dentry, name, buf, res);
- if (res < 0)
- goto fail;
- }
- *value = buf;
-
- return res;
-
-fail:
- pr_warn_ratelimited("failed to get xattr %s: err=%zi)\n",
- name, res);
- kfree(buf);
- return res;
-}
-
-char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
+char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding)
{
int res;
char *s, *next, *buf = NULL;
- res = ovl_getxattr(dentry, OVL_XATTR_REDIRECT, &buf, padding + 1);
- if (res == -ENODATA)
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, NULL, 0);
+ if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
if (res < 0)
- return ERR_PTR(res);
+ goto fail;
+ if (res == 0)
+ goto invalid;
+
+ buf = kzalloc(res + padding + 1, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, buf, res);
+ if (res < 0)
+ goto fail;
if (res == 0)
goto invalid;
@@ -921,6 +1053,67 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
invalid:
pr_warn_ratelimited("invalid redirect (%s)\n", buf);
res = -EINVAL;
+ goto err_free;
+fail:
+ pr_warn_ratelimited("failed to get redirect (%i)\n", res);
+err_free:
kfree(buf);
return ERR_PTR(res);
}
+
+/*
+ * ovl_sync_status() - Check fs sync status for volatile mounts
+ *
+ * Returns 1 if this is not a volatile mount and a real sync is required.
+ *
+ * Returns 0 if syncing can be skipped because the mount is volatile and no
+ * errors have occurred on the upperdir since the mount. 0 is also returned
+ * when a sync is not required and there is no upper mount at all.
+ *
+ * Returns -errno if it is a volatile mount and an error has occurred on the
+ * upperdir since the mount. The result comes from checking the upper
+ * superblock's s_wb_err against ofs->errseq, so if the error code changes,
+ * the latest error code is returned.
+ */
+
+int ovl_sync_status(struct ovl_fs *ofs)
+{
+ struct vfsmount *mnt;
+
+ if (ovl_should_sync(ofs))
+ return 1;
+
+ mnt = ovl_upper_mnt(ofs);
+ if (!mnt)
+ return 0;
+
+ return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
+}
+
+/*
+ * ovl_copyattr() - copy inode attributes from layer to ovl inode
+ *
+ * When overlay copies inode information from an upper or lower layer to the
+ * relevant overlay inode, it applies the idmapping of that layer so that the
+ * ovl inode ownership correctly reflects the ownership of the idmapped upper
+ * or lower layer. For example, an idmapped upper or lower layer mapping id
+ * 1001 to id 1000 will take care to map any lower or upper inode owned by id
+ * 1001 to id 1000. These mapping helpers are no-ops when the relevant layer
+ * isn't idmapped.
+ *
+ * The real inode is taken from the path filled in by ovl_i_path_real(). Only
+ * i_uid and i_gid go through the mapping helpers, using the user namespace of
+ * the real path's mount; i_mode, i_atime, i_mtime, i_ctime and the file size
+ * are copied from the real inode unchanged.
+ */
+void ovl_copyattr(struct inode *inode)
+{
+ struct path realpath;
+ struct inode *realinode;
+ struct user_namespace *real_mnt_userns;
+
+ ovl_i_path_real(inode, &realpath);
+ realinode = d_inode(realpath.dentry);
+ real_mnt_userns = mnt_user_ns(realpath.mnt);
+
+ inode->i_uid = i_uid_into_mnt(real_mnt_userns, realinode);
+ inode->i_gid = i_gid_into_mnt(real_mnt_userns, realinode);
+ inode->i_mode = realinode->i_mode;
+ inode->i_atime = realinode->i_atime;
+ inode->i_mtime = realinode->i_mtime;
+ inode->i_ctime = realinode->i_ctime;
+ i_size_write(inode, i_size_read(realinode));
+}