diff options
Diffstat (limited to '')
-rw-r--r-- | fs/overlayfs/Kconfig | 6 | ||||
-rw-r--r-- | fs/overlayfs/copy_up.c | 379 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 271 | ||||
-rw-r--r-- | fs/overlayfs/export.c | 86 | ||||
-rw-r--r-- | fs/overlayfs/file.c | 381 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 505 | ||||
-rw-r--r-- | fs/overlayfs/namei.c | 275 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 419 | ||||
-rw-r--r-- | fs/overlayfs/ovl_entry.h | 29 | ||||
-rw-r--r-- | fs/overlayfs/readdir.c | 261 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 920 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 395 |
12 files changed, 2669 insertions, 1258 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 714c14c47ca5..dd188c7996b3 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -9,7 +9,7 @@ config OVERLAY_FS 'lower' filesystem is either hidden or, in the case of directories, merged with the 'upper' object. - For more information see Documentation/filesystems/overlayfs.txt + For more information see Documentation/filesystems/overlayfs.rst config OVERLAY_FS_REDIRECT_DIR bool "Overlayfs: turn on redirect directory feature by default" @@ -38,7 +38,7 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW If backward compatibility is not an issue, then it is safe and recommended to say N here. - For more information, see Documentation/filesystems/overlayfs.txt + For more information, see Documentation/filesystems/overlayfs.rst If unsure, say Y. @@ -103,7 +103,7 @@ config OVERLAY_FS_XINO_AUTO If compatibility with applications that expect 32bit inodes is not an issue, then it is safe and recommended to say Y here. - For more information, see Documentation/filesystems/overlayfs.txt + For more information, see Documentation/filesystems/overlayfs.rst If unsure, say N. diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9fc47c2e078d..f436d8847f08 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -8,6 +8,7 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/fileattr.h> #include <linux/splice.h> #include <linux/xattr.h> #include <linux/security.h> @@ -36,11 +37,19 @@ static int ovl_ccup_get(char *buf, const struct kernel_param *param) module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644); MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing"); -int ovl_copy_xattr(struct dentry *old, struct dentry *new) +static bool ovl_must_copy_xattr(const char *name) { + return !strcmp(name, XATTR_POSIX_ACL_ACCESS) || + !strcmp(name, XATTR_POSIX_ACL_DEFAULT) || + !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); +} + +int ovl_copy_xattr(struct super_block *sb, const struct path *oldpath, struct dentry *new) +{ + struct dentry *old = oldpath->dentry; ssize_t list_size, size, value_size = 0; char *buf, *name, *value = NULL; - int uninitialized_var(error); + int error = 0; size_t slen; if (!(old->d_inode->i_opflags & IOP_XATTR) || @@ -54,7 +63,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) return list_size; } - buf = kzalloc(list_size, GFP_KERNEL); + buf = kvzalloc(list_size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -74,12 +83,20 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) } list_size -= slen; - if (ovl_is_private_xattr(name)) + if (ovl_is_private_xattr(sb, name)) continue; + + error = security_inode_copy_up_xattr(name); + if (error < 0 && error != -EOPNOTSUPP) + break; + if (error == 1) { + error = 0; + continue; /* Discard */ + } retry: - size = vfs_getxattr(old, name, value, value_size); + size = ovl_do_getxattr(oldpath, name, value, value_size); if (size == -ERANGE) - size = vfs_getxattr(old, name, NULL, 0); + size = ovl_do_getxattr(oldpath, name, NULL, 0); if (size < 0) { error = size; @@ -89,37 +106,98 @@ retry: if (size > value_size) { void *new; - new = krealloc(value, size, GFP_KERNEL); + new = kvmalloc(size, GFP_KERNEL); if (!new) { error = -ENOMEM; break; } + kvfree(value); value = new; value_size = size; goto retry; } - error = security_inode_copy_up_xattr(name); - if (error < 0 && error != -EOPNOTSUPP) - break; - if (error == 1) { + error = ovl_do_setxattr(OVL_FS(sb), new, name, value, size, 0); + if (error) { + if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name)) + break; + + /* Ignore failure to copy unknown xattrs */ error = 0; - continue; /* Discard */ } - error = vfs_setxattr(new, name, value, size, 0); - if (error) - break; } - kfree(value); + kvfree(value); out: - kfree(buf); + kvfree(buf); return error; } -static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) +static int ovl_copy_fileattr(struct inode *inode, const struct path *old, + const struct path *new) +{ + struct fileattr oldfa = { .flags_valid = true }; + struct fileattr newfa = { .flags_valid = true }; + int err; + + err = ovl_real_fileattr_get(old, &oldfa); + if (err) { + /* Ntfs-3g returns -EINVAL for "no fileattr support" */ + if (err == -ENOTTY || err == -EINVAL) + return 0; + pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", + old->dentry, err); + return err; + } + + /* + * We cannot set immutable and append-only flags on upper inode, + * because we would not be able to link upper inode to upper dir + * not set overlay private xattr on upper inode. + * Store these flags in overlay.protattr xattr instead. + */ + if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { + err = ovl_set_protattr(inode, new->dentry, &oldfa); + if (err == -EPERM) + pr_warn_once("copying fileattr: no xattr on upper\n"); + else if (err) + return err; + } + + /* Don't bother copying flags if none are set */ + if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK)) + return 0; + + err = ovl_real_fileattr_get(new, &newfa); + if (err) { + /* + * Returning an error if upper doesn't support fileattr will + * result in a regression, so revert to the old behavior. + */ + if (err == -ENOTTY || err == -EINVAL) { + pr_warn_once("copying fileattr: no support on upper\n"); + return 0; + } + pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", + new->dentry, err); + return err; + } + + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); + newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; + newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); + + BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK; + newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK); + + return ovl_real_fileattr_set(new, &newfa); +} + +static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, + struct file *new_file, loff_t len) { + struct path datapath; struct file *old_file; - struct file *new_file; loff_t old_pos = 0; loff_t new_pos = 0; loff_t cloned; @@ -128,28 +206,22 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) bool skip_hole = false; int error = 0; - if (len == 0) - return 0; + ovl_path_lowerdata(dentry, &datapath); + if (WARN_ON(datapath.dentry == NULL)) + return -EIO; - old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY); + old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY); if (IS_ERR(old_file)) return PTR_ERR(old_file); - new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY); - if (IS_ERR(new_file)) { - error = PTR_ERR(new_file); - goto out_fput; - } - /* Try to use clone_file_range to clone up within the same fs */ cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); if (cloned == len) - goto out; + goto out_fput; /* Couldn't clone, so now we try to copy the data */ /* Check if lower fs supports seek operation */ - if (old_file->f_mode & FMODE_LSEEK && - old_file->f_op->llseek) + if (old_file->f_mode & FMODE_LSEEK) skip_hole = true; while (len) { @@ -205,26 +277,26 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) len -= bytes; } -out: - if (!error) + if (!error && ovl_should_sync(ofs)) error = vfs_fsync(new_file, 0); - fput(new_file); out_fput: fput(old_file); return error; } -static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat) +static int ovl_set_size(struct ovl_fs *ofs, + struct dentry *upperdentry, struct kstat *stat) { struct iattr attr = { .ia_valid = ATTR_SIZE, .ia_size = stat->size, }; - return notify_change(upperdentry, &attr, NULL); + return ovl_do_notify_change(ofs, upperdentry, &attr); } -static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) +static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry, + struct kstat *stat) { struct iattr attr = { .ia_valid = @@ -233,10 +305,11 @@ static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) .ia_mtime = stat->mtime, }; - return notify_change(upperdentry, &attr, NULL); + return ovl_do_notify_change(ofs, upperdentry, &attr); } -int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) +int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry, + struct kstat *stat) { int err = 0; @@ -245,23 +318,24 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) .ia_valid = ATTR_MODE, .ia_mode = stat->mode, }; - err = notify_change(upperdentry, &attr, NULL); + err = ovl_do_notify_change(ofs, upperdentry, &attr); } if (!err) { struct iattr attr = { .ia_valid = ATTR_UID | ATTR_GID, - .ia_uid = stat->uid, - .ia_gid = stat->gid, + .ia_vfsuid = VFSUIDT_INIT(stat->uid), + .ia_vfsgid = VFSGIDT_INIT(stat->gid), }; - err = notify_change(upperdentry, &attr, NULL); + err = ovl_do_notify_change(ofs, upperdentry, &attr); } if (!err) - ovl_set_timestamps(upperdentry, stat); + ovl_set_timestamps(ofs, upperdentry, stat); return err; } -struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper) +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, + bool is_upper) { struct ovl_fh *fh; int fh_type, dwords; @@ -305,7 +379,8 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper) if (is_upper) fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER; fh->fb.len = sizeof(fh->fb) + buflen; - fh->fb.uuid = *uuid; + if (ofs->config.uuid) + fh->fb.uuid = *uuid; return fh; @@ -314,7 +389,7 @@ out_err: return ERR_PTR(err); } -int ovl_set_origin(struct dentry *dentry, struct dentry *lower, +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, struct dentry *upper) { const struct ovl_fh *fh = NULL; @@ -326,7 +401,7 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower, * up and a pure upper inode. */ if (ovl_can_decode_fh(lower->d_sb)) { - fh = ovl_encode_real_fh(lower, false); + fh = ovl_encode_real_fh(ofs, lower, false); if (IS_ERR(fh)) return PTR_ERR(fh); } @@ -334,24 +409,26 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower, /* * Do not fail when upper doesn't support xattrs. */ - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf, + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); kfree(fh); - return err; + /* Ignore -EPERM from setting "user.*" on symlink/special */ + return err == -EPERM ? 0 : err; } /* Store file handle of @upper dir in @index dir entry */ -static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index) +static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *index) { const struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(upper, true); + fh = ovl_encode_real_fh(ofs, upper, true); if (IS_ERR(fh)) return PTR_ERR(fh); - err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0); + err = ovl_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len); kfree(fh); return err; @@ -365,6 +442,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index) static int ovl_create_index(struct dentry *dentry, struct dentry *origin, struct dentry *upper) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *indexdir = ovl_indexdir(dentry->d_sb); struct inode *dir = d_inode(indexdir); struct dentry *index = NULL; @@ -387,29 +465,29 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO; - err = ovl_get_index_name(origin, &name); + err = ovl_get_index_name(ofs, origin, &name); if (err) return err; - temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0)); + temp = ovl_create_temp(ofs, indexdir, OVL_CATTR(S_IFDIR | 0)); err = PTR_ERR(temp); if (IS_ERR(temp)) goto free_name; - err = ovl_set_upper_fh(upper, temp); + err = ovl_set_upper_fh(ofs, upper, temp); if (err) goto out; - index = lookup_one_len(name.name, indexdir, name.len); + index = ovl_lookup_upper(ofs, name.name, indexdir, name.len); if (IS_ERR(index)) { err = PTR_ERR(index); } else { - err = ovl_do_rename(dir, temp, dir, index, 0); + err = ovl_do_rename(ofs, dir, temp, dir, index, 0); dput(index); } out: if (err) - ovl_cleanup(dir, temp); + ovl_cleanup(ofs, dir, temp); dput(temp); free_name: kfree(name.name); @@ -436,6 +514,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) int err; struct dentry *upper; struct dentry *upperdir = ovl_dentry_upper(c->parent); + struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(upperdir); /* Mark parent "impure" because it may now contain non-pure upper */ @@ -448,16 +527,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) return err; inode_lock_nested(udir, I_MUTEX_PARENT); - upper = lookup_one_len(c->dentry->d_name.name, upperdir, - c->dentry->d_name.len); + upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir, + c->dentry->d_name.len); err = PTR_ERR(upper); if (!IS_ERR(upper)) { - err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper); + err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); dput(upper); if (!err) { /* Restore timestamps on parent (best effort) */ - ovl_set_timestamps(upperdir, &c->pstat); + ovl_set_timestamps(ofs, upperdir, &c->pstat); ovl_dentry_set_upper_alias(c->dentry); } } @@ -470,32 +549,46 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) return err; } -static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) +static int ovl_copy_up_data(struct ovl_copy_up_ctx *c, const struct path *temp) { + struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct file *new_file; int err; - /* - * Copy up data first and then xattrs. Writing data after - * xattrs will remove security.capability xattr automatically. - */ - if (S_ISREG(c->stat.mode) && !c->metacopy) { - struct path upperpath, datapath; + if (!S_ISREG(c->stat.mode) || c->metacopy || !c->stat.size) + return 0; - ovl_path_upper(c->dentry, &upperpath); - if (WARN_ON(upperpath.dentry != NULL)) - return -EIO; - upperpath.dentry = temp; + new_file = ovl_path_open(temp, O_LARGEFILE | O_WRONLY); + if (IS_ERR(new_file)) + return PTR_ERR(new_file); - ovl_path_lowerdata(c->dentry, &datapath); - err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); - if (err) - return err; - } + err = ovl_copy_up_file(ofs, c->dentry, new_file, c->stat.size); + fput(new_file); + + return err; +} - err = ovl_copy_xattr(c->lowerpath.dentry, temp); +static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) +{ + struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct inode *inode = d_inode(c->dentry); + struct path upperpath = { .mnt = ovl_upper_mnt(ofs), .dentry = temp }; + int err; + + err = ovl_copy_xattr(c->dentry->d_sb, &c->lowerpath, temp); if (err) return err; + if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) { + /* + * Copy the fileattr inode flags that are the source of already + * copied i_flags + */ + err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath); + if (err) + return err; + } + /* * Store identifier of lower inode in upper inode xattr to * allow lookup of the copy up origin inode. @@ -504,13 +597,13 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. */ if (c->origin) { - err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp); + err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); if (err) return err; } if (c->metacopy) { - err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY, + err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY, NULL, 0, -EOPNOTSUPP); if (err) return err; @@ -518,9 +611,9 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) inode_lock(temp->d_inode); if (S_ISREG(c->stat.mode)) - err = ovl_set_size(temp, &c->stat); + err = ovl_set_size(ofs, temp, &c->stat); if (!err) - err = ovl_set_attr(temp, &c->stat); + err = ovl_set_attr(ofs, temp, &c->stat); inode_unlock(temp->d_inode); return err; @@ -560,8 +653,10 @@ static void ovl_revert_cu_creds(struct ovl_cu_creds *cc) */ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) { + struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *inode; struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir); + struct path path = { .mnt = ovl_upper_mnt(ofs) }; struct dentry *temp, *upper; struct ovl_cu_creds cc; int err; @@ -572,22 +667,32 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) .link = c->link }; - err = ovl_lock_rename_workdir(c->workdir, c->destdir); - if (err) - return err; + /* workdir and destdir could be the same when copying up to indexdir */ + err = -EIO; + if (lock_rename(c->workdir, c->destdir) != NULL) + goto unlock; err = ovl_prep_cu_creds(c->dentry, &cc); if (err) goto unlock; - temp = ovl_create_temp(c->workdir, &cattr); + temp = ovl_create_temp(ofs, c->workdir, &cattr); ovl_revert_cu_creds(&cc); err = PTR_ERR(temp); if (IS_ERR(temp)) goto unlock; - err = ovl_copy_up_inode(c, temp); + /* + * Copy up data first and then xattrs. Writing data after + * xattrs will remove security.capability xattr automatically. + */ + path.dentry = temp; + err = ovl_copy_up_data(c, &path); + if (err) + goto cleanup; + + err = ovl_copy_up_metadata(c, temp); if (err) goto cleanup; @@ -597,12 +702,13 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) goto cleanup; } - upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len); + upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir, + c->destname.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto cleanup; - err = ovl_do_rename(wdir, temp, udir, upper, 0); + err = ovl_do_rename(ofs, wdir, temp, udir, upper, 0); dput(upper); if (err) goto cleanup; @@ -619,7 +725,7 @@ unlock: return err; cleanup: - ovl_cleanup(wdir, temp); + ovl_cleanup(ofs, wdir, temp); dput(temp); goto unlock; } @@ -627,8 +733,10 @@ cleanup: /* Copyup using O_TMPFILE which does not require cross dir locking */ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) { + struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(c->destdir); struct dentry *temp, *upper; + struct file *tmpfile; struct ovl_cu_creds cc; int err; @@ -636,37 +744,43 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) if (err) return err; - temp = ovl_do_tmpfile(c->workdir, c->stat.mode); + tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode); ovl_revert_cu_creds(&cc); - if (IS_ERR(temp)) - return PTR_ERR(temp); + if (IS_ERR(tmpfile)) + return PTR_ERR(tmpfile); - err = ovl_copy_up_inode(c, temp); + temp = tmpfile->f_path.dentry; + if (!c->metacopy && c->stat.size) { + err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size); + if (err) + return err; + } + + err = ovl_copy_up_metadata(c, temp); if (err) - goto out_dput; + goto out_fput; inode_lock_nested(udir, I_MUTEX_PARENT); - upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len); + upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir, + c->destname.len); err = PTR_ERR(upper); if (!IS_ERR(upper)) { - err = ovl_do_link(temp, udir, upper); + err = ovl_do_link(ofs, temp, udir, upper); dput(upper); } inode_unlock(udir); if (err) - goto out_dput; + goto out_fput; if (!c->metacopy) ovl_set_upperdata(d_inode(c->dentry)); - ovl_inode_update(d_inode(c->dentry), temp); - - return 0; + ovl_inode_update(d_inode(c->dentry), dget(temp)); -out_dput: - dput(temp); +out_fput: + fput(tmpfile); return err; } @@ -682,7 +796,7 @@ out_dput: static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; - struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; + struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); bool to_index = false; /* @@ -704,7 +818,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); - err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); + err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname); if (err) return err; } else if (WARN_ON(!c->parent)) { @@ -739,7 +853,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) /* Restore timestamps on parent (best effort) */ inode_lock(udir); - ovl_set_timestamps(c->destdir, &c->pstat); + ovl_set_timestamps(ofs, c->destdir, &c->pstat); inode_unlock(udir); ovl_dentry_set_upper_alias(c->dentry); @@ -768,30 +882,50 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode, return true; } +static ssize_t ovl_getxattr_value(const struct path *path, char *name, char **value) +{ + ssize_t res; + char *buf; + + res = ovl_do_getxattr(path, name, NULL, 0); + if (res == -ENODATA || res == -EOPNOTSUPP) + res = 0; + + if (res > 0) { + buf = kzalloc(res, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + res = ovl_do_getxattr(path, name, buf, res); + if (res < 0) + kfree(buf); + else + *value = buf; + } + return res; +} + /* Copy up data of an inode which was copied up metadata only in the past. */ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) { - struct path upperpath, datapath; + struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct path upperpath; int err; char *capability = NULL; - ssize_t uninitialized_var(cap_size); + ssize_t cap_size; ovl_path_upper(c->dentry, &upperpath); if (WARN_ON(upperpath.dentry == NULL)) return -EIO; - ovl_path_lowerdata(c->dentry, &datapath); - if (WARN_ON(datapath.dentry == NULL)) - return -EIO; - if (c->stat.size) { - err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS, - &capability, 0); - if (err < 0 && err != -ENODATA) + err = cap_size = ovl_getxattr_value(&upperpath, XATTR_NAME_CAPS, + &capability); + if (cap_size < 0) goto out; } - err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); + err = ovl_copy_up_data(c, &upperpath); if (err) goto out_free; @@ -800,14 +934,14 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) * don't want that to happen for normal copy-up operation. */ if (capability) { - err = ovl_do_setxattr(upperpath.dentry, XATTR_NAME_CAPS, + err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS, capability, cap_size, 0); if (err) goto out_free; } - err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY); + err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY); if (err) goto out_free; @@ -882,10 +1016,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return err; } -int ovl_copy_up_flags(struct dentry *dentry, int flags) +static int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; - const struct cred *old_cred = ovl_override_creds(dentry->d_sb); + const struct cred *old_cred; bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED); /* @@ -896,6 +1030,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) if (WARN_ON(disconnected && d_is_dir(dentry))) return -EIO; + old_cred = ovl_override_creds(dentry->d_sb); while (!err) { struct dentry *next; struct dentry *parent = NULL; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 8e57d5372b8f..6b03457f72bb 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -23,15 +23,15 @@ MODULE_PARM_DESC(redirect_max, static int ovl_set_redirect(struct dentry *dentry, bool samedir); -int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) +int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry) { int err; dget(wdentry); if (d_is_dir(wdentry)) - err = ovl_do_rmdir(wdir, wdentry); + err = ovl_do_rmdir(ofs, wdir, wdentry); else - err = ovl_do_unlink(wdir, wdentry); + err = ovl_do_unlink(ofs, wdir, wdentry); dput(wdentry); if (err) { @@ -42,7 +42,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) return err; } -static struct dentry *ovl_lookup_temp(struct dentry *workdir) +struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -51,7 +51,7 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir) /* counter is allowed to wrap, since temp dentries are ephemeral */ snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id)); - temp = lookup_one_len(name, workdir, strlen(name)); + temp = ovl_lookup_upper(ofs, name, workdir, strlen(name)); if (!IS_ERR(temp) && temp->d_inode) { pr_err("workdir/%s already exists\n", name); dput(temp); @@ -62,35 +62,59 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir) } /* caller holds i_mutex on workdir */ -static struct dentry *ovl_whiteout(struct dentry *workdir) +static struct dentry *ovl_whiteout(struct ovl_fs *ofs) { int err; struct dentry *whiteout; + struct dentry *workdir = ofs->workdir; struct inode *wdir = workdir->d_inode; - whiteout = ovl_lookup_temp(workdir); - if (IS_ERR(whiteout)) - return whiteout; + if (!ofs->whiteout) { + whiteout = ovl_lookup_temp(ofs, workdir); + if (IS_ERR(whiteout)) + goto out; - err = ovl_do_whiteout(wdir, whiteout); - if (err) { - dput(whiteout); - whiteout = ERR_PTR(err); + err = ovl_do_whiteout(ofs, wdir, whiteout); + if (err) { + dput(whiteout); + whiteout = ERR_PTR(err); + goto out; + } + ofs->whiteout = whiteout; } + if (ofs->share_whiteout) { + whiteout = ovl_lookup_temp(ofs, workdir); + if (IS_ERR(whiteout)) + goto out; + + err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout); + if (!err) + goto out; + + if (err != -EMLINK) { + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n", + ofs->whiteout->d_inode->i_nlink, err); + ofs->share_whiteout = false; + } + dput(whiteout); + } + whiteout = ofs->whiteout; + ofs->whiteout = NULL; +out: return whiteout; } /* Caller must hold i_mutex on both workdir and dir */ -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { - struct inode *wdir = workdir->d_inode; + struct inode *wdir = ofs->workdir->d_inode; struct dentry *whiteout; int err; int flags = 0; - whiteout = ovl_whiteout(workdir); + whiteout = ovl_whiteout(ofs); err = PTR_ERR(whiteout); if (IS_ERR(whiteout)) return err; @@ -98,28 +122,28 @@ int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, if (d_is_dir(dentry)) flags = RENAME_EXCHANGE; - err = ovl_do_rename(wdir, whiteout, dir, dentry, flags); + err = ovl_do_rename(ofs, wdir, whiteout, dir, dentry, flags); if (err) goto kill_whiteout; if (flags) - ovl_cleanup(wdir, dentry); + ovl_cleanup(ofs, wdir, dentry); out: dput(whiteout); return err; kill_whiteout: - ovl_cleanup(wdir, whiteout); + ovl_cleanup(ofs, wdir, whiteout); goto out; } -static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, - umode_t mode) +int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, + struct dentry **newdentry, umode_t mode) { int err; struct dentry *d, *dentry = *newdentry; - err = ovl_do_mkdir(dir, dentry, mode); + err = ovl_do_mkdir(ofs, dir, dentry, mode); if (err) return err; @@ -131,8 +155,8 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, * to it unhashed and negative. If that happens, try to * lookup a new hashed and positive dentry. */ - d = lookup_one_len(dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); + d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, + dentry->d_name.len); if (IS_ERR(d)) { pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", dentry, err); @@ -144,8 +168,8 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, return 0; } -struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, - struct ovl_cattr *attr) +struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, + struct dentry *newdentry, struct ovl_cattr *attr) { int err; @@ -157,28 +181,28 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, goto out; if (attr->hardlink) { - err = ovl_do_link(attr->hardlink, dir, newdentry); + err = ovl_do_link(ofs, attr->hardlink, dir, newdentry); } else { switch (attr->mode & S_IFMT) { case S_IFREG: - err = ovl_do_create(dir, newdentry, attr->mode); + err = ovl_do_create(ofs, dir, newdentry, attr->mode); break; case S_IFDIR: /* mkdir is special... */ - err = ovl_mkdir_real(dir, &newdentry, attr->mode); + err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - err = ovl_do_mknod(dir, newdentry, attr->mode, + err = ovl_do_mknod(ofs, dir, newdentry, attr->mode, attr->rdev); break; case S_IFLNK: - err = ovl_do_symlink(dir, newdentry, attr->link); + err = ovl_do_symlink(ofs, dir, newdentry, attr->link); break; default: @@ -200,18 +224,20 @@ out: return newdentry; } -struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr) +struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, + struct ovl_cattr *attr) { - return ovl_create_real(d_inode(workdir), ovl_lookup_temp(workdir), - attr); + return ovl_create_real(ofs, d_inode(workdir), + ovl_lookup_temp(ofs, workdir), attr); } static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, int xerr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); @@ -243,6 +269,9 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, ovl_dir_modified(dentry->d_parent, false); ovl_dentry_set_upper_alias(dentry); + ovl_dentry_update_reval(dentry, newdentry, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + if (!hardlink) { /* * ovl_obtain_alias() can be called after ovl_create_real() @@ -259,6 +288,8 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, inode = ovl_get_inode(dentry->d_sb, &oip); if (IS_ERR(inode)) return PTR_ERR(inode); + if (inode == oip.newinode) + ovl_set_flag(OVL_UPPERDATA, inode); } else { WARN_ON(ovl_inode_real(inode) != d_inode(newdentry)); dput(newdentry); @@ -291,6 +322,7 @@ static bool ovl_type_origin(struct dentry *dentry) static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct ovl_cattr *attr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *udir = upperdir->d_inode; struct dentry *newdentry; @@ -300,16 +332,16 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, attr->mode &= ~current_umask(); inode_lock_nested(udir, I_MUTEX_PARENT); - newdentry = ovl_create_real(udir, - lookup_one_len(dentry->d_name.name, - upperdir, - dentry->d_name.len), + newdentry = ovl_create_real(ofs, udir, + ovl_lookup_upper(ofs, dentry->d_name.name, + upperdir, dentry->d_name.len), attr); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_unlock; - if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && + !ovl_allow_offline_changes(ofs)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } @@ -322,7 +354,7 @@ out_unlock: return err; out_cleanup: - ovl_cleanup(udir, newdentry); + ovl_cleanup(ofs, udir, newdentry); dput(newdentry); goto out_unlock; } @@ -330,6 +362,7 @@ out_cleanup: static struct dentry *ovl_clear_empty(struct dentry *dentry, struct list_head *list) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); @@ -360,12 +393,12 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (upper->d_parent->d_inode != udir) goto out_unlock; - opaquedir = ovl_create_temp(workdir, OVL_CATTR(stat.mode)); + opaquedir = ovl_create_temp(ofs, workdir, OVL_CATTR(stat.mode)); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out_unlock; - err = ovl_copy_xattr(upper, opaquedir); + err = ovl_copy_xattr(dentry->d_sb, &upperpath, opaquedir); if (err) goto out_cleanup; @@ -374,17 +407,17 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, goto out_cleanup; inode_lock(opaquedir->d_inode); - err = ovl_set_attr(opaquedir, &stat); + err = ovl_set_attr(ofs, opaquedir, &stat); inode_unlock(opaquedir->d_inode); if (err) goto out_cleanup; - err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE); + err = ovl_do_rename(ofs, wdir, opaquedir, udir, upper, RENAME_EXCHANGE); if (err) goto out_cleanup; - ovl_cleanup_whiteouts(upper, list); - ovl_cleanup(wdir, upper); + ovl_cleanup_whiteouts(ofs, upper, list); + ovl_cleanup(ofs, wdir, upper); unlock_rename(workdir, upperdir); /* dentry's upper doesn't match now, get rid of it */ @@ -393,7 +426,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, return opaquedir; out_cleanup: - ovl_cleanup(wdir, opaquedir); + ovl_cleanup(ofs, wdir, opaquedir); dput(opaquedir); out_unlock: unlock_rename(workdir, upperdir); @@ -401,8 +434,8 @@ out: return ERR_PTR(err); } -static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, - const struct posix_acl *acl) +static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry, + const char *name, const struct posix_acl *acl) { void *buffer; size_t size; @@ -420,7 +453,7 @@ static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, if (err < 0) goto out_free; - err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE); + err = ovl_do_setxattr(ofs, upperdentry, name, buffer, size, XATTR_CREATE); out_free: kfree(buffer); return err; @@ -429,6 +462,7 @@ out_free: static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct ovl_cattr *cattr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); @@ -453,8 +487,8 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out; - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto out_unlock; @@ -463,7 +497,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper))) goto out_dput; - newdentry = ovl_create_temp(workdir, cattr); + newdentry = ovl_create_temp(ofs, workdir, cattr); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_dput; @@ -479,19 +513,19 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, .ia_mode = cattr->mode, }; inode_lock(newdentry->d_inode); - err = notify_change(newdentry, &attr, NULL); + err = ovl_do_notify_change(ofs, newdentry, &attr); inode_unlock(newdentry->d_inode); if (err) goto out_cleanup; } if (!hardlink) { - err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS, - acl); + err = ovl_set_upper_acl(ofs, newdentry, + XATTR_NAME_POSIX_ACL_ACCESS, acl); if (err) goto out_cleanup; - err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT, - default_acl); + err = ovl_set_upper_acl(ofs, newdentry, + XATTR_NAME_POSIX_ACL_DEFAULT, default_acl); if (err) goto out_cleanup; } @@ -501,20 +535,22 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out_cleanup; - err = ovl_do_rename(wdir, newdentry, udir, upper, + err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, RENAME_EXCHANGE); if (err) goto out_cleanup; - ovl_cleanup(wdir, upper); + ovl_cleanup(ofs, wdir, upper); } else { - err = ovl_do_rename(wdir, newdentry, udir, upper, 0); + err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 0); if (err) goto out_cleanup; } err = ovl_instantiate(dentry, inode, newdentry, hardlink); - if (err) - goto out_cleanup; + if (err) { + ovl_cleanup(ofs, udir, newdentry); + dput(newdentry); + } out_dput: dput(upper); out_unlock: @@ -527,7 +563,7 @@ out: return err; out_cleanup: - ovl_cleanup(wdir, newdentry); + ovl_cleanup(ofs, wdir, newdentry); dput(newdentry); goto out_dput; } @@ -607,7 +643,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, inode->i_state |= I_CREATING; spin_unlock(&inode->i_lock); - inode_init_owner(inode, dentry->d_parent->d_inode, mode); + inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode); attr.mode = inode->i_mode; err = ovl_create_or_link(dentry, inode, &attr, false); @@ -621,19 +657,20 @@ out: return err; } -static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) +static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) { return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); } -static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) { return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); } -static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev) +static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t rdev) { /* Don't allow creation of "whiteout" on overlay */ if (S_ISCHR(mode) && rdev == WHITEOUT_DEV) @@ -642,8 +679,8 @@ static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, return ovl_create_object(dentry, mode, rdev, NULL); } -static int ovl_symlink(struct inode *dir, struct dentry *dentry, - const char *link) +static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, const char *link) { return ovl_create_object(dentry, S_IFLNK, 0, link); } @@ -712,6 +749,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper) static int ovl_remove_and_whiteout(struct dentry *dentry, struct list_head *list) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *workdir = ovl_workdir(dentry); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct dentry *upper; @@ -732,8 +770,8 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, if (err) goto out_dput; - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto out_unlock; @@ -745,7 +783,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, goto out_dput_upper; } - err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper); + err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper); if (err) goto out_d_drop; @@ -765,6 +803,7 @@ out: static int ovl_remove_upper(struct dentry *dentry, bool is_dir, struct list_head *list) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *dir = upperdir->d_inode; struct dentry *upper; @@ -779,8 +818,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, } inode_lock_nested(dir, I_MUTEX_PARENT); - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto out_unlock; @@ -791,9 +830,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir, goto out_dput_upper; if (is_dir) - err = vfs_rmdir(dir, upper); + err = ovl_do_rmdir(ofs, dir, upper); else - err = vfs_unlink(dir, upper, NULL); + err = ovl_do_unlink(ofs, dir, upper); ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry)); /* @@ -819,11 +858,32 @@ static bool ovl_pure_upper(struct dentry *dentry) !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); } +static void ovl_drop_nlink(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + struct dentry *alias; + + /* Try to find another, hashed alias */ + spin_lock(&inode->i_lock); + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + if (alias != dentry && !d_unhashed(alias)) + break; + } + spin_unlock(&inode->i_lock); + + /* + * Changes to underlying layers may cause i_nlink to lose sync with + * reality. In this case prevent the link count from going to zero + * prematurely. + */ + if (inode->i_nlink > !!alias) + drop_nlink(inode); +} + static int ovl_do_remove(struct dentry *dentry, bool is_dir) { int err; const struct cred *old_cred; - struct dentry *upperdentry; bool lower_positive = ovl_lower_positive(dentry); LIST_HEAD(list); @@ -856,7 +916,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (is_dir) clear_nlink(dentry->d_inode); else - drop_nlink(dentry->d_inode); + ovl_drop_nlink(dentry); } ovl_nlink_end(dentry); @@ -866,9 +926,8 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) * Note: we fail to update ctime if there was no copy-up, only a * whiteout */ - upperdentry = ovl_dentry_upper(dentry); - if (upperdentry) - ovl_copyattr(d_inode(upperdentry), d_inode(dentry)); + if (ovl_dentry_upper(dentry)) + ovl_copyattr(d_inode(dentry)); out_drop_write: ovl_drop_write(dentry); @@ -940,8 +999,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect) buflen -= thislen; memcpy(&buf[buflen], name, thislen); - tmp = dget_dlock(d->d_parent); spin_unlock(&d->d_lock); + tmp = dget_parent(d); dput(d); d = tmp; @@ -990,6 +1049,7 @@ static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir) static int ovl_set_redirect(struct dentry *dentry, bool samedir) { int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); const char *redirect = ovl_dentry_get_redirect(dentry); bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir); @@ -1000,7 +1060,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, redirect, strlen(redirect), -EXDEV); if (!err) { @@ -1017,9 +1077,9 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) return err; } -static int ovl_rename(struct inode *olddir, struct dentry *old, - struct inode *newdir, struct dentry *new, - unsigned int flags) +static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir, + struct dentry *old, struct inode *newdir, + struct dentry *new, unsigned int flags) { int err; struct dentry *old_upperdir; @@ -1037,6 +1097,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, bool samedir = olddir == newdir; struct dentry *opaquedir = NULL; const struct cred *old_cred = NULL; + struct ovl_fs *ofs = OVL_FS(old->d_sb); LIST_HEAD(list); err = -EINVAL; @@ -1131,8 +1192,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, trap = lock_rename(new_upperdir, old_upperdir); - olddentry = lookup_one_len(old->d_name.name, old_upperdir, - old->d_name.len); + olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir, + old->d_name.len); err = PTR_ERR(olddentry); if (IS_ERR(olddentry)) goto out_unlock; @@ -1141,8 +1202,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (!ovl_matches_upper(old, olddentry)) goto out_dput_old; - newdentry = lookup_one_len(new->d_name.name, new_upperdir, - new->d_name.len); + newdentry = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir, + new->d_name.len); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_dput_old; @@ -1160,9 +1221,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, goto out_dput; } } else { - if (!d_is_negative(newdentry) && - (!new_opaque || !ovl_is_whiteout(newdentry))) - goto out_dput; + if (!d_is_negative(newdentry)) { + if (!new_opaque || !ovl_is_whiteout(newdentry)) + goto out_dput; + } else { + if (flags & RENAME_EXCHANGE) + goto out_dput; + } } if (olddentry == trap) @@ -1189,19 +1254,19 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (err) goto out_dput; - err = ovl_do_rename(old_upperdir->d_inode, olddentry, + err = ovl_do_rename(ofs, old_upperdir->d_inode, olddentry, new_upperdir->d_inode, newdentry, flags); if (err) goto out_dput; if (cleanup_whiteout) - ovl_cleanup(old_upperdir->d_inode, newdentry); + ovl_cleanup(ofs, old_upperdir->d_inode, newdentry); if (overwrite && d_inode(new)) { if (new_is_dir) clear_nlink(d_inode(new)); else - drop_nlink(d_inode(new)); + ovl_drop_nlink(new); } ovl_dir_modified(old->d_parent, ovl_type_origin(old) || @@ -1210,9 +1275,9 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, (d_inode(new) && ovl_type_origin(new))); /* copy ctime: */ - ovl_copyattr(d_inode(olddentry), d_inode(old)); + ovl_copyattr(d_inode(old)); if (d_inode(new) && ovl_dentry_upper(new)) - ovl_copyattr(d_inode(newdentry), d_inode(new)); + ovl_copyattr(d_inode(new)); out_dput: dput(newdentry); @@ -1248,4 +1313,6 @@ const struct inode_operations ovl_dir_inode_operations = { .listxattr = ovl_listxattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, + .fileattr_get = ovl_fileattr_get, + .fileattr_set = ovl_fileattr_set, }; diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 6f54d70cef27..e065a5b9a442 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -204,14 +204,15 @@ static int ovl_check_encode_origin(struct dentry *dentry) * ovl_connect_layer() will try to make origin's layer "connected" by * copying up a "connectable" ancestor. */ - if (d_is_dir(dentry) && ofs->upper_mnt) + if (d_is_dir(dentry) && ovl_upper_mnt(ofs)) return ovl_connect_layer(dentry); /* Lower file handle for indexed and non-upper dir/non-dir */ return 1; } -static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen) +static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, + u32 *fid, int buflen) { struct ovl_fh *fh = NULL; int err, enc_lower; @@ -226,17 +227,14 @@ static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen) goto fail; /* Encode an upper or lower file handle */ - fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) : + fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) : ovl_dentry_upper(dentry), !enc_lower); if (IS_ERR(fh)) return PTR_ERR(fh); - err = -EOVERFLOW; len = OVL_FH_LEN(fh); - if (len > buflen) - goto fail; - - memcpy(fid, fh, len); + if (len <= buflen) + memcpy(fid, fh, len); err = len; out: @@ -244,32 +242,34 @@ out: return err; fail: - pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", - dentry, err, buflen, fh ? (int)fh->fb.len : 0, - fh ? fh->fb.type : 0); + pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n", + dentry, err); goto out; } static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, struct inode *parent) { + struct ovl_fs *ofs = OVL_FS(inode->i_sb); struct dentry *dentry; - int bytes = *max_len << 2; + int bytes, buflen = *max_len << 2; /* TODO: encode connectable file handles */ if (parent) return FILEID_INVALID; dentry = d_find_any_alias(inode); - if (WARN_ON(!dentry)) + if (!dentry) return FILEID_INVALID; - bytes = ovl_dentry_to_fid(dentry, fid, bytes); + bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen); dput(dentry); if (bytes <= 0) return FILEID_INVALID; *max_len = bytes >> 2; + if (bytes > buflen) + return FILEID_INVALID; return OVL_FILEID_V1; } @@ -308,29 +308,35 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, ovl_set_flag(OVL_UPPERDATA, inode); dentry = d_find_any_alias(inode); - if (!dentry) { - dentry = d_alloc_anon(inode->i_sb); - if (!dentry) - goto nomem; - oe = ovl_alloc_entry(lower ? 1 : 0); - if (!oe) - goto nomem; - - if (lower) { - oe->lowerstack->dentry = dget(lower); - oe->lowerstack->layer = lowerpath->layer; - } - dentry->d_fsdata = oe; - if (upper_alias) - ovl_dentry_set_upper_alias(dentry); + if (dentry) + goto out_iput; + + dentry = d_alloc_anon(inode->i_sb); + if (unlikely(!dentry)) + goto nomem; + oe = ovl_alloc_entry(lower ? 1 : 0); + if (!oe) + goto nomem; + + if (lower) { + oe->lowerstack->dentry = dget(lower); + oe->lowerstack->layer = lowerpath->layer; } + dentry->d_fsdata = oe; + if (upper_alias) + ovl_dentry_set_upper_alias(dentry); + + ovl_dentry_update_reval(dentry, upper, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); return d_instantiate_anon(dentry, inode); nomem: - iput(inode); dput(dentry); - return ERR_PTR(-ENOMEM); + dentry = ERR_PTR(-ENOMEM); +out_iput: + iput(inode); + return dentry; } /* Get the upper or lower dentry in stach whose on layer @idx */ @@ -385,7 +391,13 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, * pointer because we hold no lock on the real dentry. */ take_dentry_name_snapshot(&name, real); + /* + * No mnt_userns handling here: it's an internal lookup. Could skip + * permission checking altogether, but for now just use non-mnt_userns + * transformed ids. + */ this = lookup_one_len(name.name.name, connected, name.name.len); + release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { goto fail; @@ -400,7 +412,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, } out: - release_dentry_name_snapshot(&name); dput(parent); inode_unlock(dir); return this; @@ -472,7 +483,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, if (IS_ERR_OR_NULL(this)) return this; - if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { + if (ovl_dentry_real_at(this, layer->idx) != real) { dput(this); this = ERR_PTR(-EIO); } @@ -673,10 +684,10 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, struct dentry *dentry; struct dentry *upper; - if (!ofs->upper_mnt) + if (!ovl_upper_mnt(ofs)) return ERR_PTR(-EACCES); - upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true); + upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true); if (IS_ERR_OR_NULL(upper)) return upper; @@ -748,7 +759,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, goto out_err; } if (index) { - err = ovl_verify_origin(index, origin.dentry, false); + err = ovl_verify_origin(ofs, index, origin.dentry, false); if (err) goto out_err; } @@ -777,6 +788,9 @@ static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type) if (fh_type != OVL_FILEID_V0) return ERR_PTR(-EINVAL); + if (buflen <= OVL_FH_WIRE_OFFSET) + return ERR_PTR(-EINVAL); + fh = kzalloc(buflen, GFP_KERNEL); if (!fh) return ERR_PTR(-ENOMEM); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 87c362f65448..a1a22f58ba18 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -10,12 +10,14 @@ #include <linux/uio.h> #include <linux/uaccess.h> #include <linux/splice.h> +#include <linux/security.h> #include <linux/mm.h> #include <linux/fs.h> #include "overlayfs.h" struct ovl_aio_req { struct kiocb iocb; + refcount_t ref; struct kiocb *orig_iocb; struct fd fd; }; @@ -32,17 +34,36 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode) return 'm'; } +/* No atime modificaton nor notify on underlying */ +#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY) + static struct file *ovl_open_realfile(const struct file *file, - struct inode *realinode) + const struct path *realpath) { + struct inode *realinode = d_inode(realpath->dentry); struct inode *inode = file_inode(file); + struct user_namespace *real_mnt_userns; struct file *realfile; const struct cred *old_cred; - int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY; + int flags = file->f_flags | OVL_OPEN_FLAGS; + int acc_mode = ACC_MODE(flags); + int err; + + if (flags & O_APPEND) + acc_mode |= MAY_APPEND; old_cred = ovl_override_creds(inode->i_sb); - realfile = open_with_fake_path(&file->f_path, flags, realinode, - current_cred()); + real_mnt_userns = mnt_user_ns(realpath->mnt); + err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode); + if (err) { + realfile = ERR_PTR(err); + } else { + if (!inode_owner_or_capable(real_mnt_userns, realinode)) + flags &= ~O_NOATIME; + + realfile = open_with_fake_path(&file->f_path, flags, realinode, + current_cred()); + } revert_creds(old_cred); pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", @@ -59,23 +80,13 @@ static int ovl_change_flags(struct file *file, unsigned int flags) struct inode *inode = file_inode(file); int err; - /* No atime modificaton on underlying */ - flags |= O_NOATIME | FMODE_NONOTIFY; - - /* If some flag changed that cannot be changed then something's amiss */ - if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK)) - return -EIO; - flags &= OVL_SETFL_MASK; if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM; - if (flags & O_DIRECT) { - if (!file->f_mapping->a_ops || - !file->f_mapping->a_ops->direct_IO) - return -EINVAL; - } + if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT)) + return -EINVAL; if (file->f_op->check_flags) { err = file->f_op->check_flags(flags); @@ -93,27 +104,27 @@ static int ovl_change_flags(struct file *file, unsigned int flags) static int ovl_real_fdget_meta(const struct file *file, struct fd *real, bool allow_meta) { - struct inode *inode = file_inode(file); - struct inode *realinode; + struct dentry *dentry = file_dentry(file); + struct path realpath; real->flags = 0; real->file = file->private_data; if (allow_meta) - realinode = ovl_inode_real(inode); + ovl_path_real(dentry, &realpath); else - realinode = ovl_inode_realdata(inode); + ovl_path_realdata(dentry, &realpath); /* Has it been copied up since we'd opened it? */ - if (unlikely(file_inode(real->file) != realinode)) { + if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) { real->flags = FDPUT_FPUT; - real->file = ovl_open_realfile(file, realinode); + real->file = ovl_open_realfile(file, &realpath); return PTR_ERR_OR_ZERO(real->file); } /* Did the flags change since open? */ - if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME)) + if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS)) return ovl_change_flags(real->file, file->f_flags); return 0; @@ -121,22 +132,32 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real, static int ovl_real_fdget(const struct file *file, struct fd *real) { + if (d_is_dir(file_dentry(file))) { + real->flags = 0; + real->file = ovl_dir_real_file(file, false); + + return PTR_ERR_OR_ZERO(real->file); + } + return ovl_real_fdget_meta(file, real, false); } static int ovl_open(struct inode *inode, struct file *file) { + struct dentry *dentry = file_dentry(file); struct file *realfile; + struct path realpath; int err; - err = ovl_maybe_copy_up(file_dentry(file), file->f_flags); + err = ovl_maybe_copy_up(dentry, file->f_flags); if (err) return err; /* No longer need these flags, so don't pass them on to underlying fs */ file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - realfile = ovl_open_realfile(file, ovl_inode_realdata(inode)); + ovl_path_realdata(dentry, &realpath); + realfile = ovl_open_realfile(file, &realpath); if (IS_ERR(realfile)) return PTR_ERR(realfile); @@ -219,9 +240,8 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } -static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) +static rwf_t ovl_iocb_to_rwf(int ifl) { - int ifl = iocb->ki_flags; rwf_t flags = 0; if (ifl & IOCB_NOWAIT) @@ -236,6 +256,14 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) return flags; } +static inline void ovl_aio_put(struct ovl_aio_req *aio_req) +{ + if (refcount_dec_and_test(&aio_req->ref)) { + fdput(aio_req->fd); + kmem_cache_free(ovl_aio_request_cachep, aio_req); + } +} + static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) { struct kiocb *iocb = &aio_req->iocb; @@ -248,22 +276,21 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb, SB_FREEZE_WRITE); file_end_write(iocb->ki_filp); - ovl_copyattr(ovl_inode_real(inode), inode); + ovl_copyattr(inode); } orig_iocb->ki_pos = iocb->ki_pos; - fdput(aio_req->fd); - kmem_cache_free(ovl_aio_request_cachep, aio_req); + ovl_aio_put(aio_req); } -static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) +static void ovl_aio_rw_complete(struct kiocb *iocb, long res) { struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb); struct kiocb *orig_iocb = aio_req->orig_iocb; ovl_aio_cleanup_handler(aio_req); - orig_iocb->ki_complete(orig_iocb, res, res2); + orig_iocb->ki_complete(orig_iocb, res); } static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) @@ -280,10 +307,15 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) return ret; + ret = -EINVAL; + if (iocb->ki_flags & IOCB_DIRECT && + !(real.file->f_mode & FMODE_CAN_ODIRECT)) + goto out_fdput; + old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); + ovl_iocb_to_rwf(iocb->ki_flags)); } else { struct ovl_aio_req *aio_req; @@ -297,14 +329,16 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, real.file); aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); } out: revert_creds(old_cred); ovl_file_accessed(file); - +out_fdput: fdput(real); return ret; @@ -317,13 +351,14 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) struct fd real; const struct cred *old_cred; ssize_t ret; + int ifl = iocb->ki_flags; if (!iov_iter_count(iter)) return 0; inode_lock(inode); /* Update mode */ - ovl_copyattr(ovl_inode_real(inode), inode); + ovl_copyattr(inode); ret = file_remove_privs(file); if (ret) goto out_unlock; @@ -332,14 +367,22 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) goto out_unlock; + ret = -EINVAL; + if (iocb->ki_flags & IOCB_DIRECT && + !(real.file->f_mode & FMODE_CAN_ODIRECT)) + goto out_fdput; + + if (!ovl_should_sync(OVL_FS(inode->i_sb))) + ifl &= ~(IOCB_DSYNC | IOCB_SYNC); + old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { file_start_write(real.file); ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); + ovl_iocb_to_rwf(ifl)); file_end_write(real.file); /* Update size */ - ovl_copyattr(ovl_inode_real(inode), inode); + ovl_copyattr(inode); } else { struct ovl_aio_req *aio_req; @@ -356,13 +399,17 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) real.flags = 0; aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_flags = ifl; aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); } out: revert_creds(old_cred); +out_fdput: fdput(real); out_unlock: @@ -371,45 +418,47 @@ out_unlock: return ret; } -static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +/* + * Calling iter_file_splice_write() directly from overlay's f_op may deadlock + * due to lock order inversion between pipe->mutex in iter_file_splice_write() + * and file_start_write(real.file) in ovl_write_iter(). + * + * So do everything ovl_write_iter() does and call iter_file_splice_write() on + * the real file. + */ +static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { - ssize_t ret; struct fd real; const struct cred *old_cred; + struct inode *inode = file_inode(out); + ssize_t ret; - ret = ovl_real_fdget(in, &real); + inode_lock(inode); + /* Update mode */ + ovl_copyattr(inode); + ret = file_remove_privs(out); if (ret) - return ret; - - old_cred = ovl_override_creds(file_inode(in)->i_sb); - ret = generic_file_splice_read(real.file, ppos, pipe, len, flags); - revert_creds(old_cred); - - ovl_file_accessed(in); - fdput(real); - return ret; -} - -static ssize_t -ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) -{ - struct fd real; - const struct cred *old_cred; - ssize_t ret; + goto out_unlock; ret = ovl_real_fdget(out, &real); if (ret) - return ret; + goto out_unlock; + + old_cred = ovl_override_creds(inode->i_sb); + file_start_write(real.file); - old_cred = ovl_override_creds(file_inode(out)->i_sb); ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); - revert_creds(old_cred); - ovl_file_accessed(out); + file_end_write(real.file); + /* Update size */ + ovl_copyattr(inode); + revert_creds(old_cred); fdput(real); + +out_unlock: + inode_unlock(inode); + return ret; } @@ -419,6 +468,10 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) const struct cred *old_cred; int ret; + ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); + if (ret <= 0) + return ret; + ret = ovl_real_fdget_meta(file, &real, !datasync); if (ret) return ret; @@ -447,20 +500,11 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma) if (WARN_ON(file != vma->vm_file)) return -EIO; - vma->vm_file = get_file(realfile); + vma_set_file(vma, realfile); old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = call_mmap(vma->vm_file, vma); revert_creds(old_cred); - - if (ret) { - /* Drop reference count from new vm_file value */ - fput(realfile); - } else { - /* Drop reference count from previous vm_file value */ - fput(file); - } - ovl_file_accessed(file); return ret; @@ -482,7 +526,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len revert_creds(old_cred); /* Update size */ - ovl_copyattr(ovl_inode_real(inode), inode); + ovl_copyattr(inode); fdput(real); @@ -508,166 +552,6 @@ static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) return ret; } -static long ovl_real_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct fd real; - const struct cred *old_cred; - long ret; - - ret = ovl_real_fdget(file, &real); - if (ret) - return ret; - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_ioctl(real.file, cmd, arg); - revert_creds(old_cred); - - fdput(real); - - return ret; -} - -static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int iflags) -{ - long ret; - struct inode *inode = file_inode(file); - unsigned int old_iflags; - - if (!inode_owner_or_capable(inode)) - return -EACCES; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(inode); - - /* Check the capability before cred override */ - ret = -EPERM; - old_iflags = READ_ONCE(inode->i_flags); - if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) && - !capable(CAP_LINUX_IMMUTABLE)) - goto unlock; - - ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY); - if (ret) - goto unlock; - - ret = ovl_real_ioctl(file, cmd, arg); - - ovl_copyflags(ovl_inode_real(inode), inode); -unlock: - inode_unlock(inode); - - mnt_drop_write_file(file); - - return ret; - -} - -static unsigned int ovl_fsflags_to_iflags(unsigned int flags) -{ - unsigned int iflags = 0; - - if (flags & FS_SYNC_FL) - iflags |= S_SYNC; - if (flags & FS_APPEND_FL) - iflags |= S_APPEND; - if (flags & FS_IMMUTABLE_FL) - iflags |= S_IMMUTABLE; - if (flags & FS_NOATIME_FL) - iflags |= S_NOATIME; - - return iflags; -} - -static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd, - unsigned long arg) -{ - unsigned int flags; - - if (get_user(flags, (int __user *) arg)) - return -EFAULT; - - return ovl_ioctl_set_flags(file, cmd, arg, - ovl_fsflags_to_iflags(flags)); -} - -static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags) -{ - unsigned int iflags = 0; - - if (xflags & FS_XFLAG_SYNC) - iflags |= S_SYNC; - if (xflags & FS_XFLAG_APPEND) - iflags |= S_APPEND; - if (xflags & FS_XFLAG_IMMUTABLE) - iflags |= S_IMMUTABLE; - if (xflags & FS_XFLAG_NOATIME) - iflags |= S_NOATIME; - - return iflags; -} - -static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct fsxattr fa; - - memset(&fa, 0, sizeof(fa)); - if (copy_from_user(&fa, (void __user *) arg, sizeof(fa))) - return -EFAULT; - - return ovl_ioctl_set_flags(file, cmd, arg, - ovl_fsxflags_to_iflags(fa.fsx_xflags)); -} - -static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - long ret; - - switch (cmd) { - case FS_IOC_GETFLAGS: - case FS_IOC_FSGETXATTR: - ret = ovl_real_ioctl(file, cmd, arg); - break; - - case FS_IOC_SETFLAGS: - ret = ovl_ioctl_set_fsflags(file, cmd, arg); - break; - - case FS_IOC_FSSETXATTR: - ret = ovl_ioctl_set_fsxflags(file, cmd, arg); - break; - - default: - ret = -ENOTTY; - } - - return ret; -} - -static long ovl_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - switch (cmd) { - case FS_IOC32_GETFLAGS: - cmd = FS_IOC_GETFLAGS; - break; - - case FS_IOC32_SETFLAGS: - cmd = FS_IOC_SETFLAGS; - break; - - default: - return -ENOIOCTLCMD; - } - - return ovl_ioctl(file, cmd, arg); -} - enum ovl_copyop { OVL_COPY, OVL_CLONE, @@ -714,7 +598,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, revert_creds(old_cred); /* Update size */ - ovl_copyattr(ovl_inode_real(inode_out), inode_out); + ovl_copyattr(inode_out); fdput(real_in); fdput(real_out); @@ -757,6 +641,26 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, remap_flags, op); } +static int ovl_flush(struct file *file, fl_owner_t id) +{ + struct fd real; + const struct cred *old_cred; + int err; + + err = ovl_real_fdget(file, &real); + if (err) + return err; + + if (real.file->f_op->flush) { + old_cred = ovl_override_creds(file_inode(file)->i_sb); + err = real.file->f_op->flush(real.file, id); + revert_creds(old_cred); + } + fdput(real); + + return err; +} + const struct file_operations ovl_file_operations = { .open = ovl_open, .release = ovl_release, @@ -767,9 +671,8 @@ const struct file_operations ovl_file_operations = { .mmap = ovl_mmap, .fallocate = ovl_fallocate, .fadvise = ovl_fadvise, - .unlocked_ioctl = ovl_ioctl, - .compat_ioctl = ovl_compat_ioctl, - .splice_read = ovl_splice_read, + .flush = ovl_flush, + .splice_read = generic_file_splice_read, .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 79e8994e3bc1..9e61511de7a7 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -10,17 +10,23 @@ #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/ratelimit.h> +#include <linux/fiemap.h> +#include <linux/fileattr.h> +#include <linux/security.h> +#include <linux/namei.h> #include "overlayfs.h" -int ovl_setattr(struct dentry *dentry, struct iattr *attr) +int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr) { int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool full_copy_up = false; struct dentry *upperdentry; const struct cred *old_cred; - err = setattr_prepare(dentry, attr); + err = setattr_prepare(&init_user_ns, dentry, attr); if (err) return err; @@ -29,12 +35,6 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) goto out; if (attr->ia_valid & ATTR_SIZE) { - struct inode *realinode = d_inode(ovl_dentry_real(dentry)); - - err = -ETXTBSY; - if (atomic_read(&realinode->i_writecount) < 0) - goto out_drop_write; - /* Truncate should trigger data copy up as well */ full_copy_up = true; } @@ -58,12 +58,30 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) attr->ia_valid &= ~ATTR_MODE; + /* + * We might have to translate ovl file into real file object + * once use cases emerge. For now, simply don't let underlying + * filesystem rely on attr->ia_file + */ + attr->ia_valid &= ~ATTR_FILE; + + /* + * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN + * set. Overlayfs does not pass O_TRUNC flag to underlying + * filesystem during open -> do not pass ATTR_OPEN. This + * disables optimization in fuse which assumes open(O_TRUNC) + * already set file size to 0. But we never passed O_TRUNC to + * fuse. So by clearing ATTR_OPEN, fuse will be forced to send + * setattr request to server. + */ + attr->ia_valid &= ~ATTR_OPEN; + inode_lock(upperdentry->d_inode); old_cred = ovl_override_creds(dentry->d_sb); - err = notify_change(upperdentry, attr, NULL); + err = ovl_do_notify_change(ofs, upperdentry, attr); revert_creds(old_cred); if (!err) - ovl_copyattr(upperdentry->d_inode, dentry->d_inode); + ovl_copyattr(dentry->d_inode); inode_unlock(upperdentry->d_inode); if (winode) @@ -75,10 +93,11 @@ out: return err; } -static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) +static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) { bool samefs = ovl_same_fs(dentry->d_sb); unsigned int xinobits = ovl_xino_bits(dentry->d_sb); + unsigned int xinoshift = 64 - xinobits; if (samefs) { /* @@ -87,24 +106,24 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) * which is friendly to du -x. */ stat->dev = dentry->d_sb->s_dev; - return 0; + return; } else if (xinobits) { - unsigned int shift = 64 - xinobits; /* * All inode numbers of underlying fs should not be using the * high xinobits, so we use high xinobits to partition the * overlay st_ino address space. The high bits holds the fsid - * (upper fsid is 0). This way overlay inode numbers are unique - * and all inodes use overlay st_dev. Inode numbers are also - * persistent for a given layer configuration. + * (upper fsid is 0). The lowest xinobit is reserved for mapping + * the non-persistent inode numbers range in case of overflow. + * This way all overlay inode numbers are unique and use the + * overlay st_dev. */ - if (stat->ino >> shift) { + if (likely(!(stat->ino >> xinoshift))) { + stat->ino |= ((u64)fsid) << (xinoshift + 1); + stat->dev = dentry->d_sb->s_dev; + return; + } else if (ovl_xino_warn(dentry->d_sb)) { pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", dentry, stat->ino, xinobits); - } else { - stat->ino |= ((u64)fsid) << shift; - stat->dev = dentry->d_sb->s_dev; - return 0; } } @@ -130,18 +149,17 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) */ stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev; } - - return 0; } -int ovl_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; enum ovl_path_type type; struct path realpath; const struct cred *old_cred; - bool is_dir = S_ISDIR(dentry->d_inode->i_mode); + struct inode *inode = d_inode(dentry); + bool is_dir = S_ISDIR(inode->i_mode); int fsid = 0; int err; bool metacopy_blocks = false; @@ -154,6 +172,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat, if (err) goto out; + /* Report the effective immutable/append-only STATX flags */ + generic_fill_statx_attr(inode, stat); + /* * For non-dir or same fs, we use st_ino of the copy up origin. * This guaranties constant st_dev/st_ino across copy up. @@ -230,9 +251,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, } } - err = ovl_map_dev_ino(dentry, stat, fsid); - if (err) - goto out; + ovl_map_dev_ino(dentry, stat, fsid); /* * It's probably not worth it to count subdirs to get the @@ -257,15 +276,18 @@ out: return err; } -int ovl_permission(struct inode *inode, int mask) +int ovl_permission(struct user_namespace *mnt_userns, + struct inode *inode, int mask) { struct inode *upperinode = ovl_inode_upper(inode); - struct inode *realinode = upperinode ?: ovl_inode_lower(inode); + struct inode *realinode; + struct path realpath; const struct cred *old_cred; int err; /* Careful in RCU walk mode */ - if (!realinode) { + ovl_i_path_real(inode, &realpath); + if (!realpath.dentry) { WARN_ON(!(mask & MAY_NOT_BLOCK)); return -ECHILD; } @@ -274,10 +296,11 @@ int ovl_permission(struct inode *inode, int mask) * Check overlay inode with the creds of task and underlying inode * with creds of mounter */ - err = generic_permission(inode, mask); + err = generic_permission(&init_user_ns, inode, mask); if (err) return err; + realinode = d_inode(realpath.dentry); old_cred = ovl_override_creds(inode->i_sb); if (!upperinode && !special_file(realinode->i_mode) && mask & MAY_WRITE) { @@ -285,7 +308,7 @@ int ovl_permission(struct inode *inode, int mask) /* Make sure mounter can read file for copy up later */ mask |= MAY_READ; } - err = inode_permission(realinode, mask); + err = inode_permission(mnt_user_ns(realpath.mnt), realinode, mask); revert_creds(old_cred); return err; @@ -307,18 +330,26 @@ static const char *ovl_get_link(struct dentry *dentry, return p; } -bool ovl_is_private_xattr(const char *name) +bool ovl_is_private_xattr(struct super_block *sb, const char *name) { - return strncmp(name, OVL_XATTR_PREFIX, - sizeof(OVL_XATTR_PREFIX) - 1) == 0; + struct ovl_fs *ofs = sb->s_fs_info; + + if (ofs->config.userxattr) + return strncmp(name, OVL_XATTR_USER_PREFIX, + sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0; + else + return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, + sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0; } int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdentry = ovl_i_dentry_upper(inode); struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); + struct path realpath; const struct cred *old_cred; err = ovl_want_write(dentry); @@ -326,7 +357,10 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, goto out; if (!value && !upperdentry) { - err = vfs_getxattr(realdentry, name, NULL, 0); + ovl_path_lower(dentry, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); + err = vfs_getxattr(mnt_user_ns(realpath.mnt), realdentry, name, NULL, 0); + revert_creds(old_cred); if (err < 0) goto out_drop_write; } @@ -340,16 +374,17 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, } old_cred = ovl_override_creds(dentry->d_sb); - if (value) - err = vfs_setxattr(realdentry, name, value, size, flags); - else { + if (value) { + err = ovl_do_setxattr(ofs, realdentry, name, value, size, + flags); + } else { WARN_ON(flags != XATTR_REPLACE); - err = vfs_removexattr(realdentry, name); + err = ovl_do_removexattr(ofs, realdentry, name); } revert_creds(old_cred); /* copy c/mtime */ - ovl_copyattr(d_inode(realdentry), inode); + ovl_copyattr(inode); out_drop_write: ovl_drop_write(dentry); @@ -362,24 +397,27 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, { ssize_t res; const struct cred *old_cred; - struct dentry *realdentry = - ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); + struct path realpath; + ovl_i_path_real(inode, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_getxattr(realdentry, name, value, size); + res = vfs_getxattr(mnt_user_ns(realpath.mnt), realpath.dentry, name, value, size); revert_creds(old_cred); return res; } -static bool ovl_can_list(const char *s) +static bool ovl_can_list(struct super_block *sb, const char *s) { - /* List all non-trusted xatts */ + /* Never list private (.overlay) */ + if (ovl_is_private_xattr(sb, s)) + return false; + + /* List all non-trusted xattrs */ if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) return true; - /* Never list trusted.overlay, list other trusted for superuser only */ - return !ovl_is_private_xattr(s) && - ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); + /* list other trusted for superuser only */ + return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) @@ -405,7 +443,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (!ovl_can_list(s)) { + if (!ovl_can_list(dentry->d_sb, s)) { res -= slen; memmove(s, s + slen, len); } else { @@ -416,28 +454,107 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; } -struct posix_acl *ovl_get_acl(struct inode *inode, int type) +#ifdef CONFIG_FS_POSIX_ACL +/* + * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone + * of the POSIX ACLs retrieved from the lower layer to this function to not + * alter the POSIX ACLs for the underlying filesystem. + */ +static void ovl_idmap_posix_acl(struct inode *realinode, + struct user_namespace *mnt_userns, + struct posix_acl *acl) +{ + struct user_namespace *fs_userns = i_user_ns(realinode); + + for (unsigned int i = 0; i < acl->a_count; i++) { + vfsuid_t vfsuid; + vfsgid_t vfsgid; + + struct posix_acl_entry *e = &acl->a_entries[i]; + switch (e->e_tag) { + case ACL_USER: + vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid); + e->e_uid = vfsuid_into_kuid(vfsuid); + break; + case ACL_GROUP: + vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid); + e->e_gid = vfsgid_into_kgid(vfsgid); + break; + } + } +} + +/* + * When the relevant layer is an idmapped mount we need to take the idmapping + * of the layer into account and translate any ACL_{GROUP,USER} values + * according to the idmapped mount. + * + * We cannot alter the ACLs returned from the relevant layer as that would + * alter the cached values filesystem wide for the lower filesystem. Instead we + * can clone the ACLs and then apply the relevant idmapping of the layer. + * + * This is obviously only relevant when idmapped layers are used. + */ +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) { struct inode *realinode = ovl_inode_real(inode); - const struct cred *old_cred; - struct posix_acl *acl; + struct posix_acl *acl, *clone; + struct path realpath; - if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) + if (!IS_POSIXACL(realinode)) return NULL; - old_cred = ovl_override_creds(inode->i_sb); - acl = get_acl(realinode, type); - revert_creds(old_cred); + /* Careful in RCU walk mode */ + ovl_i_path_real(inode, &realpath); + if (!realpath.dentry) { + WARN_ON(!rcu); + return ERR_PTR(-ECHILD); + } + + if (rcu) { + acl = get_cached_acl_rcu(realinode, type); + } else { + const struct cred *old_cred; + + old_cred = ovl_override_creds(inode->i_sb); + acl = get_acl(realinode, type); + revert_creds(old_cred); + } + /* + * If there are no POSIX ACLs, or we encountered an error, + * or the layer isn't idmapped we don't need to do anything. + */ + if (!is_idmapped_mnt(realpath.mnt) || IS_ERR_OR_NULL(acl)) + return acl; + + /* + * We only get here if the layer is idmapped. So drop out of RCU path + * walk so we can clone the ACLs. There's no need to release the ACLs + * since get_cached_acl_rcu() doesn't take a reference on the ACLs. + */ + if (rcu) + return ERR_PTR(-ECHILD); - return acl; + clone = posix_acl_clone(acl, GFP_KERNEL); + if (!clone) + clone = ERR_PTR(-ENOMEM); + else + ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone); + /* + * Since we're not in RCU path walk we always need to release the + * original ACLs. + */ + posix_acl_release(acl); + return clone; } +#endif int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) { if (flags & S_ATIME) { struct ovl_fs *ofs = inode->i_sb->s_fs_info; struct path upperpath = { - .mnt = ofs->upper_mnt, + .mnt = ovl_upper_mnt(ofs), .dentry = ovl_upperdentry_dereference(OVL_I(inode)), }; @@ -453,18 +570,147 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { int err; - struct inode *realinode = ovl_inode_real(inode); + struct inode *realinode = ovl_inode_realdata(inode); const struct cred *old_cred; if (!realinode->i_op->fiemap) return -EOPNOTSUPP; old_cred = ovl_override_creds(inode->i_sb); + err = realinode->i_op->fiemap(realinode, fieinfo, start, len); + revert_creds(old_cred); - if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) - filemap_write_and_wait(realinode->i_mapping); + return err; +} - err = realinode->i_op->fiemap(realinode, fieinfo, start, len); +/* + * Work around the fact that security_file_ioctl() takes a file argument. + * Introducing security_inode_fileattr_get/set() hooks would solve this issue + * properly. + */ +static int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa, + bool set) +{ + struct file *file; + unsigned int cmd; + int err; + + file = dentry_open(realpath, O_RDONLY, current_cred()); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (set) + cmd = fa->fsx_valid ? FS_IOC_FSSETXATTR : FS_IOC_SETFLAGS; + else + cmd = fa->fsx_valid ? FS_IOC_FSGETXATTR : FS_IOC_GETFLAGS; + + err = security_file_ioctl(file, cmd, 0); + fput(file); + + return err; +} + +int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, true); + if (err) + return err; + + return vfs_fileattr_set(mnt_user_ns(realpath->mnt), realpath->dentry, fa); +} + +int ovl_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct path upperpath; + const struct cred *old_cred; + unsigned int flags; + int err; + + err = ovl_want_write(dentry); + if (err) + goto out; + + err = ovl_copy_up(dentry); + if (!err) { + ovl_path_real(dentry, &upperpath); + + old_cred = ovl_override_creds(inode->i_sb); + /* + * Store immutable/append-only flags in xattr and clear them + * in upper fileattr (in case they were set by older kernel) + * so children of "ovl-immutable" directories lower aliases of + * "ovl-immutable" hardlinks could be copied up. + * Clear xattr when flags are cleared. + */ + err = ovl_set_protattr(inode, upperpath.dentry, fa); + if (!err) + err = ovl_real_fileattr_set(&upperpath, fa); + revert_creds(old_cred); + + /* + * Merge real inode flags with inode flags read from + * overlay.protattr xattr + */ + flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK; + + BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); + flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; + inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); + + /* Update ctime */ + ovl_copyattr(inode); + } + ovl_drop_write(dentry); +out: + return err; +} + +/* Convert inode protection flags to fileattr flags */ +static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) +{ + BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL); + BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + + if (inode->i_flags & S_APPEND) { + fa->flags |= FS_APPEND_FL; + fa->fsx_xflags |= FS_XFLAG_APPEND; + } + if (inode->i_flags & S_IMMUTABLE) { + fa->flags |= FS_IMMUTABLE_FL; + fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; + } +} + +int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, false); + if (err) + return err; + + err = vfs_fileattr_get(realpath->dentry, fa); + if (err == -ENOIOCTLCMD) + err = -ENOTTY; + return err; +} + +int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct path realpath; + const struct cred *old_cred; + int err; + + ovl_path_real(dentry, &realpath); + + old_cred = ovl_override_creds(inode->i_sb); + err = ovl_real_fileattr_get(&realpath, fa); + ovl_fileattr_prot_flags(inode, fa); revert_creds(old_cred); return err; @@ -478,6 +724,8 @@ static const struct inode_operations ovl_file_inode_operations = { .get_acl = ovl_get_acl, .update_time = ovl_update_time, .fiemap = ovl_fiemap, + .fileattr_get = ovl_fileattr_get, + .fileattr_set = ovl_fileattr_set, }; static const struct inode_operations ovl_symlink_inode_operations = { @@ -504,11 +752,11 @@ static const struct address_space_operations ovl_aops = { /* * It is possible to stack overlayfs instance on top of another - * overlayfs instance as lower layer. We need to annonate the + * overlayfs instance as lower layer. We need to annotate the * stackable i_mutex locks according to stack level of the super * block instance. An overlayfs instance can never be in stack * depth 0 (there is always a real fs below it). An overlayfs - * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. + * inode lock will use the lockdep annotation ovl_i_mutex_key[depth]. * * For example, here is a snip from /proc/lockdep_chains after * dir_iterate of nested overlayfs: @@ -561,27 +809,76 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) #endif } -static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, - unsigned long ino, int fsid) +static void ovl_next_ino(struct inode *inode) +{ + struct ovl_fs *ofs = inode->i_sb->s_fs_info; + + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); + if (unlikely(!inode->i_ino)) + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); +} + +static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) { int xinobits = ovl_xino_bits(inode->i_sb); + unsigned int xinoshift = 64 - xinobits; /* * When d_ino is consistent with st_ino (samefs or i_ino has enough * bits to encode layer), set the same value used for st_ino to i_ino, * so inode number exposed via /proc/locks and a like will be * consistent with d_ino and st_ino values. An i_ino value inconsistent - * with d_ino also causes nfsd readdirplus to fail. When called from - * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real - * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). + * with d_ino also causes nfsd readdirplus to fail. */ - if (ovl_same_dev(inode->i_sb)) { - inode->i_ino = ino; - if (xinobits && fsid && !(ino >> (64 - xinobits))) - inode->i_ino |= (unsigned long)fsid << (64 - xinobits); - } else { - inode->i_ino = get_next_ino(); + inode->i_ino = ino; + if (ovl_same_fs(inode->i_sb)) { + return; + } else if (xinobits && likely(!(ino >> xinoshift))) { + inode->i_ino |= (unsigned long)fsid << (xinoshift + 1); + return; + } + + /* + * For directory inodes on non-samefs with xino disabled or xino + * overflow, we allocate a non-persistent inode number, to be used for + * resolving st_ino collisions in ovl_map_dev_ino(). + * + * To avoid ino collision with legitimate xino values from upper + * layer (fsid 0), use the lowest xinobit to map the non + * persistent inode numbers to the unified st_ino address space. + */ + if (S_ISDIR(inode->i_mode)) { + ovl_next_ino(inode); + if (xinobits) { + inode->i_ino &= ~0UL >> xinobits; + inode->i_ino |= 1UL << xinoshift; + } + } +} + +void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, + unsigned long ino, int fsid) +{ + struct inode *realinode; + struct ovl_inode *oi = OVL_I(inode); + + if (oip->upperdentry) + oi->__upperdentry = oip->upperdentry; + if (oip->lowerpath && oip->lowerpath->dentry) { + oi->lowerpath.dentry = dget(oip->lowerpath->dentry); + oi->lowerpath.layer = oip->lowerpath->layer; } + if (oip->lowerdata) + oi->lowerdata = igrab(d_inode(oip->lowerdata)); + + realinode = ovl_inode_real(inode); + ovl_copyattr(inode); + ovl_copyflags(realinode, inode); + ovl_map_ino(inode, ino, fsid); +} + +static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) +{ inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; #ifdef CONFIG_FS_POSIX_ACL @@ -625,7 +922,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, * For the first, copy up case, the union nlink does not change, whether the * operation succeeds or fails, but the upper inode nlink may change. * Therefore, before copy up, we store the union nlink value relative to the - * lower inode nlink in the index inode xattr trusted.overlay.nlink. + * lower inode nlink in the index inode xattr .overlay.nlink. * * For the second, upper hardlink case, the union nlink should be incremented * or decremented IFF the operation succeeds, aligned with nlink change of the @@ -660,8 +957,8 @@ static int ovl_set_nlink_common(struct dentry *dentry, if (WARN_ON(len >= sizeof(buf))) return -EIO; - return ovl_do_setxattr(ovl_dentry_upper(dentry), - OVL_XATTR_NLINK, buf, len, 0); + return ovl_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry), + OVL_XATTR_NLINK, buf, len); } int ovl_set_nlink_upper(struct dentry *dentry) @@ -674,7 +971,7 @@ int ovl_set_nlink_lower(struct dentry *dentry) return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); } -unsigned int ovl_get_nlink(struct dentry *lowerdentry, +unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback) { @@ -686,7 +983,8 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry, if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) return fallback; - err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); + err = ovl_getxattr_upper(ofs, upperdentry, OVL_XATTR_NLINK, + &buf, sizeof(buf) - 1); if (err < 0) goto fail; @@ -719,7 +1017,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) inode = new_inode(sb); if (inode) - ovl_fill_inode(inode, mode, rdev, 0, 0); + ovl_fill_inode(inode, mode, rdev); return inode; } @@ -843,7 +1141,7 @@ struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) * Does overlay inode need to be hashed by lower inode? */ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, - struct dentry *lower, struct dentry *index) + struct dentry *lower, bool index) { struct ovl_fs *ofs = sb->s_fs_info; @@ -856,7 +1154,7 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, return true; /* Yes, if won't be copied up */ - if (!ofs->upper_mnt) + if (!ovl_upper_mnt(ofs)) return true; /* No, if lower hardlink is or will be broken on copy up */ @@ -884,15 +1182,20 @@ static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, struct inode *ovl_get_inode(struct super_block *sb, struct ovl_inode_params *oip) { + struct ovl_fs *ofs = OVL_FS(sb); struct dentry *upperdentry = oip->upperdentry; struct ovl_path *lowerpath = oip->lowerpath; struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; + struct path realpath = { + .dentry = upperdentry ?: lowerdentry, + .mnt = upperdentry ? ovl_upper_mnt(ofs) : lowerpath->layer->mnt, + }; bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, oip->index); - int fsid = bylower ? oip->lowerpath->layer->fsid : 0; - bool is_dir, metacopy = false; + int fsid = bylower ? lowerpath->layer->fsid : 0; + bool is_dir; unsigned long ino = 0; int err = oip->newinode ? -EEXIST : -ENOMEM; @@ -931,7 +1234,8 @@ struct inode *ovl_get_inode(struct super_block *sb, /* Recalculate nlink for non-dir due to indexing */ if (!is_dir) - nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); + nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry, + nlink); set_nlink(inode, nlink); ino = key->i_ino; } else { @@ -941,25 +1245,18 @@ struct inode *ovl_get_inode(struct super_block *sb, err = -ENOMEM; goto out_err; } + ino = realinode->i_ino; + fsid = lowerpath->layer->fsid; } - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); - ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); + ovl_inode_init(inode, oip, ino, fsid); - if (upperdentry && ovl_is_impuredir(upperdentry)) + if (upperdentry && ovl_is_impuredir(sb, upperdentry)) ovl_set_flag(OVL_IMPURE, inode); if (oip->index) ovl_set_flag(OVL_INDEX, inode); - if (upperdentry) { - err = ovl_check_metacopy_xattr(upperdentry); - if (err < 0) - goto out_err; - metacopy = err; - if (!metacopy) - ovl_set_flag(OVL_UPPERDATA, inode); - } - OVL_I(inode)->redirect = oip->redirect; if (bylower) @@ -968,11 +1265,15 @@ struct inode *ovl_get_inode(struct super_block *sb, /* Check for non-merge dir that may have whiteouts */ if (is_dir) { if (((upperdentry && lowerdentry) || oip->numlower > 1) || - ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { + ovl_path_check_origin_xattr(ofs, &realpath)) { ovl_set_flag(OVL_WHITEOUTS, inode); } } + /* Check for immutable/append-only inode flags in xattr */ + if (upperdentry) + ovl_check_protattr(inode, upperdentry); + if (inode->i_state & I_NEW) unlock_new_inode(inode); out: diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index ed9e129fae04..0fd1d5fdfc72 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -16,6 +16,7 @@ struct ovl_lookup_data { struct super_block *sb; + struct vfsmount *mnt; struct qstr name; bool is_dir; bool opaque; @@ -25,13 +26,14 @@ struct ovl_lookup_data { bool metacopy; }; -static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, +static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d, size_t prelen, const char *post) { int res; char *buf; + struct ovl_fs *ofs = OVL_FS(d->sb); - buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post)); + buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post)); if (IS_ERR_OR_NULL(buf)) return PTR_ERR(buf); @@ -40,7 +42,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, * One of the ancestor path elements in an absolute path * lookup in ovl_lookup_layer() could have been opaque and * that will stop further lookup in lower layers (d->stop=true) - * But we have found an absolute redirect in decendant path + * But we have found an absolute redirect in descendant path * element and that should force continue lookup in lower * layers (reset d->stop). */ @@ -104,12 +106,13 @@ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len) return 0; } -static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name) +static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry, + enum ovl_xattr ox) { int res, err; struct ovl_fh *fh = NULL; - res = vfs_getxattr(dentry, name, NULL, 0); + res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; @@ -123,7 +126,7 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name) if (!fh) return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, name, fh->buf, res); + res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res); if (res < 0) goto fail; @@ -148,17 +151,22 @@ invalid: goto out; } -struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt, - bool connected) +struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct vfsmount *mnt, bool connected) { struct dentry *real; int bytes; + if (!capable(CAP_DAC_READ_SEARCH)) + return NULL; + /* * Make sure that the stored uuid matches the uuid of the lower * layer where file handle will be decoded. + * In case of uuid=off option just make sure that stored uuid is null. */ - if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid)) + if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) : + !uuid_is_null(&fh->fb.uuid)) return NULL; bytes = (fh->fb.len - offsetof(struct ovl_fb, fid)); @@ -186,21 +194,43 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt, return real; } -static bool ovl_is_opaquedir(struct dentry *dentry) +static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path) { - return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); + return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE); +} + +static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d, + const char *name, + struct dentry *base, int len, + bool drop_negative) +{ + struct dentry *ret = lookup_one_unlocked(mnt_user_ns(d->mnt), name, base, len); + + if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { + if (drop_negative && ret->d_lockref.count == 1) { + spin_lock(&ret->d_lock); + /* Recheck condition under lock */ + if (d_is_negative(ret) && ret->d_lockref.count == 1) + __d_drop(ret); + spin_unlock(&ret->d_lock); + } + dput(ret); + ret = ERR_PTR(-ENOENT); + } + return ret; } static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, const char *name, unsigned int namelen, size_t prelen, const char *post, - struct dentry **ret) + struct dentry **ret, bool drop_negative) { struct dentry *this; + struct path path; int err; bool last_element = !post[0]; - this = lookup_positive_unlocked(name, base, namelen); + this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative); if (IS_ERR(this)) { err = PTR_ERR(this); this = NULL; @@ -226,12 +256,15 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, d->stop = true; goto put_and_out; } + + path.dentry = this; + path.mnt = d->mnt; if (!d_can_lookup(this)) { if (d->is_dir || !last_element) { d->stop = true; goto put_and_out; } - err = ovl_check_metacopy_xattr(this); + err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path); if (err < 0) goto out_err; @@ -251,14 +284,14 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, if (d->last) goto out; - if (ovl_is_opaquedir(this)) { + if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) { d->stop = true; if (last_element) d->opaque = true; goto out; } } - err = ovl_check_redirect(this, d, prelen, post); + err = ovl_check_redirect(&path, d, prelen, post); if (err) goto out_err; out: @@ -276,7 +309,7 @@ out_err: } static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, - struct dentry **ret) + struct dentry **ret, bool drop_negative) { /* Counting down from the end, since the prefix can change */ size_t rem = d->name.len - 1; @@ -285,7 +318,7 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, if (d->name.name[0] != '/') return ovl_lookup_single(base, d, d->name.name, d->name.len, - 0, "", ret); + 0, "", ret, drop_negative); while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) { const char *s = d->name.name + d->name.len - rem; @@ -298,7 +331,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, return -EIO; err = ovl_lookup_single(base, d, s, thislen, - d->name.len - rem, next, &base); + d->name.len - rem, next, &base, + drop_negative); dput(dentry); if (err) return err; @@ -331,7 +365,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, ofs->layers[i].fs->bad_uuid) continue; - origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt, + origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt, connected); if (origin) break; @@ -343,7 +377,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, return PTR_ERR(origin); if (upperdentry && !ovl_is_whiteout(upperdentry) && - ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT)) + inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode)) goto invalid; if (!*stackp) @@ -364,13 +398,13 @@ invalid: upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); dput(origin); - return -EIO; + return -ESTALE; } static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, - struct ovl_path **stackp, unsigned int *ctrp) + struct ovl_path **stackp) { - struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN); + struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN); int err; if (IS_ERR_OR_NULL(fh)) @@ -385,10 +419,6 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, return err; } - if (WARN_ON(*ctrp)) - return -EIO; - - *ctrp = 1; return 0; } @@ -396,10 +426,10 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, * Verify that @fh matches the file handle stored in xattr @name. * Return 0 on match, -ESTALE on mismatch, < 0 on error. */ -static int ovl_verify_fh(struct dentry *dentry, const char *name, - const struct ovl_fh *fh) +static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, const struct ovl_fh *fh) { - struct ovl_fh *ofh = ovl_get_fh(dentry, name); + struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox); int err = 0; if (!ofh) @@ -423,23 +453,24 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name, * * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_set_fh(struct dentry *dentry, const char *name, - struct dentry *real, bool is_upper, bool set) +int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, bool is_upper, + bool set) { struct inode *inode; struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(real, is_upper); + fh = ovl_encode_real_fh(ofs, real, is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) { fh = NULL; goto fail; } - err = ovl_verify_fh(dentry, name, fh); + err = ovl_verify_fh(ofs, dentry, ox, fh); if (set && err == -ENODATA) - err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0); + err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); if (err) goto fail; @@ -464,11 +495,11 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) if (!d_is_dir(index)) return dget(index); - fh = ovl_get_fh(index, OVL_XATTR_UPPER); + fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER); if (IS_ERR_OR_NULL(fh)) return ERR_CAST(fh); - upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true); + upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true); kfree(fh); if (IS_ERR_OR_NULL(upper)) @@ -484,12 +515,6 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) return upper; } -/* Is this a leftover from create/whiteout of directory index entry? */ -static bool ovl_is_temp_index(struct dentry *index) -{ - return index->d_name.name[0] == '#'; -} - /* * Verify that an index entry name matches the origin file handle stored in * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. @@ -507,11 +532,6 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) if (!d_inode(index)) return 0; - /* Cleanup leftover from index create/cleanup attempt */ - err = -ESTALE; - if (ovl_is_temp_index(index)) - goto fail; - err = -EINVAL; if (index->d_name.len < sizeof(struct ovl_fb)*2) goto fail; @@ -568,7 +588,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) goto fail; } - err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh); + err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh); dput(upper); if (err) goto fail; @@ -579,7 +599,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) if (err) goto fail; - if (ovl_get_nlink(origin.dentry, index, 0) == 0) + if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0) goto orphan; } @@ -628,15 +648,16 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) * If the index dentry for a copy up origin inode is positive, but points * to an inode different than the upper inode, then either the upper inode * has been copied up and not indexed or it was indexed, but since then - * index dir was cleared. Either way, that index cannot be used to indentify + * index dir was cleared. Either way, that index cannot be used to identify * the overlay inode. */ -int ovl_get_index_name(struct dentry *origin, struct qstr *name) +int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, + struct qstr *name) { struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(origin, false); + fh = ovl_encode_real_fh(ofs, origin, false); if (IS_ERR(fh)) return PTR_ERR(fh); @@ -685,11 +706,12 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, bool is_dir = d_is_dir(origin); int err; - err = ovl_get_index_name(origin, &name); + err = ovl_get_index_name(ofs, origin, &name); if (err) return ERR_PTR(err); - index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len); + index = lookup_one_positive_unlocked(ovl_upper_mnt_userns(ofs), name.name, + ofs->indexdir, name.len); if (IS_ERR(index)) { err = PTR_ERR(index); if (err == -ENOENT) { @@ -715,7 +737,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, index = ERR_PTR(-ESTALE); goto out; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || - ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { + inode_wrong_type(inode, d_inode(origin)->i_mode)) { /* * Index should always be of the same file type as origin * except for the case of a whiteout index. A whiteout @@ -735,7 +757,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, } /* Verify that dir index 'upper' xattr points to upper dir */ - err = ovl_verify_upper(index, upper, false); + err = ovl_verify_upper(ofs, index, upper, false); if (err) { if (err == -ESTALE) { pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", @@ -784,19 +806,19 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) } /* Fix missing 'origin' xattr */ -static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower, - struct dentry *upper) +static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, + struct dentry *lower, struct dentry *upper) { int err; - if (ovl_check_origin_xattr(upper)) + if (ovl_check_origin_xattr(ofs, upper)) return 0; err = ovl_want_write(dentry); if (err) return err; - err = ovl_set_origin(dentry, lower, upper); + err = ovl_set_origin(ofs, lower, upper); if (!err) err = ovl_set_impure(dentry->d_parent, upper->d_parent); @@ -823,7 +845,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, struct dentry *this; unsigned int i; int err; - bool metacopy = false; + bool uppermetacopy = false; struct ovl_lookup_data d = { .sb = dentry->d_sb, .name = dentry->d_name, @@ -841,18 +863,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, old_cred = ovl_override_creds(dentry->d_sb); upperdir = ovl_dentry_upper(dentry->d_parent); if (upperdir) { - err = ovl_lookup_layer(upperdir, &d, &upperdentry); + d.mnt = ovl_upper_mnt(ofs); + err = ovl_lookup_layer(upperdir, &d, &upperdentry, true); if (err) goto out; - if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) { + if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) { dput(upperdentry); err = -EREMOTE; goto out; } if (upperdentry && !d.is_dir) { - unsigned int origin_ctr = 0; - /* * Lookup copy up origin by decoding origin file handle. * We may get a disconnected dentry, which is fine, @@ -863,13 +884,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * number - it's the same as if we held a reference * to a dentry in lower layer that was moved under us. */ - err = ovl_check_origin(ofs, upperdentry, &origin_path, - &origin_ctr); + err = ovl_check_origin(ofs, upperdentry, &origin_path); if (err) goto out_put_upper; if (d.metacopy) - metacopy = true; + uppermetacopy = true; } if (d.redirect) { @@ -899,19 +919,27 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, else d.last = lower.layer->idx == roe->numlower; - err = ovl_lookup_layer(lower.dentry, &d, &this); + d.mnt = lower.layer->mnt; + err = ovl_lookup_layer(lower.dentry, &d, &this, false); if (err) goto out_put; if (!this) continue; + if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) { + dput(this); + err = -EPERM; + pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry); + goto out_put; + } + /* * If no origin fh is stored in upper of a merge dir, store fh * of lower dir and set upper parent "impure". */ if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) { - err = ovl_fix_origin(dentry, this, upperdentry); + err = ovl_fix_origin(ofs, dentry, this, upperdentry); if (err) { dput(this); goto out_put; @@ -930,7 +958,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (upperdentry && !ctr && ((d.is_dir && ovl_verify_lower(dentry->d_sb)) || (!d.is_dir && ofs->config.index && origin_path))) { - err = ovl_verify_origin(upperdentry, this, false); + err = ovl_verify_origin(ofs, upperdentry, this, false); if (err) { dput(this); if (d.is_dir) @@ -940,21 +968,21 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, origin = this; } - if (d.metacopy) - metacopy = true; - /* - * Do not store intermediate metacopy dentries in chain, - * except top most lower metacopy dentry - */ if (d.metacopy && ctr) { + /* + * Do not store intermediate metacopy dentries in + * lower chain, except top most lower metacopy dentry. + * Continue the loop so that if there is an absolute + * redirect on this dentry, poe can be reset to roe. + */ dput(this); - continue; + this = NULL; + } else { + stack[ctr].dentry = this; + stack[ctr].layer = lower.layer; + ctr++; } - stack[ctr].dentry = this; - stack[ctr].layer = lower.layer; - ctr++; - /* * Following redirects can have security consequences: it's like * a symlink into the lower layer without the permission checks. @@ -982,22 +1010,19 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } } - if (metacopy) { - /* - * Found a metacopy dentry but did not find corresponding - * data dentry - */ - if (d.metacopy) { - err = -EIO; - goto out_put; - } - - err = -EPERM; - if (!ofs->config.metacopy) { - pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", - dentry); - goto out_put; - } + /* + * For regular non-metacopy upper dentries, there is no lower + * path based lookup, hence ctr will be zero. If a dentry is found + * using ORIGIN xattr on upper, install it in stack. + * + * For metacopy dentry, path based lookup will find lower dentries. + * Just make sure a corresponding data dentry has been found. + */ + if (d.metacopy || (uppermetacopy && !ctr)) { + pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n", + dentry); + err = -EIO; + goto out_put; } else if (!d.is_dir && upperdentry && !ctr && origin_path) { if (WARN_ON(stack != NULL)) { err = -EIO; @@ -1005,25 +1030,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } stack = origin_path; ctr = 1; + origin = origin_path->dentry; origin_path = NULL; } /* - * Lookup index by lower inode and verify it matches upper inode. - * We only trust dir index if we verified that lower dir matches - * origin, otherwise dir index entries may be inconsistent and we - * ignore them. + * Always lookup index if there is no-upperdentry. + * + * For the case of upperdentry, we have set origin by now if it + * needed to be set. There are basically three cases. + * + * For directories, lookup index by lower inode and verify it matches + * upper inode. We only trust dir index if we verified that lower dir + * matches origin, otherwise dir index entries may be inconsistent + * and we ignore them. * - * For non-dir upper metacopy dentry, we already set "origin" if we - * verified that lower matched upper origin. If upper origin was - * not present (because lower layer did not support fh encode/decode), - * or indexing is not enabled, do not set "origin" and skip looking up - * index. This case should be handled in same way as a non-dir upper - * without ORIGIN is handled. + * For regular upper, we already set origin if upper had ORIGIN + * xattr. There is no verification though as there is no path + * based dentry lookup in lower in this case. + * + * For metacopy upper, we set a verified origin already if index + * is enabled and if upper had an ORIGIN xattr. * - * Always lookup index of non-dir non-metacopy and non-upper. */ - if (ctr && (!upperdentry || (!d.is_dir && !metacopy))) + if (!upperdentry && ctr) origin = stack[0].dentry; if (origin && ovl_indexdir(dentry->d_sb) && @@ -1050,13 +1080,21 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (upperdentry) ovl_dentry_set_upper_alias(dentry); else if (index) { - upperdentry = dget(index); - upperredirect = ovl_get_redirect_xattr(upperdentry, 0); + struct path upperpath = { + .dentry = upperdentry = dget(index), + .mnt = ovl_upper_mnt(ofs), + }; + + upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0); if (IS_ERR(upperredirect)) { err = PTR_ERR(upperredirect); upperredirect = NULL; goto out_free_oe; } + err = ovl_check_metacopy_xattr(ofs, &upperpath); + if (err < 0) + goto out_free_oe; + uppermetacopy = err; } if (upperdentry || ctr) { @@ -1074,8 +1112,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; + if (upperdentry && !uppermetacopy) + ovl_set_flag(OVL_UPPERDATA, inode); } + ovl_dentry_update_reval(dentry, upperdentry, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + revert_creds(old_cred); if (origin_path) { dput(origin_path->dentry); @@ -1133,8 +1176,8 @@ bool ovl_lower_positive(struct dentry *dentry) struct dentry *this; struct dentry *lowerdir = poe->lowerstack[i].dentry; - this = lookup_positive_unlocked(name->name, lowerdir, - name->len); + this = lookup_one_positive_unlocked(mnt_user_ns(poe->lowerstack[i].layer->mnt), + name->name, lowerdir, name->len); if (IS_ERR(this)) { switch (PTR_ERR(this)) { case -ENOENT: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3d3f2b8bdae5..eee8f08d32b6 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <linux/uuid.h> #include <linux/fs.h> +#include <linux/namei.h> #include "ovl_entry.h" #undef pr_fmt @@ -22,14 +23,20 @@ enum ovl_path_type { #define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) #define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN) -#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." -#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" -#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" -#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" -#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" -#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" -#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" -#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy" +#define OVL_XATTR_NAMESPACE "overlay." +#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE + +enum ovl_xattr { + OVL_XATTR_OPAQUE, + OVL_XATTR_REDIRECT, + OVL_XATTR_ORIGIN, + OVL_XATTR_IMPURE, + OVL_XATTR_NLINK, + OVL_XATTR_UPPER, + OVL_XATTR_METACOPY, + OVL_XATTR_PROTATTR, +}; enum ovl_inode_flag { /* Pure upper dir that may contain non pure upper entries */ @@ -48,6 +55,12 @@ enum ovl_entry_flag { OVL_E_CONNECTED, }; +enum { + OVL_XINO_OFF, + OVL_XINO_AUTO, + OVL_XINO_ON, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -87,7 +100,7 @@ struct ovl_fb { u8 flags; /* OVL_FH_FLAG_* */ u8 type; /* fid_type of fid */ uuid_t uuid; /* uuid of filesystem */ - u32 fid[0]; /* file identifier should be 32bit aligned in-memory */ + u32 fid[]; /* file identifier should be 32bit aligned in-memory */ } __packed; /* In-memory and on-wire format for overlay file handle */ @@ -104,90 +117,184 @@ struct ovl_fh { #define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \ offsetof(struct ovl_fb, fid)) -static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) +extern const char *const ovl_xattr_table[][2]; +static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox) +{ + return ovl_xattr_table[ox][ofs->config.userxattr]; +} + +/* + * When changing ownership of an upper object map the intended ownership + * according to the upper layer's idmapping. When an upper mount idmaps files + * that are stored on-disk as owned by id 1001 to id 1000 this means stat on + * this object will report it as being owned by id 1000 when calling stat via + * the upper mount. + * In order to change ownership of an object so stat reports id 1000 when + * called on an idmapped upper mount the value written to disk - i.e., the + * value stored in ia_*id - must 1001. The mount mapping helper will thus take + * care to map 1000 to 1001. + * The mnt idmapping helpers are nops if the upper layer isn't idmapped. + */ +static inline int ovl_do_notify_change(struct ovl_fs *ofs, + struct dentry *upperdentry, + struct iattr *attr) +{ + return notify_change(ovl_upper_mnt_userns(ofs), upperdentry, attr, NULL); +} + +static inline int ovl_do_rmdir(struct ovl_fs *ofs, + struct inode *dir, struct dentry *dentry) { - int err = vfs_rmdir(dir, dentry); + int err = vfs_rmdir(ovl_upper_mnt_userns(ofs), dir, dentry); pr_debug("rmdir(%pd2) = %i\n", dentry, err); return err; } -static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry) +static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir, + struct dentry *dentry) { - int err = vfs_unlink(dir, dentry, NULL); + int err = vfs_unlink(ovl_upper_mnt_userns(ofs), dir, dentry, NULL); pr_debug("unlink(%pd2) = %i\n", dentry, err); return err; } -static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *new_dentry) +static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry, + struct inode *dir, struct dentry *new_dentry) { - int err = vfs_link(old_dentry, dir, new_dentry, NULL); + int err = vfs_link(old_dentry, ovl_upper_mnt_userns(ofs), dir, new_dentry, NULL); pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err); return err; } -static inline int ovl_do_create(struct inode *dir, struct dentry *dentry, +static inline int ovl_do_create(struct ovl_fs *ofs, + struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_create(dir, dentry, mode, true); + int err = vfs_create(ovl_upper_mnt_userns(ofs), dir, dentry, mode, true); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } -static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry, +static inline int ovl_do_mkdir(struct ovl_fs *ofs, + struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_mkdir(dir, dentry, mode); + int err = vfs_mkdir(ovl_upper_mnt_userns(ofs), dir, dentry, mode); pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } -static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry, +static inline int ovl_do_mknod(struct ovl_fs *ofs, + struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int err = vfs_mknod(dir, dentry, mode, dev); + int err = vfs_mknod(ovl_upper_mnt_userns(ofs), dir, dentry, mode, dev); pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err); return err; } -static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry, +static inline int ovl_do_symlink(struct ovl_fs *ofs, + struct inode *dir, struct dentry *dentry, const char *oldname) { - int err = vfs_symlink(dir, dentry, oldname); + int err = vfs_symlink(ovl_upper_mnt_userns(ofs), dir, dentry, oldname); pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); return err; } -static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name, + void *value, size_t size) { - int err = vfs_setxattr(dentry, name, value, size, flags); - pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n", + int err, len; + + WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb); + + err = vfs_getxattr(mnt_user_ns(path->mnt), path->dentry, + name, value, size); + len = (value && err > 0) ? err : 0; + + pr_debug("getxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n", + path->dentry, name, min(len, 48), value, size, err); + return err; +} + +static inline ssize_t ovl_getxattr_upper(struct ovl_fs *ofs, + struct dentry *upperdentry, + enum ovl_xattr ox, void *value, + size_t size) +{ + struct path upperpath = { + .dentry = upperdentry, + .mnt = ovl_upper_mnt(ofs), + }; + + return ovl_do_getxattr(&upperpath, ovl_xattr(ofs, ox), value, size); +} + +static inline ssize_t ovl_path_getxattr(struct ovl_fs *ofs, + const struct path *path, + enum ovl_xattr ox, void *value, + size_t size) +{ + return ovl_do_getxattr(path, ovl_xattr(ofs, ox), value, size); +} + +static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry, + const char *name, const void *value, + size_t size, int flags) +{ + int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name, + value, size, flags); + + pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n", dentry, name, min((int)size, 48), value, size, flags, err); return err; } -static inline int ovl_do_removexattr(struct dentry *dentry, const char *name) +static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, const void *value, + size_t size) { - int err = vfs_removexattr(dentry, name); + return ovl_do_setxattr(ofs, dentry, ovl_xattr(ofs, ox), value, size, 0); +} + +static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry, + const char *name) +{ + int err = vfs_removexattr(ovl_upper_mnt_userns(ofs), dentry, name); pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err); return err; } -static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, - struct inode *newdir, struct dentry *newdentry, - unsigned int flags) +static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox) +{ + return ovl_do_removexattr(ofs, dentry, ovl_xattr(ofs, ox)); +} + +static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, + struct dentry *olddentry, struct inode *newdir, + struct dentry *newdentry, unsigned int flags) { int err; + struct renamedata rd = { + .old_mnt_userns = ovl_upper_mnt_userns(ofs), + .old_dir = olddir, + .old_dentry = olddentry, + .new_mnt_userns = ovl_upper_mnt_userns(ofs), + .new_dir = newdir, + .new_dentry = newdentry, + .flags = flags, + }; pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags); - err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags); + err = vfs_rename(&rd); if (err) { pr_debug("...rename(%pd2, %pd2, ...) = %i\n", olddentry, newdentry, err); @@ -195,20 +302,31 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, return err; } -static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) +static inline int ovl_do_whiteout(struct ovl_fs *ofs, + struct inode *dir, struct dentry *dentry) { - int err = vfs_whiteout(dir, dentry); + int err = vfs_whiteout(ovl_upper_mnt_userns(ofs), dir, dentry); pr_debug("whiteout(%pd2) = %i\n", dentry, err); return err; } -static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) +static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs, + struct dentry *dentry, umode_t mode) { - struct dentry *ret = vfs_tmpfile(dentry, mode, 0); - int err = PTR_ERR_OR_ZERO(ret); + struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry }; + struct file *file = vfs_tmpfile_open(ovl_upper_mnt_userns(ofs), &path, mode, + O_LARGEFILE | O_WRONLY, current_cred()); + int err = PTR_ERR_OR_ZERO(file); pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); - return ret; + return file; +} + +static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs, + const char *name, + struct dentry *base, int len) +{ + return lookup_one(ovl_upper_mnt_userns(ofs), name, base, len); } static inline bool ovl_open_flags_need_copy_up(int flags) @@ -219,6 +337,18 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs) +{ + /* + * To avoid regressions in existing setups with overlay lower offline + * changes, we allow lower changes only if none of the new features + * are used. + */ + return (!ofs->config.index && !ofs->config.metacopy && + !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON); +} + + /* util.c */ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); @@ -230,15 +360,20 @@ bool ovl_index_all(struct super_block *sb); bool ovl_verify_lower(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask); bool ovl_dentry_weird(struct dentry *dentry); enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); void ovl_path_lower(struct dentry *dentry, struct path *path); void ovl_path_lowerdata(struct dentry *dentry, struct path *path); +void ovl_i_path_real(struct inode *inode, struct path *path); enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); +enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_lowerdata(struct dentry *dentry); +const struct ovl_layer *ovl_i_layer_lower(struct inode *inode); const struct ovl_layer *ovl_layer_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); struct dentry *ovl_i_dentry_upper(struct inode *inode); @@ -264,25 +399,32 @@ void ovl_set_upperdata(struct inode *inode); bool ovl_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); -void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, - struct dentry *lowerdentry, struct dentry *lowerdata); void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dir_modified(struct dentry *dentry, bool impurity); u64 ovl_dentry_version_get(struct dentry *dentry); bool ovl_is_whiteout(struct dentry *dentry); -struct file *ovl_path_open(struct path *path, int flags); +struct file *ovl_path_open(const struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry, int flags); void ovl_copy_up_end(struct dentry *dentry); bool ovl_already_copied_up(struct dentry *dentry, int flags); -bool ovl_check_origin_xattr(struct dentry *dentry); -bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, - const char *name, const void *value, size_t size, +bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, + enum ovl_xattr ox); +bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); + +static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs, + struct dentry *upperdentry) +{ + struct path upperpath = { + .dentry = upperdentry, + .mnt = ovl_upper_mnt(ofs), + }; + return ovl_path_check_origin_xattr(ofs, &upperpath); +} + +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, + enum ovl_xattr ox, const void *value, size_t size, int xerr); int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); -void ovl_set_flag(unsigned long flag, struct inode *inode); -void ovl_clear_flag(unsigned long flag, struct inode *inode); -bool ovl_test_flag(unsigned long flag, struct inode *inode); bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); bool ovl_is_inuse(struct dentry *dentry); @@ -290,15 +432,46 @@ bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry); void ovl_nlink_end(struct dentry *dentry); int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); -int ovl_check_metacopy_xattr(struct dentry *dentry); +int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_is_metacopy_dentry(struct dentry *dentry); -char *ovl_get_redirect_xattr(struct dentry *dentry, int padding); -ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value, - size_t padding); +char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); +int ovl_sync_status(struct ovl_fs *ofs); + +static inline void ovl_set_flag(unsigned long flag, struct inode *inode) +{ + set_bit(flag, &OVL_I(inode)->flags); +} + +static inline void ovl_clear_flag(unsigned long flag, struct inode *inode) +{ + clear_bit(flag, &OVL_I(inode)->flags); +} + +static inline bool ovl_test_flag(unsigned long flag, struct inode *inode) +{ + return test_bit(flag, &OVL_I(inode)->flags); +} + +static inline bool ovl_is_impuredir(struct super_block *sb, + struct dentry *upperdentry) +{ + struct ovl_fs *ofs = OVL_FS(sb); + struct path upperpath = { + .dentry = upperdentry, + .mnt = ovl_upper_mnt(ofs), + }; -static inline bool ovl_is_impuredir(struct dentry *dentry) + return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE); +} + +/* + * With xino=auto, we do best effort to keep all inodes on same st_dev and + * d_ino consistent with st_ino. + * With xino=on, we do the same effort but we warn if we failed. + */ +static inline bool ovl_xino_warn(struct super_block *sb) { - return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); + return OVL_FS(sb)->config.xino == OVL_XINO_ON; } /* All layers on same fs? */ @@ -339,18 +512,23 @@ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len); static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) { + if (fh_len < sizeof(struct ovl_fh)) + return -EINVAL; + return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET); } -struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt, - bool connected); +struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct vfsmount *mnt, bool connected); int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); -int ovl_verify_set_fh(struct dentry *dentry, const char *name, - struct dentry *real, bool is_upper, bool set); +int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, bool is_upper, + bool set); struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); -int ovl_get_index_name(struct dentry *origin, struct qstr *name); +int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, + struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool verify); @@ -359,57 +537,82 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); -static inline int ovl_verify_origin(struct dentry *upper, +static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) { - return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set); + return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin, + false, set); } -static inline int ovl_verify_upper(struct dentry *index, - struct dentry *upper, bool set) +static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, + struct dentry *upper, bool set) { - return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set); + return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set); } /* readdir.c */ extern const struct file_operations ovl_dir_operations; +struct file *ovl_dir_real_file(const struct file *file, bool want_upper); int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); -void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); +void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, + struct list_head *list); void ovl_cache_free(struct list_head *list); void ovl_dir_cache_free(struct inode *inode); -int ovl_check_d_type_supported(struct path *realpath); -void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, - struct dentry *dentry, int level); +int ovl_check_d_type_supported(const struct path *realpath); +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, + struct vfsmount *mnt, struct dentry *dentry, int level); int ovl_indexdir_cleanup(struct ovl_fs *ofs); +/* + * Can we iterate real dir directly? + * + * Non-merge dir may contain whiteouts from a time it was a merge upper, before + * lower dir was removed under it and possibly before it was rotated from upper + * to lower layer. + */ +static inline bool ovl_dir_is_real(struct dentry *dir) +{ + return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir)); +} + /* inode.c */ int ovl_set_nlink_upper(struct dentry *dentry); int ovl_set_nlink_lower(struct dentry *dentry); -unsigned int ovl_get_nlink(struct dentry *lowerdentry, +unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback); -int ovl_setattr(struct dentry *dentry, struct iattr *attr); -int ovl_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags); -int ovl_permission(struct inode *inode, int mask); +int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr); +int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); +int ovl_permission(struct user_namespace *mnt_userns, struct inode *inode, + int mask); int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags); int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); -struct posix_acl *ovl_get_acl(struct inode *inode, int type); + +#ifdef CONFIG_FS_POSIX_ACL +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu); +#else +#define ovl_get_acl NULL +#endif + int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); -bool ovl_is_private_xattr(const char *name); +bool ovl_is_private_xattr(struct super_block *sb, const char *name); struct ovl_inode_params { struct inode *newinode; struct dentry *upperdentry; struct ovl_path *lowerpath; - struct dentry *index; + bool index; unsigned int numlower; char *redirect; struct dentry *lowerdata; }; +void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, + unsigned long ino, int fsid); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper); @@ -417,27 +620,37 @@ bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir); struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir); struct inode *ovl_get_inode(struct super_block *sb, struct ovl_inode_params *oip); -static inline void ovl_copyattr(struct inode *from, struct inode *to) -{ - to->i_uid = from->i_uid; - to->i_gid = from->i_gid; - to->i_mode = from->i_mode; - to->i_atime = from->i_atime; - to->i_mtime = from->i_mtime; - to->i_ctime = from->i_ctime; - i_size_write(to, i_size_read(from)); -} +void ovl_copyattr(struct inode *to); + +/* vfs inode flags copied from real to ovl inode */ +#define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) +/* vfs inode flags read from overlay.protattr xattr to ovl inode */ +#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE) + +/* + * fileattr flags copied from lower to upper inode on copy up. + * We cannot copy up immutable/append-only flags, because that would prevent + * linking temp inode to upper dir, so we store them in xattr instead. + */ +#define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) +#define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) +#define OVL_PROT_FS_FLAGS_MASK (FS_APPEND_FL | FS_IMMUTABLE_FL) +#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE) + +void ovl_check_protattr(struct inode *inode, struct dentry *upper); +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa); static inline void ovl_copyflags(struct inode *from, struct inode *to) { - unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; + unsigned int mask = OVL_COPY_I_FLAGS_MASK; inode_set_flags(to, from->i_flags & mask, mask); } /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry); struct ovl_cattr { dev_t rdev; @@ -448,25 +661,35 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) -struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, +int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, + struct dentry **newdentry, umode_t mode); +struct dentry *ovl_create_real(struct ovl_fs *ofs, + struct inode *dir, struct dentry *newdentry, + struct ovl_cattr *attr); +int ovl_cleanup(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir); +struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr); -int ovl_cleanup(struct inode *dir, struct dentry *dentry); -struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ extern const struct file_operations ovl_file_operations; int __init ovl_aio_request_cache_init(void); void ovl_aio_request_cache_destroy(void); +int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa); +int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa); +int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int ovl_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_with_data(struct dentry *dentry); -int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_maybe_copy_up(struct dentry *dentry, int flags); -int ovl_copy_xattr(struct dentry *old, struct dentry *new); -int ovl_set_attr(struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper); -int ovl_set_origin(struct dentry *dentry, struct dentry *lower, +int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new); +int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, + bool is_upper); +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, struct dentry *upper); /* export.c */ diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 89015ea822e7..e1af8f660698 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -14,9 +14,12 @@ struct ovl_config { bool redirect_follow; const char *redirect_mode; bool index; + bool uuid; bool nfs_export; int xino; bool metacopy; + bool userxattr; + bool ovl_volatile; }; struct ovl_sb { @@ -46,7 +49,6 @@ struct ovl_path { /* private information held for overlayfs's superblock */ struct ovl_fs { - struct vfsmount *upper_mnt; unsigned int numlayer; /* Number of unique fs among layers including upper fs */ unsigned int numfs; @@ -68,20 +70,41 @@ struct ovl_fs { /* Did we take the inuse lock? */ bool upperdir_locked; bool workdir_locked; + bool share_whiteout; /* Traps in ovl inode cache */ - struct inode *upperdir_trap; struct inode *workbasedir_trap; struct inode *workdir_trap; struct inode *indexdir_trap; /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */ int xino_mode; + /* For allocation of non-persistent inode numbers */ + atomic_long_t last_ino; + /* Whiteout dentry cache */ + struct dentry *whiteout; + /* r/o snapshot of upperdir sb's only taken on volatile mounts */ + errseq_t errseq; }; +static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs) +{ + return ofs->layers[0].mnt; +} + +static inline struct user_namespace *ovl_upper_mnt_userns(struct ovl_fs *ofs) +{ + return mnt_user_ns(ovl_upper_mnt(ofs)); +} + static inline struct ovl_fs *OVL_FS(struct super_block *sb) { return (struct ovl_fs *)sb->s_fs_info; } +static inline bool ovl_should_sync(struct ovl_fs *ofs) +{ + return !ofs->config.ovl_volatile; +} + /* private information held for every overlayfs dentry */ struct ovl_entry { union { @@ -111,7 +134,7 @@ struct ovl_inode { unsigned long flags; struct inode vfs_inode; struct dentry *__upperdentry; - struct inode *lower; + struct ovl_path lowerpath; /* synchronize copy up and more */ struct mutex lock; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 40ac9ce2465a..2b210640036c 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -170,7 +170,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, return p; } -static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, +static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -179,22 +179,22 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, struct ovl_cache_entry *p; if (ovl_cache_entry_find_link(name, len, &newp, &parent)) - return 0; + return true; p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) { rdd->err = -ENOMEM; - return -ENOMEM; + return false; } list_add_tail(&p->l_node, rdd->list); rb_link_node(&p->node, parent, newp); rb_insert_color(&p->node, rdd->root); - return 0; + return true; } -static int ovl_fill_lowest(struct ovl_readdir_data *rdd, +static bool ovl_fill_lowest(struct ovl_readdir_data *rdd, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -211,7 +211,7 @@ static int ovl_fill_lowest(struct ovl_readdir_data *rdd, list_add_tail(&p->l_node, &rdd->middle); } - return rdd->err; + return rdd->err == 0; } void ovl_cache_free(struct list_head *list) @@ -250,7 +250,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) } } -static int ovl_fill_merge(struct dir_context *ctx, const char *name, +static bool ovl_fill_merge(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -264,11 +264,11 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type); } -static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd) { int err; struct ovl_cache_entry *p; - struct dentry *dentry; + struct dentry *dentry, *dir = path->dentry; const struct cred *old_cred; old_cred = ovl_override_creds(rdd->dentry->d_sb); @@ -278,7 +278,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) while (rdd->first_maybe_whiteout) { p = rdd->first_maybe_whiteout; rdd->first_maybe_whiteout = p->next_maybe_whiteout; - dentry = lookup_one_len(p->name, dir, p->len); + dentry = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len); if (!IS_ERR(dentry)) { p->is_whiteout = ovl_is_whiteout(dentry); dput(dentry); @@ -291,13 +291,13 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) return err; } -static inline int ovl_dir_read(struct path *realpath, +static inline int ovl_dir_read(const struct path *realpath, struct ovl_readdir_data *rdd) { struct file *realfile; int err; - realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); + realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE); if (IS_ERR(realfile)) return PTR_ERR(realfile); @@ -312,25 +312,13 @@ static inline int ovl_dir_read(struct path *realpath, } while (!err && rdd->count); if (!err && rdd->first_maybe_whiteout && rdd->dentry) - err = ovl_check_whiteouts(realpath->dentry, rdd); + err = ovl_check_whiteouts(realpath, rdd); fput(realfile); return err; } -/* - * Can we iterate real dir directly? - * - * Non-merge dir may contain whiteouts from a time it was a merge upper, before - * lower dir was removed under it and possibly before it was rotated from upper - * to lower layer. - */ -static bool ovl_dir_is_real(struct dentry *dir) -{ - return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir)); -} - static void ovl_dir_reset(struct file *file) { struct ovl_dir_file *od = file->private_data; @@ -438,15 +426,23 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) /* Map inode number to lower fs unique range */ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, - const char *name, int namelen) + const char *name, int namelen, bool warn) { - if (ino >> (64 - xinobits)) { - pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", - namelen, name, ino, xinobits); + unsigned int xinoshift = 64 - xinobits; + + if (unlikely(ino >> xinoshift)) { + if (warn) { + pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", + namelen, name, ino, xinobits); + } return ino; } - return ino | ((u64)fsid) << (64 - xinobits); + /* + * The lowest xinobit is reserved for mapping the non-peresistent inode + * numbers range, but this range is only exposed via st_ino, not here. + */ + return ino | ((u64)fsid) << (xinoshift + 1); } /* @@ -459,7 +455,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, * copy up origin, call vfs_getattr() on the overlay entry to make * sure that d_ino will be consistent with st_ino from stat(2). */ -static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) +static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p) { struct dentry *dir = path->dentry; @@ -483,8 +479,10 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) goto get; } } - this = lookup_one_len(p->name, dir, p->len); + this = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { + /* Mark a stale entry */ + p->is_whiteout = true; if (IS_ERR(this)) { err = PTR_ERR(this); this = NULL; @@ -515,7 +513,8 @@ get: } else if (xinobits && !OVL_TYPE_UPPER(type)) { ino = ovl_remap_lower_ino(ino, xinobits, ovl_layer_lower(this)->fsid, - p->name, p->len); + p->name, p->len, + ovl_xino_warn(dir->d_sb)); } out: @@ -529,7 +528,7 @@ fail: goto out; } -static int ovl_fill_plain(struct dir_context *ctx, const char *name, +static bool ovl_fill_plain(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -541,14 +540,14 @@ static int ovl_fill_plain(struct dir_context *ctx, const char *name, p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) { rdd->err = -ENOMEM; - return -ENOMEM; + return false; } list_add_tail(&p->l_node, rdd->list); - return 0; + return true; } -static int ovl_dir_read_impure(struct path *path, struct list_head *list, +static int ovl_dir_read_impure(const struct path *path, struct list_head *list, struct rb_root *root) { int err; @@ -593,10 +592,11 @@ static int ovl_dir_read_impure(struct path *path, struct list_head *list, return 0; } -static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) +static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path) { int res; struct dentry *dentry = path->dentry; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct ovl_dir_cache *cache; cache = ovl_dir_cache(d_inode(dentry)); @@ -623,8 +623,8 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) * Removing the "impure" xattr is best effort. */ if (!ovl_want_write(dentry)) { - ovl_do_removexattr(ovl_dentry_upper(dentry), - OVL_XATTR_IMPURE); + ovl_removexattr(ofs, ovl_dentry_upper(dentry), + OVL_XATTR_IMPURE); ovl_drop_write(dentry); } ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); @@ -645,9 +645,10 @@ struct ovl_readdir_translate { u64 parent_ino; int fsid; int xinobits; + bool xinowarn; }; -static int ovl_fill_real(struct dir_context *ctx, const char *name, +static bool ovl_fill_real(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -665,7 +666,7 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name, ino = p->ino; } else if (rdt->xinobits) { ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid, - name, namelen); + name, namelen, rdt->xinowarn); } return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); @@ -696,6 +697,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) .ctx.actor = ovl_fill_real, .orig_ctx = ctx, .xinobits = ovl_xino_bits(dir->d_sb), + .xinowarn = ovl_xino_warn(dir->d_sb), }; if (rdt.xinobits && lower_layer) @@ -732,8 +734,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) struct ovl_dir_file *od = file->private_data; struct dentry *dentry = file->f_path.dentry; struct ovl_cache_entry *p; + const struct cred *old_cred; int err; + old_cred = ovl_override_creds(dentry->d_sb); if (!ctx->pos) ovl_dir_reset(file); @@ -747,17 +751,20 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) (ovl_same_fs(dentry->d_sb) && (ovl_is_impure_dir(file) || OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { - return ovl_iterate_real(file, ctx); + err = ovl_iterate_real(file, ctx); + } else { + err = iterate_dir(od->realfile, ctx); } - return iterate_dir(od->realfile, ctx); + goto out; } if (!od->cache) { struct ovl_dir_cache *cache; cache = ovl_cache_get(dentry); + err = PTR_ERR(cache); if (IS_ERR(cache)) - return PTR_ERR(cache); + goto out; od->cache = cache; ovl_seek_cursor(od, ctx->pos); @@ -769,15 +776,21 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) if (!p->ino) { err = ovl_cache_update_ino(&file->f_path, p); if (err) - return err; + goto out; } + } + /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; } od->cursor = p->l_node.next; ctx->pos++; } - return 0; + err = 0; +out: + revert_creds(old_cred); + return err; } static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) @@ -820,47 +833,77 @@ out_unlock: return res; } -static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, - int datasync) +static struct file *ovl_dir_open_realfile(const struct file *file, + const struct path *realpath) { + struct file *res; + const struct cred *old_cred; + + old_cred = ovl_override_creds(file_inode(file)->i_sb); + res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE)); + revert_creds(old_cred); + + return res; +} + +/* + * Like ovl_real_fdget(), returns upperfile if dir was copied up since open. + * Unlike ovl_real_fdget(), this caches upperfile in file->private_data. + * + * TODO: use same abstract type for file->private_data of dir and file so + * upperfile could also be cached for files as well. + */ +struct file *ovl_dir_real_file(const struct file *file, bool want_upper) +{ + struct ovl_dir_file *od = file->private_data; struct dentry *dentry = file->f_path.dentry; - struct file *realfile = od->realfile; + struct file *old, *realfile = od->realfile; - /* Nothing to sync for lower */ if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) - return 0; + return want_upper ? NULL : realfile; /* * Need to check if we started out being a lower dir, but got copied up */ if (!od->is_upper) { - struct inode *inode = file_inode(file); - realfile = READ_ONCE(od->upperfile); if (!realfile) { struct path upperpath; ovl_path_upper(dentry, &upperpath); - realfile = ovl_path_open(&upperpath, O_RDONLY); - - inode_lock(inode); - if (!od->upperfile) { - if (IS_ERR(realfile)) { - inode_unlock(inode); - return PTR_ERR(realfile); - } - smp_store_release(&od->upperfile, realfile); - } else { - /* somebody has beaten us to it */ - if (!IS_ERR(realfile)) - fput(realfile); - realfile = od->upperfile; + realfile = ovl_dir_open_realfile(file, &upperpath); + if (IS_ERR(realfile)) + return realfile; + + old = cmpxchg_release(&od->upperfile, NULL, realfile); + if (old) { + fput(realfile); + realfile = old; } - inode_unlock(inode); } } + return realfile; +} + +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct file *realfile; + int err; + + err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb)); + if (err <= 0) + return err; + + realfile = ovl_dir_real_file(file, true); + err = PTR_ERR_OR_ZERO(realfile); + + /* Nothing to sync for lower */ + if (!realfile || err) + return err; + return vfs_fsync_range(realfile, start, end, datasync); } @@ -893,7 +936,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file) return -ENOMEM; type = ovl_path_real(file->f_path.dentry, &realpath); - realfile = ovl_path_open(&realpath, file->f_flags); + realfile = ovl_dir_open_realfile(file, &realpath); if (IS_ERR(realfile)) { kfree(od); return PTR_ERR(realfile); @@ -958,7 +1001,8 @@ del_entry: return err; } -void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) +void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, + struct list_head *list) { struct ovl_cache_entry *p; @@ -969,7 +1013,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) if (WARN_ON(!p->is_whiteout || !p->is_upper)) continue; - dentry = lookup_one_len(p->name, upper, p->len); + dentry = ovl_lookup_upper(ofs, p->name, upper, p->len); if (IS_ERR(dentry)) { pr_err("lookup '%s/%.*s' failed (%i)\n", upper->d_name.name, p->len, p->name, @@ -977,13 +1021,13 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) continue; } if (dentry->d_inode) - ovl_cleanup(upper->d_inode, dentry); + ovl_cleanup(ofs, upper->d_inode, dentry); dput(dentry); } inode_unlock(upper->d_inode); } -static int ovl_check_d_type(struct dir_context *ctx, const char *name, +static bool ovl_check_d_type(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -992,19 +1036,19 @@ static int ovl_check_d_type(struct dir_context *ctx, const char *name, /* Even if d_type is not supported, DT_DIR is returned for . and .. */ if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) - return 0; + return true; if (d_type != DT_UNKNOWN) rdd->d_type_supported = true; - return 0; + return true; } /* * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values * if error is encountered. */ -int ovl_check_d_type_supported(struct path *realpath) +int ovl_check_d_type_supported(const struct path *realpath) { int err; struct ovl_readdir_data rdd = { @@ -1019,7 +1063,10 @@ int ovl_check_d_type_supported(struct path *realpath) return rdd.d_type_supported; } -static void ovl_workdir_cleanup_recurse(struct path *path, int level) +#define OVL_INCOMPATDIR_NAME "incompat" + +static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path, + int level) { int err; struct inode *dir = path->dentry->d_inode; @@ -1033,6 +1080,19 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level) .root = &root, .is_lowest = false, }; + bool incompat = false; + + /* + * The "work/incompat" directory is treated specially - if it is not + * empty, instead of printing a generic error and mounting read-only, + * we will error about incompat features and fail the mount. + * + * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name + * starts with '#'. + */ + if (level == 2 && + !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME)) + incompat = true; err = ovl_dir_read(path, &rdd); if (err) @@ -1047,38 +1107,48 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level) continue; if (p->len == 2 && p->name[1] == '.') continue; + } else if (incompat) { + pr_err("overlay with incompat feature '%s' cannot be mounted\n", + p->name); + err = -EINVAL; + break; } - dentry = lookup_one_len(p->name, path->dentry, p->len); + dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len); if (IS_ERR(dentry)) continue; if (dentry->d_inode) - ovl_workdir_cleanup(dir, path->mnt, dentry, level); + err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level); dput(dentry); + if (err) + break; } inode_unlock(dir); out: ovl_cache_free(&list); + return err; } -void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, - struct dentry *dentry, int level) +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, + struct vfsmount *mnt, struct dentry *dentry, int level) { int err; if (!d_is_dir(dentry) || level > 1) { - ovl_cleanup(dir, dentry); - return; + return ovl_cleanup(ofs, dir, dentry); } - err = ovl_do_rmdir(dir, dentry); + err = ovl_do_rmdir(ofs, dir, dentry); if (err) { struct path path = { .mnt = mnt, .dentry = dentry }; inode_unlock(dir); - ovl_workdir_cleanup_recurse(&path, level + 1); + err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1); inode_lock_nested(dir, I_MUTEX_PARENT); - ovl_cleanup(dir, dentry); + if (!err) + err = ovl_cleanup(ofs, dir, dentry); } + + return err; } int ovl_indexdir_cleanup(struct ovl_fs *ofs) @@ -1087,7 +1157,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) struct dentry *indexdir = ofs->indexdir; struct dentry *index = NULL; struct inode *dir = indexdir->d_inode; - struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir }; + struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; LIST_HEAD(list); struct rb_root root = RB_ROOT; struct ovl_cache_entry *p; @@ -1111,18 +1181,25 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) if (p->len == 2 && p->name[1] == '.') continue; } - index = lookup_one_len(p->name, indexdir, p->len); + index = ovl_lookup_upper(ofs, p->name, indexdir, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; break; } + /* Cleanup leftover from index create/cleanup attempt */ + if (index->d_name.name[0] == '#') { + err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1); + if (err) + break; + goto next; + } err = ovl_verify_index(ofs, index); if (!err) { goto next; } else if (err == -ESTALE) { /* Cleanup stale index entries */ - err = ovl_cleanup(dir, index); + err = ovl_cleanup(ofs, dir, index); } else if (err != -ENOENT) { /* * Abort mount to avoid corrupting the index if @@ -1135,10 +1212,10 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) * Whiteout orphan index to block future open by * handle after overlay nlink dropped to zero. */ - err = ovl_cleanup_and_whiteout(indexdir, dir, index); + err = ovl_cleanup_and_whiteout(ofs, dir, index); } else { /* Cleanup orphan index entries */ - err = ovl_cleanup(dir, index); + err = ovl_cleanup(ofs, dir, index); } if (err) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ac967f1cb6e5..a29a8afe9b26 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -15,6 +15,7 @@ #include <linux/seq_file.h> #include <linux/posix_acl_xattr.h> #include <linux/exportfs.h> +#include <linux/file.h> #include "overlayfs.h" MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); @@ -79,7 +80,7 @@ static void ovl_dentry_release(struct dentry *dentry) static struct dentry *ovl_d_real(struct dentry *dentry, const struct inode *inode) { - struct dentry *real; + struct dentry *real = NULL, *lower; /* It's an overlay file */ if (inode && d_inode(dentry) == inode) @@ -98,9 +99,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry, if (real && !inode && ovl_has_upperdata(d_inode(dentry))) return real; - real = ovl_dentry_lowerdata(dentry); - if (!real) + lower = ovl_dentry_lowerdata(dentry); + if (!lower) goto bug; + real = lower; /* Handle recursion */ real = d_real(real, inode); @@ -108,58 +110,61 @@ static struct dentry *ovl_d_real(struct dentry *dentry, if (!inode || inode == d_inode(real)) return real; bug: - WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, - inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); + WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n", + __func__, dentry, inode ? inode->i_sb->s_id : "NULL", + inode ? inode->i_ino : 0, real, + real && d_inode(real) ? d_inode(real)->i_ino : 0); return dentry; } -static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) { - struct ovl_entry *oe = dentry->d_fsdata; - unsigned int i; int ret = 1; - for (i = 0; i < oe->numlower; i++) { - struct dentry *d = oe->lowerstack[i].dentry; - - if (d->d_flags & DCACHE_OP_REVALIDATE) { - ret = d->d_op->d_revalidate(d, flags); - if (ret < 0) - return ret; - if (!ret) { - if (!(flags & LOOKUP_RCU)) - d_invalidate(d); - return -ESTALE; - } + if (weak) { + if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) + ret = d->d_op->d_weak_revalidate(d, flags); + } else if (d->d_flags & DCACHE_OP_REVALIDATE) { + ret = d->d_op->d_revalidate(d, flags); + if (!ret) { + if (!(flags & LOOKUP_RCU)) + d_invalidate(d); + ret = -ESTALE; } } - return 1; + return ret; } -static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_dentry_revalidate_common(struct dentry *dentry, + unsigned int flags, bool weak) { struct ovl_entry *oe = dentry->d_fsdata; + struct dentry *upper; unsigned int i; int ret = 1; - for (i = 0; i < oe->numlower; i++) { - struct dentry *d = oe->lowerstack[i].dentry; + upper = ovl_dentry_upper(dentry); + if (upper) + ret = ovl_revalidate_real(upper, flags, weak); - if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { - ret = d->d_op->d_weak_revalidate(d, flags); - if (ret <= 0) - break; - } + for (i = 0; ret > 0 && i < oe->numlower; i++) { + ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags, + weak); } return ret; } -static const struct dentry_operations ovl_dentry_operations = { - .d_release = ovl_dentry_release, - .d_real = ovl_d_real, -}; +static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + return ovl_dentry_revalidate_common(dentry, flags, false); +} + +static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + return ovl_dentry_revalidate_common(dentry, flags, true); +} -static const struct dentry_operations ovl_reval_dentry_operations = { +static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, @@ -170,7 +175,7 @@ static struct kmem_cache *ovl_inode_cachep; static struct inode *ovl_alloc_inode(struct super_block *sb) { - struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); + struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL); if (!oi) return NULL; @@ -180,7 +185,8 @@ static struct inode *ovl_alloc_inode(struct super_block *sb) oi->version = 0; oi->flags = 0; oi->__upperdentry = NULL; - oi->lower = NULL; + oi->lowerpath.dentry = NULL; + oi->lowerpath.layer = NULL; oi->lowerdata = NULL; mutex_init(&oi->lock); @@ -201,7 +207,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); - iput(oi->lower); + dput(oi->lowerpath.dentry); if (S_ISDIR(inode->i_mode)) ovl_dir_cache_free(inode); else @@ -210,24 +216,28 @@ static void ovl_destroy_inode(struct inode *inode) static void ovl_free_fs(struct ovl_fs *ofs) { + struct vfsmount **mounts; unsigned i; iput(ofs->workbasedir_trap); iput(ofs->indexdir_trap); iput(ofs->workdir_trap); - iput(ofs->upperdir_trap); + dput(ofs->whiteout); dput(ofs->indexdir); dput(ofs->workdir); if (ofs->workdir_locked) ovl_inuse_unlock(ofs->workbasedir); dput(ofs->workbasedir); if (ofs->upperdir_locked) - ovl_inuse_unlock(ofs->upper_mnt->mnt_root); - mntput(ofs->upper_mnt); - for (i = 1; i < ofs->numlayer; i++) { + ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root); + + /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */ + mounts = (struct vfsmount **) ofs->layers; + for (i = 0; i < ofs->numlayer; i++) { iput(ofs->layers[i].trap); - mntput(ofs->layers[i].mnt); + mounts[i] = ofs->layers[i].mnt; } + kern_unmount_array(mounts, ofs->numlayer); kfree(ofs->layers); for (i = 0; i < ofs->numfs; i++) free_anon_bdev(ofs->fs[i].pseudo_dev); @@ -256,12 +266,23 @@ static int ovl_sync_fs(struct super_block *sb, int wait) struct super_block *upper_sb; int ret; - if (!ofs->upper_mnt) - return 0; + ret = ovl_sync_status(ofs); + /* + * We have to always set the err, because the return value isn't + * checked in syncfs, and instead indirectly return an error via + * the sb's writeback errseq, which VFS inspects after this call. + */ + if (ret < 0) { + errseq_set(&sb->s_wb_err, -EIO); + return -EIO; + } + + if (!ret) + return ret; /* - * If this is a sync(2) call or an emergency sync, all the super blocks - * will be iterated, including upper_sb, so no need to do anything. + * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC). + * All the super blocks will be iterated, including upper_sb. * * If this is a syncfs(2) call, then we do need to call * sync_filesystem() on upper_sb, but enough if we do it when being @@ -270,7 +291,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait) if (!wait) return 0; - upper_sb = ofs->upper_mnt->mnt_sb; + upper_sb = ovl_upper_mnt(ofs)->mnt_sb; down_read(&upper_sb->s_umount); ret = sync_filesystem(upper_sb); @@ -281,7 +302,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait) /** * ovl_statfs - * @sb: The overlayfs super block + * @dentry: The dentry to query * @buf: The struct kstatfs to fill in with stats * * Get the filesystem statistics. As writes always target the upper layer @@ -308,7 +329,7 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) /* Will this overlay be forced to mount/remount ro? */ static bool ovl_force_readonly(struct ovl_fs *ofs) { - return (!ofs->upper_mnt || !ofs->workdir); + return (!ovl_upper_mnt(ofs) || !ofs->workdir); } static const char *ovl_redirect_mode_def(void) @@ -316,12 +337,6 @@ static const char *ovl_redirect_mode_def(void) return ovl_redirect_dir_def ? "on" : "off"; } -enum { - OVL_XINO_OFF, - OVL_XINO_AUTO, - OVL_XINO_ON, -}; - static const char * const ovl_xino_str[] = { "off", "auto", @@ -335,6 +350,8 @@ static inline int ovl_xino_def(void) /** * ovl_show_options + * @m: the seq_file handle + * @dentry: The dentry to query * * Prints the mount options for a given superblock. * Returns zero; does not fail. @@ -355,6 +372,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); if (ofs->config.index != ovl_index_def) seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); + if (!ofs->config.uuid) + seq_puts(m, ",uuid=off"); if (ofs->config.nfs_export != ovl_nfs_export_def) seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? "on" : "off"); @@ -363,17 +382,32 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", ofs->config.metacopy ? "on" : "off"); + if (ofs->config.ovl_volatile) + seq_puts(m, ",volatile"); + if (ofs->config.userxattr) + seq_puts(m, ",userxattr"); return 0; } static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ofs = sb->s_fs_info; + struct super_block *upper_sb; + int ret = 0; if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) return -EROFS; - return 0; + if (*flags & SB_RDONLY && !sb_rdonly(sb)) { + upper_sb = ovl_upper_mnt(ofs)->mnt_sb; + if (ovl_should_sync(ofs)) { + down_read(&upper_sb->s_umount); + ret = sync_filesystem(upper_sb); + up_read(&upper_sb->s_umount); + } + } + + return ret; } static const struct super_operations ovl_super_operations = { @@ -396,13 +430,17 @@ enum { OPT_REDIRECT_DIR, OPT_INDEX_ON, OPT_INDEX_OFF, + OPT_UUID_ON, + OPT_UUID_OFF, OPT_NFS_EXPORT_ON, + OPT_USERXATTR, OPT_NFS_EXPORT_OFF, OPT_XINO_ON, OPT_XINO_OFF, OPT_XINO_AUTO, OPT_METACOPY_ON, OPT_METACOPY_OFF, + OPT_VOLATILE, OPT_ERR, }; @@ -414,6 +452,9 @@ static const match_table_t ovl_tokens = { {OPT_REDIRECT_DIR, "redirect_dir=%s"}, {OPT_INDEX_ON, "index=on"}, {OPT_INDEX_OFF, "index=off"}, + {OPT_USERXATTR, "userxattr"}, + {OPT_UUID_ON, "uuid=on"}, + {OPT_UUID_OFF, "uuid=off"}, {OPT_NFS_EXPORT_ON, "nfs_export=on"}, {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, {OPT_XINO_ON, "xino=on"}, @@ -421,6 +462,7 @@ static const match_table_t ovl_tokens = { {OPT_XINO_AUTO, "xino=auto"}, {OPT_METACOPY_ON, "metacopy=on"}, {OPT_METACOPY_OFF, "metacopy=off"}, + {OPT_VOLATILE, "volatile"}, {OPT_ERR, NULL} }; @@ -475,6 +517,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) char *p; int err; bool metacopy_opt = false, redirect_opt = false; + bool nfs_export_opt = false, index_opt = false; config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); if (!config->redirect_mode) @@ -524,18 +567,30 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) case OPT_INDEX_ON: config->index = true; + index_opt = true; break; case OPT_INDEX_OFF: config->index = false; + index_opt = true; + break; + + case OPT_UUID_ON: + config->uuid = true; + break; + + case OPT_UUID_OFF: + config->uuid = false; break; case OPT_NFS_EXPORT_ON: config->nfs_export = true; + nfs_export_opt = true; break; case OPT_NFS_EXPORT_OFF: config->nfs_export = false; + nfs_export_opt = true; break; case OPT_XINO_ON: @@ -557,6 +612,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) case OPT_METACOPY_OFF: config->metacopy = false; + metacopy_opt = true; + break; + + case OPT_VOLATILE: + config->ovl_volatile = true; + break; + + case OPT_USERXATTR: + config->userxattr = true; break; default: @@ -566,12 +630,24 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) } } - /* Workdir is useless in non-upper mount */ - if (!config->upperdir && config->workdir) { - pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", - config->workdir); - kfree(config->workdir); - config->workdir = NULL; + /* Workdir/index are useless in non-upper mount */ + if (!config->upperdir) { + if (config->workdir) { + pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", + config->workdir); + kfree(config->workdir); + config->workdir = NULL; + } + if (config->index && index_opt) { + pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n"); + index_opt = false; + } + config->index = false; + } + + if (!config->upperdir && config->ovl_volatile) { + pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n"); + config->ovl_volatile = false; } err = ovl_parse_redirect_mode(config, config->redirect_mode); @@ -606,6 +682,72 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) } } + /* Resolve nfs_export -> index dependency */ + if (config->nfs_export && !config->index) { + if (!config->upperdir && config->redirect_follow) { + pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); + config->nfs_export = false; + } else if (nfs_export_opt && index_opt) { + pr_err("conflicting options: nfs_export=on,index=off\n"); + return -EINVAL; + } else if (index_opt) { + /* + * There was an explicit index=off that resulted + * in this conflict. + */ + pr_info("disabling nfs_export due to index=off\n"); + config->nfs_export = false; + } else { + /* Automatically enable index otherwise. */ + config->index = true; + } + } + + /* Resolve nfs_export -> !metacopy dependency */ + if (config->nfs_export && config->metacopy) { + if (nfs_export_opt && metacopy_opt) { + pr_err("conflicting options: nfs_export=on,metacopy=on\n"); + return -EINVAL; + } + if (metacopy_opt) { + /* + * There was an explicit metacopy=on that resulted + * in this conflict. + */ + pr_info("disabling nfs_export due to metacopy=on\n"); + config->nfs_export = false; + } else { + /* + * There was an explicit nfs_export=on that resulted + * in this conflict. + */ + pr_info("disabling metacopy due to nfs_export=on\n"); + config->metacopy = false; + } + } + + + /* Resolve userxattr -> !redirect && !metacopy dependency */ + if (config->userxattr) { + if (config->redirect_follow && redirect_opt) { + pr_err("conflicting options: userxattr,redirect_dir=%s\n", + config->redirect_mode); + return -EINVAL; + } + if (config->metacopy && metacopy_opt) { + pr_err("conflicting options: userxattr,metacopy=on\n"); + return -EINVAL; + } + /* + * Silently disable default setting of redirect and metacopy. + * This shall be the default in the future as well: these + * options must be explicitly enabled if used together with + * userxattr. + */ + config->redirect_dir = config->redirect_follow = false; + config->metacopy = false; + } + return 0; } @@ -616,17 +758,14 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, const char *name, bool persist) { struct inode *dir = ofs->workbasedir->d_inode; - struct vfsmount *mnt = ofs->upper_mnt; + struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *work; int err; bool retried = false; - bool locked = false; inode_lock_nested(dir, I_MUTEX_PARENT); - locked = true; - retry: - work = lookup_one_len(name, ofs->workbasedir, strlen(name)); + work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name)); if (!IS_ERR(work)) { struct iattr attr = { @@ -643,15 +782,23 @@ retry: goto out_unlock; retried = true; - ovl_workdir_cleanup(dir, mnt, work, 0); + err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0); dput(work); + if (err == -EINVAL) { + work = ERR_PTR(err); + goto out_unlock; + } goto retry; } - work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); - err = PTR_ERR(work); - if (IS_ERR(work)) - goto out_err; + err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); + if (err) + goto out_dput; + + /* Weird filesystem returning with hashed negative (kernfs)? */ + err = -EINVAL; + if (d_really_is_negative(work)) + goto out_dput; /* * Try to remove POSIX ACL xattrs from workdir. We are good if: @@ -666,17 +813,19 @@ retry: * allowed as upper are limited to "normal" ones, where checking * for the above two errors is sufficient. */ - err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); + err = ovl_do_removexattr(ofs, work, + XATTR_NAME_POSIX_ACL_DEFAULT); if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; - err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); + err = ovl_do_removexattr(ofs, work, + XATTR_NAME_POSIX_ACL_ACCESS); if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; /* Clear any inherited mode bits */ inode_lock(work->d_inode); - err = notify_change(work, &attr, NULL); + err = ovl_do_notify_change(ofs, work, &attr); inode_unlock(work->d_inode); if (err) goto out_dput; @@ -685,9 +834,7 @@ retry: goto out_err; } out_unlock: - if (locked) - inode_unlock(dir); - + inode_unlock(dir); return work; out_dput: @@ -751,19 +898,18 @@ static int ovl_mount_dir(const char *name, struct path *path) ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - if (!err) - if (ovl_dentry_remote(path->dentry)) { - pr_err("filesystem on '%s' not supported as upperdir\n", - tmp); - path_put_init(path); - err = -EINVAL; - } + if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { + pr_err("filesystem on '%s' not supported as upperdir\n", + tmp); + path_put_init(path); + err = -EINVAL; + } kfree(tmp); } return err; } -static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, +static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs, const char *name) { struct kstatfs statfs; @@ -778,24 +924,21 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, } static int ovl_lower_dir(const char *name, struct path *path, - struct ovl_fs *ofs, int *stack_depth, bool *remote) + struct ovl_fs *ofs, int *stack_depth) { int fh_type; int err; err = ovl_mount_dir_noesc(name, path); if (err) - goto out; + return err; err = ovl_check_namelen(path, ofs, name); if (err) - goto out_put; + return err; *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); - if (ovl_dentry_remote(path->dentry)) - *remote = true; - /* * The inodes index feature and NFS export need to encode and decode * file handles, so they require that all layers support them. @@ -808,17 +951,22 @@ static int ovl_lower_dir(const char *name, struct path *path, pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", name); } + /* + * Decoding origin file handle is required for persistent st_ino. + * Without persistent st_ino, xino=auto falls back to xino=off. + */ + if (ofs->config.xino == OVL_XINO_AUTO && + ofs->config.upperdir && !fh_type) { + ofs->config.xino = OVL_XINO_OFF; + pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n", + name); + } /* Check if lower fs has 32bit inode numbers */ if (fh_type != FILEID_INO32_GEN) ofs->xino_mode = -1; return 0; - -out_put: - path_put_init(path); -out: - return err; } /* Workdir should not be subdir of upperdir and vice versa */ @@ -863,6 +1011,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, static int __maybe_unused ovl_posix_acl_xattr_set(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -874,7 +1023,20 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, /* Check that everything is OK before copy-up */ if (value) { - acl = posix_acl_from_xattr(&init_user_ns, value, size); + /* The above comment can be understood in two ways: + * + * 1. We just want to check whether the basic POSIX ACL format + * is ok. For example, if the header is correct and the size + * is sane. + * 2. We want to know whether the ACL_{GROUP,USER} entries can + * be mapped according to the underlying filesystem. + * + * Currently, we only check 1. If we wanted to check 2. we + * would need to pass the mnt_userns and the fs_userns of the + * underlying filesystem. But frankly, I think checking 1. is + * enough to start the copy-up. + */ + acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); } @@ -888,7 +1050,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, goto out_acl_release; } err = -EPERM; - if (!inode_owner_or_capable(inode)) + if (!inode_owner_or_capable(&init_user_ns, inode)) goto out_acl_release; posix_acl_release(acl); @@ -900,18 +1062,15 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, if (unlikely(inode->i_mode & S_ISGID) && handler->flags == ACL_TYPE_ACCESS && !in_group_p(inode->i_gid) && - !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { + !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) { struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; - err = ovl_setattr(dentry, &iattr); + err = ovl_setattr(&init_user_ns, dentry, &iattr); if (err) return err; } err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); - if (!err) - ovl_copyattr(ovl_inode_real(inode), inode); - return err; out_acl_release: @@ -927,6 +1086,7 @@ static int ovl_own_xattr_get(const struct xattr_handler *handler, } static int ovl_own_xattr_set(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -942,6 +1102,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, } static int ovl_other_xattr_set(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -965,8 +1126,14 @@ ovl_posix_acl_default_xattr_handler = { .set = ovl_posix_acl_xattr_set, }; -static const struct xattr_handler ovl_own_xattr_handler = { - .prefix = OVL_XATTR_PREFIX, +static const struct xattr_handler ovl_own_trusted_xattr_handler = { + .prefix = OVL_XATTR_TRUSTED_PREFIX, + .get = ovl_own_xattr_get, + .set = ovl_own_xattr_set, +}; + +static const struct xattr_handler ovl_own_user_xattr_handler = { + .prefix = OVL_XATTR_USER_PREFIX, .get = ovl_own_xattr_get, .set = ovl_own_xattr_set, }; @@ -977,12 +1144,22 @@ static const struct xattr_handler ovl_other_xattr_handler = { .set = ovl_other_xattr_set, }; -static const struct xattr_handler *ovl_xattr_handlers[] = { +static const struct xattr_handler *ovl_trusted_xattr_handlers[] = { #ifdef CONFIG_FS_POSIX_ACL &ovl_posix_acl_access_xattr_handler, &ovl_posix_acl_default_xattr_handler, #endif - &ovl_own_xattr_handler, + &ovl_own_trusted_xattr_handler, + &ovl_other_xattr_handler, + NULL +}; + +static const struct xattr_handler *ovl_user_xattr_handlers[] = { +#ifdef CONFIG_FS_POSIX_ACL + &ovl_posix_acl_access_xattr_handler, + &ovl_posix_acl_default_xattr_handler, +#endif + &ovl_own_user_xattr_handler, &ovl_other_xattr_handler, NULL }; @@ -1025,7 +1202,7 @@ static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) } static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, - struct path *upperpath) + struct ovl_layer *upper_layer, struct path *upperpath) { struct vfsmount *upper_mnt; int err; @@ -1034,8 +1211,8 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, if (err) goto out; - /* Upper fs should not be r/o */ - if (sb_rdonly(upperpath->mnt->mnt_sb)) { + /* Upperdir path should not be r/o */ + if (__mnt_is_readonly(upperpath->mnt)) { pr_err("upper fs is r/o, try multi-lower layers mount\n"); err = -EINVAL; goto out; @@ -1045,7 +1222,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, if (err) goto out; - err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap, + err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap, "upperdir"); if (err) goto out; @@ -1059,9 +1236,23 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, /* Don't inherit atime flags */ upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); - ofs->upper_mnt = upper_mnt; + upper_layer->mnt = upper_mnt; + upper_layer->idx = 0; + upper_layer->fsid = 0; + + /* + * Inherit SB_NOSEC flag from upperdir. + * + * This optimization changes behavior when a security related attribute + * (suid/sgid/security.*) is changed on an underlying layer. This is + * okay because we don't yet have guarantees in that case, but it will + * need careful treatment once we want to honour changes to underlying + * filesystems. + */ + if (upper_mnt->mnt_sb->s_flags & SB_NOSEC) + sb->s_flags |= SB_NOSEC; - if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) { + if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) { ofs->upperdir_locked = true; } else { err = ovl_report_in_use(ofs, "upperdir"); @@ -1074,11 +1265,115 @@ out: return err; } -static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, - struct path *workpath) +/* + * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and + * negative values if error is encountered. + */ +static int ovl_check_rename_whiteout(struct ovl_fs *ofs) { - struct vfsmount *mnt = ofs->upper_mnt; + struct dentry *workdir = ofs->workdir; + struct inode *dir = d_inode(workdir); struct dentry *temp; + struct dentry *dest; + struct dentry *whiteout; + struct name_snapshot name; + int err; + + inode_lock_nested(dir, I_MUTEX_PARENT); + + temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0)); + err = PTR_ERR(temp); + if (IS_ERR(temp)) + goto out_unlock; + + dest = ovl_lookup_temp(ofs, workdir); + err = PTR_ERR(dest); + if (IS_ERR(dest)) { + dput(temp); + goto out_unlock; + } + + /* Name is inline and stable - using snapshot as a copy helper */ + take_dentry_name_snapshot(&name, temp); + err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT); + if (err) { + if (err == -EINVAL) + err = 0; + goto cleanup_temp; + } + + whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + goto cleanup_temp; + + err = ovl_is_whiteout(whiteout); + + /* Best effort cleanup of whiteout and temp file */ + if (err) + ovl_cleanup(ofs, dir, whiteout); + dput(whiteout); + +cleanup_temp: + ovl_cleanup(ofs, dir, temp); + release_dentry_name_snapshot(&name); + dput(temp); + dput(dest); + +out_unlock: + inode_unlock(dir); + + return err; +} + +static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, + struct dentry *parent, + const char *name, umode_t mode) +{ + size_t len = strlen(name); + struct dentry *child; + + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); + child = ovl_lookup_upper(ofs, name, parent, len); + if (!IS_ERR(child) && !child->d_inode) + child = ovl_create_real(ofs, parent->d_inode, child, + OVL_CATTR(mode)); + inode_unlock(parent->d_inode); + dput(parent); + + return child; +} + +/* + * Creates $workdir/work/incompat/volatile/dirty file if it is not already + * present. + */ +static int ovl_create_volatile_dirty(struct ovl_fs *ofs) +{ + unsigned int ctr; + struct dentry *d = dget(ofs->workbasedir); + static const char *const volatile_path[] = { + OVL_WORKDIR_NAME, "incompat", "volatile", "dirty" + }; + const char *const *name = volatile_path; + + for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) { + d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG); + if (IS_ERR(d)) + return PTR_ERR(d); + } + dput(d); + return 0; +} + +static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, + const struct path *workpath) +{ + struct vfsmount *mnt = ovl_upper_mnt(ofs); + struct dentry *workdir; + struct file *tmpfile; + bool rename_whiteout; + bool d_type; int fh_type; int err; @@ -1086,10 +1381,13 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, if (err) return err; - ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); - if (!ofs->workdir) + workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); + err = PTR_ERR(workdir); + if (IS_ERR_OR_NULL(workdir)) goto out; + ofs->workdir = workdir; + err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir"); if (err) goto out; @@ -1104,33 +1402,77 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, if (err < 0) goto out; - /* - * We allowed this configuration and don't want to break users over - * kernel upgrade. So warn instead of erroring out. - */ - if (!err) + d_type = err; + if (!d_type) pr_warn("upper fs needs to support d_type.\n"); /* Check if upper/work fs supports O_TMPFILE */ - temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); - ofs->tmpfile = !IS_ERR(temp); + tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0); + ofs->tmpfile = !IS_ERR(tmpfile); if (ofs->tmpfile) - dput(temp); + fput(tmpfile); else pr_warn("upper fs does not support tmpfile.\n"); + + /* Check if upper/work fs supports RENAME_WHITEOUT */ + err = ovl_check_rename_whiteout(ofs); + if (err < 0) + goto out; + + rename_whiteout = err; + if (!rename_whiteout) + pr_warn("upper fs does not support RENAME_WHITEOUT.\n"); + /* - * Check if upper/work fs supports trusted.overlay.* xattr + * Check if upper/work fs supports (trusted|user).overlay.* xattr */ - err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); + err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1); if (err) { + pr_warn("failed to set xattr on upper\n"); ofs->noxattr = true; - ofs->config.index = false; - ofs->config.metacopy = false; - pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); + if (ofs->config.index || ofs->config.metacopy) { + ofs->config.index = false; + ofs->config.metacopy = false; + pr_warn("...falling back to index=off,metacopy=off.\n"); + } + /* + * xattr support is required for persistent st_ino. + * Without persistent st_ino, xino=auto falls back to xino=off. + */ + if (ofs->config.xino == OVL_XINO_AUTO) { + ofs->config.xino = OVL_XINO_OFF; + pr_warn("...falling back to xino=off.\n"); + } + if (err == -EPERM && !ofs->config.userxattr) + pr_info("try mounting with 'userxattr' option\n"); err = 0; } else { - vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); + ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE); + } + + /* + * We allowed sub-optimal upper fs configuration and don't want to break + * users over kernel upgrade, but we never allowed remote upper fs, so + * we can enforce strict requirements for remote upper fs. + */ + if (ovl_dentry_remote(ofs->workdir) && + (!d_type || !rename_whiteout || ofs->noxattr)) { + pr_err("upper fs missing required features.\n"); + err = -EINVAL; + goto out; + } + + /* + * For volatile mount, create a incompat/volatile/dirty file to keep + * track of it. + */ + if (ofs->config.ovl_volatile) { + err = ovl_create_volatile_dirty(ofs); + if (err < 0) { + pr_err("Failed to create volatile/dirty file.\n"); + goto out; + } } /* Check if upper/work fs supports file handles */ @@ -1155,7 +1497,7 @@ out: } static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, - struct path *upperpath) + const struct path *upperpath) { int err; struct path workpath = { }; @@ -1198,9 +1540,10 @@ out: } static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, - struct ovl_entry *oe, struct path *upperpath) + struct ovl_entry *oe, const struct path *upperpath) { - struct vfsmount *mnt = ofs->upper_mnt; + struct vfsmount *mnt = ovl_upper_mnt(ofs); + struct dentry *indexdir; int err; err = mnt_want_write(mnt); @@ -1208,15 +1551,25 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, return err; /* Verify lower root is upper root origin */ - err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, - true); + err = ovl_verify_origin(ofs, upperpath->dentry, + oe->lowerstack[0].dentry, true); if (err) { pr_err("failed to verify upper root origin\n"); goto out; } - ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); - if (ofs->indexdir) { + /* index dir will act also as workdir */ + iput(ofs->workdir_trap); + ofs->workdir_trap = NULL; + dput(ofs->workdir); + ofs->workdir = NULL; + indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); + if (IS_ERR(indexdir)) { + err = PTR_ERR(indexdir); + } else if (indexdir) { + ofs->indexdir = indexdir; + ofs->workdir = dget(indexdir); + err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, "indexdir"); if (err) @@ -1224,19 +1577,21 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, /* * Verify upper root is exclusively associated with index dir. - * Older kernels stored upper fh in "trusted.overlay.origin" + * Older kernels stored upper fh in ".overlay.origin" * xattr. If that xattr exists, verify that it is a match to * upper dir file handle. In any case, verify or set xattr - * "trusted.overlay.upper" to indicate that index may have + * ".overlay.upper" to indicate that index may have * directory entries. */ - if (ovl_check_origin_xattr(ofs->indexdir)) { - err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, + if (ovl_check_origin_xattr(ofs, ofs->indexdir)) { + err = ovl_verify_set_fh(ofs, ofs->indexdir, + OVL_XATTR_ORIGIN, upperpath->dentry, true, false); if (err) pr_err("failed to verify index dir 'origin' xattr\n"); } - err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); + err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry, + true); if (err) pr_err("failed to verify index dir 'upper' xattr\n"); @@ -1256,9 +1611,20 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) { unsigned int i; - if (!ofs->config.nfs_export && !ofs->upper_mnt) + if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs)) return true; + /* + * We allow using single lower with null uuid for index and nfs_export + * for example to support those features with single lower squashfs. + * To avoid regressions in setups of overlay with re-formatted lower + * squashfs, do not allow decoding origin with lower null uuid unless + * user opted-in to one of the new features that require following the + * lower inode of non-dir upper. + */ + if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid)) + return false; + for (i = 0; i < ofs->numfs; i++) { /* * We use uuid to associate an overlay lower file handle with a @@ -1284,6 +1650,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) dev_t dev; int err; bool bad_uuid = false; + bool warn = false; for (i = 0; i < ofs->numfs; i++) { if (ofs->fs[i].sb == sb) @@ -1292,13 +1659,20 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { bad_uuid = true; + if (ofs->config.xino == OVL_XINO_AUTO) { + ofs->config.xino = OVL_XINO_OFF; + warn = true; + } if (ofs->config.index || ofs->config.nfs_export) { ofs->config.index = false; ofs->config.nfs_export = false; - pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", + warn = true; + } + if (warn) { + pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n", uuid_is_null(&sb->s_uuid) ? "null" : "conflicting", - path->dentry); + path->dentry, ovl_xino_str[ofs->config.xino]); } } @@ -1316,18 +1690,13 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) } static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, - struct path *stack, unsigned int numlower) + struct path *stack, unsigned int numlower, + struct ovl_layer *layers) { int err; unsigned int i; - struct ovl_layer *layers; err = -ENOMEM; - layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL); - if (!layers) - goto out; - ofs->layers = layers; - ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL); if (ofs->fs == NULL) goto out; @@ -1335,11 +1704,6 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, /* idx/fsid 0 are reserved for upper fs even with lower only overlay */ ofs->numfs++; - layers[0].mnt = ofs->upper_mnt; - layers[0].idx = 0; - layers[0].fsid = 0; - ofs->numlayer = 1; - /* * All lower layers that share the same fs as upper layer, use the same * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower @@ -1352,8 +1716,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, goto out; } - if (ofs->upper_mnt) { - ofs->fs[0].sb = ofs->upper_mnt->mnt_sb; + if (ovl_upper_mnt(ofs)) { + ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb; ofs->fs[0].is_lower = false; } @@ -1366,14 +1730,23 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, if (err < 0) goto out; + /* + * Check if lower root conflicts with this overlay layers before + * checking if it is in-use as upperdir/workdir of "another" + * mount, because we do not bother to check in ovl_is_inuse() if + * the upperdir/workdir is in fact in-use by our + * upperdir/workdir. + */ err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); if (err) goto out; if (ovl_is_inuse(stack[i].dentry)) { err = ovl_report_in_use(ofs, "lowerdir"); - if (err) + if (err) { + iput(trap); goto out; + } } mnt = clone_private_mount(&stack[i]); @@ -1401,26 +1774,29 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, /* * When all layers on same fs, overlay can use real inode numbers. - * With mount option "xino=on", mounter declares that there are enough - * free high bits in underlying fs to hold the unique fsid. + * With mount option "xino=<on|auto>", mounter declares that there are + * enough free high bits in underlying fs to hold the unique fsid. * If overlayfs does encounter underlying inodes using the high xino * bits reserved for fsid, it emits a warning and uses the original - * inode number. + * inode number or a non persistent inode number allocated from a + * dedicated range. */ - if (ofs->numfs - !ofs->upper_mnt == 1) { + if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) { if (ofs->config.xino == OVL_XINO_ON) pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); ofs->xino_mode = 0; } else if (ofs->config.xino == OVL_XINO_OFF) { ofs->xino_mode = -1; - } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { + } else if (ofs->xino_mode < 0) { /* * This is a roundup of number of bits needed for encoding - * fsid, where fsid 0 is reserved for upper fs even with - * lower only overlay. + * fsid, where fsid 0 is reserved for upper fs (even with + * lower only overlay) +1 extra bit is reserved for the non + * persistent inode number range that is used for resolving + * xino lower bits overflow. */ - BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); - ofs->xino_mode = ilog2(ofs->numfs - 1) + 1; + BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30); + ofs->xino_mode = ilog2(ofs->numfs - 1) + 2; } if (ofs->xino_mode > 0) { @@ -1434,45 +1810,26 @@ out: } static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, - struct ovl_fs *ofs) + const char *lower, unsigned int numlower, + struct ovl_fs *ofs, struct ovl_layer *layers) { int err; - char *lowertmp, *lower; struct path *stack = NULL; - unsigned int stacklen, numlower = 0, i; - bool remote = false; + unsigned int i; struct ovl_entry *oe; - err = -ENOMEM; - lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL); - if (!lowertmp) - goto out_err; - - err = -EINVAL; - stacklen = ovl_split_lowerdirs(lowertmp); - if (stacklen > OVL_MAX_STACK) { - pr_err("too many lower directories, limit is %d\n", - OVL_MAX_STACK); - goto out_err; - } else if (!ofs->config.upperdir && stacklen == 1) { + if (!ofs->config.upperdir && numlower == 1) { pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); - goto out_err; - } else if (!ofs->config.upperdir && ofs->config.nfs_export && - ofs->config.redirect_follow) { - pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); - ofs->config.nfs_export = false; + return ERR_PTR(-EINVAL); } - err = -ENOMEM; - stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); + stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL); if (!stack) - goto out_err; + return ERR_PTR(-ENOMEM); err = -EINVAL; - lower = lowertmp; - for (numlower = 0; numlower < stacklen; numlower++) { - err = ovl_lower_dir(lower, &stack[numlower], ofs, - &sb->s_stack_depth, &remote); + for (i = 0; i < numlower; i++) { + err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth); if (err) goto out_err; @@ -1486,7 +1843,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, goto out_err; } - err = ovl_get_layers(sb, ofs, stack, numlower); + err = ovl_get_layers(sb, ofs, stack, numlower, layers); if (err) goto out_err; @@ -1500,16 +1857,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, oe->lowerstack[i].layer = &ofs->layers[i+1]; } - if (remote) - sb->s_d_op = &ovl_reval_dentry_operations; - else - sb->s_d_op = &ovl_dentry_operations; - out: for (i = 0; i < numlower; i++) path_put(&stack[i]); kfree(stack); - kfree(lowertmp); return oe; @@ -1524,7 +1875,8 @@ out_err: * - upper/work dir of any overlayfs instance */ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, - struct dentry *dentry, const char *name) + struct dentry *dentry, const char *name, + bool is_lower) { struct dentry *next = dentry, *parent; int err = 0; @@ -1536,7 +1888,7 @@ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, /* Walk back ancestors to root (inclusive) looking for traps */ while (!err && parent != next) { - if (ovl_lookup_trap_inode(sb, parent)) { + if (is_lower && ovl_lookup_trap_inode(sb, parent)) { err = -ELOOP; pr_err("overlapping %s path\n", name); } else if (ovl_is_inuse(parent)) { @@ -1560,9 +1912,9 @@ static int ovl_check_overlapping_layers(struct super_block *sb, { int i, err; - if (ofs->upper_mnt) { - err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root, - "upperdir"); + if (ovl_upper_mnt(ofs)) { + err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root, + "upperdir", false); if (err) return err; @@ -1573,7 +1925,8 @@ static int ovl_check_overlapping_layers(struct super_block *sb, * workbasedir. In that case, we already have their traps in * inode cache and we will catch that case on lookup. */ - err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir"); + err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir", + false); if (err) return err; } @@ -1581,7 +1934,7 @@ static int ovl_check_overlapping_layers(struct super_block *sb, for (i = 1; i < ofs->numlayer; i++) { err = ovl_check_layer(sb, ofs, ofs->layers[i].mnt->mnt_root, - "lowerdir"); + "lowerdir", true); if (err) return err; } @@ -1589,25 +1942,77 @@ static int ovl_check_overlapping_layers(struct super_block *sb, return 0; } +static struct dentry *ovl_get_root(struct super_block *sb, + struct dentry *upperdentry, + struct ovl_entry *oe) +{ + struct dentry *root; + struct ovl_path *lowerpath = &oe->lowerstack[0]; + unsigned long ino = d_inode(lowerpath->dentry)->i_ino; + int fsid = lowerpath->layer->fsid; + struct ovl_inode_params oip = { + .upperdentry = upperdentry, + .lowerpath = lowerpath, + }; + + root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); + if (!root) + return NULL; + + root->d_fsdata = oe; + + if (upperdentry) { + /* Root inode uses upper st_ino/i_ino */ + ino = d_inode(upperdentry)->i_ino; + fsid = 0; + ovl_dentry_set_upper_alias(root); + if (ovl_is_impuredir(sb, upperdentry)) + ovl_set_flag(OVL_IMPURE, d_inode(root)); + } + + /* Root is always merge -> can have whiteouts */ + ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); + ovl_dentry_set_flag(OVL_E_CONNECTED, root); + ovl_set_upperdata(d_inode(root)); + ovl_inode_init(d_inode(root), &oip, ino, fsid); + ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE); + + return root; +} + static int ovl_fill_super(struct super_block *sb, void *data, int silent) { struct path upperpath = { }; struct dentry *root_dentry; struct ovl_entry *oe; struct ovl_fs *ofs; + struct ovl_layer *layers; struct cred *cred; + char *splitlower = NULL; + unsigned int numlower; int err; + err = -EIO; + if (WARN_ON(sb->s_user_ns != current_user_ns())) + goto out; + + sb->s_d_op = &ovl_dentry_operations; + err = -ENOMEM; ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); if (!ofs) goto out; + err = -ENOMEM; ofs->creator_cred = cred = prepare_creds(); if (!cred) goto out_err; + /* Is there a reason anyone would want not to share whiteouts? */ + ofs->share_whiteout = true; + ofs->config.index = ovl_index_def; + ofs->config.uuid = true; ofs->config.nfs_export = ovl_nfs_export_def; ofs->config.xino = ovl_xino_def(); ofs->config.metacopy = ovl_metacopy_def; @@ -1622,9 +2027,32 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_err; } + err = -ENOMEM; + splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL); + if (!splitlower) + goto out_err; + + err = -EINVAL; + numlower = ovl_split_lowerdirs(splitlower); + if (numlower > OVL_MAX_STACK) { + pr_err("too many lower directories, limit is %d\n", + OVL_MAX_STACK); + goto out_err; + } + + err = -ENOMEM; + layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL); + if (!layers) + goto out_err; + + ofs->layers = layers; + /* Layer 0 is reserved for upper even if there's no upper */ + ofs->numlayer = 1; + sb->s_stack_depth = 0; sb->s_maxbytes = MAX_LFS_FILESIZE; - /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ + atomic_long_set(&ofs->last_ino, 1); + /* Assume underlying fs uses 32bit inodes unless proven otherwise */ if (ofs->config.xino != OVL_XINO_OFF) { ofs->xino_mode = BITS_PER_LONG - 32; if (!ofs->xino_mode) { @@ -1637,15 +2065,28 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ovl_super_operations; if (ofs->config.upperdir) { + struct super_block *upper_sb; + + err = -EINVAL; if (!ofs->config.workdir) { pr_err("missing 'workdir'\n"); goto out_err; } - err = ovl_get_upper(sb, ofs, &upperpath); + err = ovl_get_upper(sb, ofs, &layers[0], &upperpath); if (err) goto out_err; + upper_sb = ovl_upper_mnt(ofs)->mnt_sb; + if (!ovl_should_sync(ofs)) { + ofs->errseq = errseq_sample(&upper_sb->s_wb_err); + if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) { + err = -EIO; + pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n"); + goto out_err; + } + } + err = ovl_get_workdir(sb, ofs, &upperpath); if (err) goto out_err; @@ -1653,31 +2094,31 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ofs->workdir) sb->s_flags |= SB_RDONLY; - sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; - sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; - + sb->s_stack_depth = upper_sb->s_stack_depth; + sb->s_time_gran = upper_sb->s_time_gran; } - oe = ovl_get_lowerstack(sb, ofs); + oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers); err = PTR_ERR(oe); if (IS_ERR(oe)) goto out_err; /* If the upper fs is nonexistent, we mark overlayfs r/o too */ - if (!ofs->upper_mnt) + if (!ovl_upper_mnt(ofs)) sb->s_flags |= SB_RDONLY; - if (!(ovl_force_readonly(ofs)) && ofs->config.index) { + if (!ofs->config.uuid && ofs->numfs > 1) { + pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n"); + ofs->config.uuid = true; + } + + if (!ovl_force_readonly(ofs) && ofs->config.index) { err = ovl_get_indexdir(sb, ofs, oe, &upperpath); if (err) goto out_free_oe; /* Force r/o mount with no index dir */ - if (!ofs->indexdir) { - dput(ofs->workdir); - ofs->workdir = NULL; + if (!ofs->indexdir) sb->s_flags |= SB_RDONLY; - } - } err = ovl_check_overlapping_layers(sb, ofs); @@ -1687,7 +2128,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* Show index=off in /proc/mounts for forced r/o mount */ if (!ofs->indexdir) { ofs->config.index = false; - if (ofs->upper_mnt && ofs->config.nfs_export) { + if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) { pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } @@ -1705,30 +2146,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); sb->s_magic = OVERLAYFS_SUPER_MAGIC; - sb->s_xattr = ovl_xattr_handlers; + sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : + ovl_trusted_xattr_handlers; sb->s_fs_info = ofs; sb->s_flags |= SB_POSIXACL; + sb->s_iflags |= SB_I_SKIP_SYNC; err = -ENOMEM; - root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); + root_dentry = ovl_get_root(sb, upperpath.dentry, oe); if (!root_dentry) goto out_free_oe; - root_dentry->d_fsdata = oe; - mntput(upperpath.mnt); - if (upperpath.dentry) { - ovl_dentry_set_upper_alias(root_dentry); - if (ovl_is_impuredir(upperpath.dentry)) - ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); - } - - /* Root is always merge -> can have whiteouts */ - ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); - ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); - ovl_set_upperdata(d_inode(root_dentry)); - ovl_inode_init(d_inode(root_dentry), upperpath.dentry, - ovl_dentry_lower(root_dentry), NULL); + kfree(splitlower); sb->s_root = root_dentry; @@ -1738,6 +2168,7 @@ out_free_oe: ovl_entry_stack_free(oe); kfree(oe); out_err: + kfree(splitlower); path_put(&upperpath); ovl_free_fs(ofs); out: @@ -1753,6 +2184,7 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, static struct file_system_type ovl_fs_type = { .owner = THIS_MODULE, .name = "overlay", + .fs_flags = FS_USERNS_MOUNT, .mount = ovl_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 042f7eb4f7f4..81a57a8d80d9 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,6 +10,7 @@ #include <linux/cred.h> #include <linux/xattr.h> #include <linux/exportfs.h> +#include <linux/fileattr.h> #include <linux/uuid.h> #include <linux/namei.h> #include <linux/ratelimit.h> @@ -18,13 +19,13 @@ int ovl_want_write(struct dentry *dentry) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - return mnt_want_write(ofs->upper_mnt); + return mnt_want_write(ovl_upper_mnt(ofs)); } void ovl_drop_write(struct dentry *dentry) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - mnt_drop_write(ofs->upper_mnt); + mnt_drop_write(ovl_upper_mnt(ofs)); } struct dentry *ovl_workdir(struct dentry *dentry) @@ -50,6 +51,9 @@ const struct cred *ovl_override_creds(struct super_block *sb) */ int ovl_can_decode_fh(struct super_block *sb) { + if (!capable(CAP_DAC_READ_SEARCH)) + return 0; + if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry) return 0; @@ -93,8 +97,24 @@ struct ovl_entry *ovl_alloc_entry(unsigned int numlower) bool ovl_dentry_remote(struct dentry *dentry) { return dentry->d_flags & - (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_REAL); + (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); +} + +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask) +{ + struct ovl_entry *oe = OVL_E(dentry); + unsigned int i, flags = 0; + + if (upperdentry) + flags |= upperdentry->d_flags; + for (i = 0; i < oe->numlower; i++) + flags |= oe->lowerstack[i].dentry->d_flags; + + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~mask; + dentry->d_flags |= flags & mask; + spin_unlock(&dentry->d_lock); } bool ovl_dentry_weird(struct dentry *dentry) @@ -134,7 +154,7 @@ void ovl_path_upper(struct dentry *dentry, struct path *path) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - path->mnt = ofs->upper_mnt; + path->mnt = ovl_upper_mnt(ofs); path->dentry = ovl_dentry_upper(dentry); } @@ -174,6 +194,20 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) return type; } +enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path) +{ + enum ovl_path_type type = ovl_path_type(dentry); + + WARN_ON_ONCE(d_is_dir(dentry)); + + if (!OVL_TYPE_UPPER(type) || OVL_TYPE_MERGE(type)) + ovl_path_lowerdata(dentry, path); + else + ovl_path_upper(dentry, path); + + return type; +} + struct dentry *ovl_dentry_upper(struct dentry *dentry) { return ovl_upperdentry_dereference(OVL_I(d_inode(dentry))); @@ -195,7 +229,7 @@ const struct ovl_layer *ovl_layer_lower(struct dentry *dentry) /* * ovl_dentry_lower() could return either a data dentry or metacopy dentry - * dependig on what is stored in lowerstack[0]. At times we need to find + * depending on what is stored in lowerstack[0]. At times we need to find * lower dentry which has data (and not metacopy dentry). This helper * returns the lower data dentry. */ @@ -216,6 +250,17 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode) return ovl_upperdentry_dereference(OVL_I(inode)); } +void ovl_i_path_real(struct inode *inode, struct path *path) +{ + path->dentry = ovl_i_dentry_upper(inode); + if (!path->dentry) { + path->dentry = OVL_I(inode)->lowerpath.dentry; + path->mnt = OVL_I(inode)->lowerpath.layer->mnt; + } else { + path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb)); + } +} + struct inode *ovl_inode_upper(struct inode *inode) { struct dentry *upperdentry = ovl_i_dentry_upper(inode); @@ -225,7 +270,9 @@ struct inode *ovl_inode_upper(struct inode *inode) struct inode *ovl_inode_lower(struct inode *inode) { - return OVL_I(inode)->lower; + struct dentry *lowerdentry = OVL_I(inode)->lowerpath.dentry; + + return lowerdentry ? d_inode(lowerdentry) : NULL; } struct inode *ovl_inode_real(struct inode *inode) @@ -386,24 +433,6 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect) oi->redirect = redirect; } -void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, - struct dentry *lowerdentry, struct dentry *lowerdata) -{ - struct inode *realinode = d_inode(upperdentry ?: lowerdentry); - - if (upperdentry) - OVL_I(inode)->__upperdentry = upperdentry; - if (lowerdentry) - OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); - if (lowerdata) - OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata)); - - ovl_copyattr(realinode, inode); - ovl_copyflags(realinode, inode); - if (!inode->i_ino) - inode->i_ino = realinode->i_ino; -} - void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) { struct inode *upperinode = d_inode(upperdentry); @@ -416,34 +445,34 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; if (inode_unhashed(inode)) { - if (!inode->i_ino) - inode->i_ino = upperinode->i_ino; inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); } } -static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity) +static void ovl_dir_version_inc(struct dentry *dentry, bool impurity) { struct inode *inode = d_inode(dentry); WARN_ON(!inode_is_locked(inode)); + WARN_ON(!d_is_dir(dentry)); /* - * Version is used by readdir code to keep cache consistent. For merge - * dirs all changes need to be noted. For non-merge dirs, cache only - * contains impure (ones which have been copied up and have origins) - * entries, so only need to note changes to impure entries. + * Version is used by readdir code to keep cache consistent. + * For merge dirs (or dirs with origin) all changes need to be noted. + * For non-merge dirs, cache contains only impure entries (i.e. ones + * which have been copied up and have origins), so only need to note + * changes to impure entries. */ - if (OVL_TYPE_MERGE(ovl_path_type(dentry)) || impurity) + if (!ovl_dir_is_real(dentry) || impurity) OVL_I(inode)->version++; } void ovl_dir_modified(struct dentry *dentry, bool impurity) { /* Copy mtime/ctime */ - ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry)); + ovl_copyattr(d_inode(dentry)); - ovl_dentry_version_inc(dentry, impurity); + ovl_dir_version_inc(dentry, impurity); } u64 ovl_dentry_version_get(struct dentry *dentry) @@ -461,9 +490,35 @@ bool ovl_is_whiteout(struct dentry *dentry) return inode && IS_WHITEOUT(inode); } -struct file *ovl_path_open(struct path *path, int flags) +struct file *ovl_path_open(const struct path *path, int flags) { - return dentry_open(path, flags | O_NOATIME, current_cred()); + struct inode *inode = d_inode(path->dentry); + struct user_namespace *real_mnt_userns = mnt_user_ns(path->mnt); + int err, acc_mode; + + if (flags & ~(O_ACCMODE | O_LARGEFILE)) + BUG(); + + switch (flags & O_ACCMODE) { + case O_RDONLY: + acc_mode = MAY_READ; + break; + case O_WRONLY: + acc_mode = MAY_WRITE; + break; + default: + BUG(); + } + + err = inode_permission(real_mnt_userns, inode, acc_mode | MAY_OPEN); + if (err) + return ERR_PTR(err); + + /* O_NOATIME is an optimization, don't fail if not permitted */ + if (inode_owner_or_capable(real_mnt_userns, inode)) + flags |= O_NOATIME; + + return dentry_open(path, flags, current_cred()); } /* Caller should hold ovl_inode->lock */ @@ -523,11 +578,11 @@ void ovl_copy_up_end(struct dentry *dentry) ovl_inode_unlock(d_inode(dentry)); } -bool ovl_check_origin_xattr(struct dentry *dentry) +bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path) { int res; - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + res = ovl_path_getxattr(ofs, path, OVL_XATTR_ORIGIN, NULL, 0); /* Zero size value means "copied up but origin unknown" */ if (res >= 0) @@ -536,35 +591,59 @@ bool ovl_check_origin_xattr(struct dentry *dentry) return false; } -bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) +bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, + enum ovl_xattr ox) { int res; char val; - if (!d_is_dir(dentry)) + if (!d_is_dir(path->dentry)) return false; - res = vfs_getxattr(dentry, name, &val, 1); + res = ovl_path_getxattr(ofs, path, ox, &val, 1); if (res == 1 && val == 'y') return true; return false; } -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, - const char *name, const void *value, size_t size, +#define OVL_XATTR_OPAQUE_POSTFIX "opaque" +#define OVL_XATTR_REDIRECT_POSTFIX "redirect" +#define OVL_XATTR_ORIGIN_POSTFIX "origin" +#define OVL_XATTR_IMPURE_POSTFIX "impure" +#define OVL_XATTR_NLINK_POSTFIX "nlink" +#define OVL_XATTR_UPPER_POSTFIX "upper" +#define OVL_XATTR_METACOPY_POSTFIX "metacopy" +#define OVL_XATTR_PROTATTR_POSTFIX "protattr" + +#define OVL_XATTR_TAB_ENTRY(x) \ + [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ + [true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX } + +const char *const ovl_xattr_table[][2] = { + OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_IMPURE), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), +}; + +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, + enum ovl_xattr ox, const void *value, size_t size, int xerr) { int err; - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; if (ofs->noxattr) return xerr; - err = ovl_do_setxattr(upperdentry, name, value, size, 0); + err = ovl_setxattr(ofs, upperdentry, ox, value, size); if (err == -EOPNOTSUPP) { - pr_warn("cannot set %s xattr on upper\n", name); + pr_warn("cannot set %s xattr on upper\n", ovl_xattr(ofs, ox)); ofs->noxattr = true; return xerr; } @@ -574,6 +653,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; if (ovl_test_flag(OVL_IMPURE, d_inode(dentry))) @@ -583,27 +663,93 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) * Do not fail when upper doesn't support xattrs. * Upper inodes won't have origin nor redirect xattr anyway. */ - err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, - "y", 1, 0); + err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0); if (!err) ovl_set_flag(OVL_IMPURE, d_inode(dentry)); return err; } -void ovl_set_flag(unsigned long flag, struct inode *inode) -{ - set_bit(flag, &OVL_I(inode)->flags); -} -void ovl_clear_flag(unsigned long flag, struct inode *inode) +#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */ + +void ovl_check_protattr(struct inode *inode, struct dentry *upper) { - clear_bit(flag, &OVL_I(inode)->flags); + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK; + char buf[OVL_PROTATTR_MAX+1]; + int res, n; + + res = ovl_getxattr_upper(ofs, upper, OVL_XATTR_PROTATTR, buf, + OVL_PROTATTR_MAX); + if (res < 0) + return; + + /* + * Initialize inode flags from overlay.protattr xattr and upper inode + * flags. If upper inode has those fileattr flags set (i.e. from old + * kernel), we do not clear them on ovl_get_inode(), but we will clear + * them on next fileattr_set(). + */ + for (n = 0; n < res; n++) { + if (buf[n] == 'a') + iflags |= S_APPEND; + else if (buf[n] == 'i') + iflags |= S_IMMUTABLE; + else + break; + } + + if (!res || n < res) { + pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n", + upper, res); + } else { + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + } } -bool ovl_test_flag(unsigned long flag, struct inode *inode) +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa) { - return test_bit(flag, &OVL_I(inode)->flags); + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + char buf[OVL_PROTATTR_MAX]; + int len = 0, err = 0; + u32 iflags = 0; + + BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX); + + if (fa->flags & FS_APPEND_FL) { + buf[len++] = 'a'; + iflags |= S_APPEND; + } + if (fa->flags & FS_IMMUTABLE_FL) { + buf[len++] = 'i'; + iflags |= S_IMMUTABLE; + } + + /* + * Do not allow to set protection flags when upper doesn't support + * xattrs, because we do not set those fileattr flags on upper inode. + * Remove xattr if it exist and all protection flags are cleared. + */ + if (len) { + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR, + buf, len, -EPERM); + } else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) { + err = ovl_removexattr(ofs, upper, OVL_XATTR_PROTATTR); + if (err == -EOPNOTSUPP || err == -ENODATA) + err = 0; + } + if (err) + return err; + + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + + /* Mask out the fileattr flags that should not be set in upper inode */ + fa->flags &= ~OVL_PROT_FS_FLAGS_MASK; + fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK; + + return 0; } /** @@ -673,6 +819,7 @@ bool ovl_need_index(struct dentry *dentry) /* Caller must hold OVL_I(inode)->lock */ static void ovl_cleanup_index(struct dentry *dentry) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *indexdir = ovl_indexdir(dentry->d_sb); struct inode *dir = indexdir->d_inode; struct dentry *lowerdentry = ovl_dentry_lower(dentry); @@ -682,7 +829,7 @@ static void ovl_cleanup_index(struct dentry *dentry) struct qstr name = { }; int err; - err = ovl_get_index_name(lowerdentry, &name); + err = ovl_get_index_name(ofs, lowerdentry, &name); if (err) goto fail; @@ -705,16 +852,17 @@ static void ovl_cleanup_index(struct dentry *dentry) } inode_lock_nested(dir, I_MUTEX_PARENT); - index = lookup_one_len(name.name, indexdir, name.len); + index = ovl_lookup_upper(ofs, name.name, indexdir, name.len); err = PTR_ERR(index); if (IS_ERR(index)) { index = NULL; } else if (ovl_index_all(dentry->d_sb)) { /* Whiteout orphan index to block future open by handle */ - err = ovl_cleanup_and_whiteout(indexdir, dir, index); + err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb), + dir, index); } else { /* Cleanup orphan index entries */ - err = ovl_cleanup(dir, index); + err = ovl_cleanup(ofs, dir, index); } inode_unlock(dir); @@ -823,18 +971,25 @@ err: } /* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */ -int ovl_check_metacopy_xattr(struct dentry *dentry) +int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path) { int res; /* Only regular files can have metacopy xattr */ - if (!S_ISREG(d_inode(dentry)->i_mode)) + if (!S_ISREG(d_inode(path->dentry)->i_mode)) return 0; - res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0); + res = ovl_path_getxattr(ofs, path, OVL_XATTR_METACOPY, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return 0; + /* + * getxattr on user.* may fail with EACCES in case there's no + * read permission on the inode. Not much we can do, other than + * tell the caller that this is not a metacopy inode. + */ + if (ofs->config.userxattr && res == -EACCES) + return 0; goto out; } @@ -860,49 +1015,26 @@ bool ovl_is_metacopy_dentry(struct dentry *dentry) return (oe->numlower > 1); } -ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value, - size_t padding) -{ - ssize_t res; - char *buf = NULL; - - res = vfs_getxattr(dentry, name, NULL, 0); - if (res < 0) { - if (res == -ENODATA || res == -EOPNOTSUPP) - return -ENODATA; - goto fail; - } - - if (res != 0) { - buf = kzalloc(res + padding, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - res = vfs_getxattr(dentry, name, buf, res); - if (res < 0) - goto fail; - } - *value = buf; - - return res; - -fail: - pr_warn_ratelimited("failed to get xattr %s: err=%zi)\n", - name, res); - kfree(buf); - return res; -} - -char *ovl_get_redirect_xattr(struct dentry *dentry, int padding) +char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding) { int res; char *s, *next, *buf = NULL; - res = ovl_getxattr(dentry, OVL_XATTR_REDIRECT, &buf, padding + 1); - if (res == -ENODATA) + res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, NULL, 0); + if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; if (res < 0) - return ERR_PTR(res); + goto fail; + if (res == 0) + goto invalid; + + buf = kzalloc(res + padding + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + res = ovl_path_getxattr(ofs, path, OVL_XATTR_REDIRECT, buf, res); + if (res < 0) + goto fail; if (res == 0) goto invalid; @@ -921,6 +1053,67 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding) invalid: pr_warn_ratelimited("invalid redirect (%s)\n", buf); res = -EINVAL; + goto err_free; +fail: + pr_warn_ratelimited("failed to get redirect (%i)\n", res); +err_free: kfree(buf); return ERR_PTR(res); } + +/* + * ovl_sync_status() - Check fs sync status for volatile mounts + * + * Returns 1 if this is not a volatile mount and a real sync is required. + * + * Returns 0 if syncing can be skipped because mount is volatile, and no errors + * have occurred on the upperdir since the mount. + * + * Returns -errno if it is a volatile mount, and the error that occurred since + * the last mount. If the error code changes, it'll return the latest error + * code. + */ + +int ovl_sync_status(struct ovl_fs *ofs) +{ + struct vfsmount *mnt; + + if (ovl_should_sync(ofs)) + return 1; + + mnt = ovl_upper_mnt(ofs); + if (!mnt) + return 0; + + return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq); +} + +/* + * ovl_copyattr() - copy inode attributes from layer to ovl inode + * + * When overlay copies inode information from an upper or lower layer to the + * relevant overlay inode it will apply the idmapping of the upper or lower + * layer when doing so ensuring that the ovl inode ownership will correctly + * reflect the ownership of the idmapped upper or lower layer. For example, an + * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to + * map any lower or upper inode owned by id 1001 to id 1000. These mapping + * helpers are nops when the relevant layer isn't idmapped. + */ +void ovl_copyattr(struct inode *inode) +{ + struct path realpath; + struct inode *realinode; + struct user_namespace *real_mnt_userns; + + ovl_i_path_real(inode, &realpath); + realinode = d_inode(realpath.dentry); + real_mnt_userns = mnt_user_ns(realpath.mnt); + + inode->i_uid = i_uid_into_mnt(real_mnt_userns, realinode); + inode->i_gid = i_gid_into_mnt(real_mnt_userns, realinode); + inode->i_mode = realinode->i_mode; + inode->i_atime = realinode->i_atime; + inode->i_mtime = realinode->i_mtime; + inode->i_ctime = realinode->i_ctime; + i_size_write(inode, i_size_read(realinode)); +} |