From 4c37e71b713ecffe81f8e6273c6835e54306d412 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 22 Dec 2019 22:47:54 +0200 Subject: ovl: fix wrong WARN_ON() in ovl_cache_update_ino() The WARN_ON() that child entry is always on overlay st_dev became wrong when we allowed this function to update d_ino in non-samefs setup with xino enabled. It is not true in case of xino bits overflow on a non-dir inode. Leave the WARN_ON() only for directories, where assertion is still true. Fixes: adbf4f7ea834 ("ovl: consistent d_ino for non-samefs with xino") Cc: # v4.17+ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 47a91c9733a5..7255e6a5838f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -504,7 +504,13 @@ get: if (err) goto fail; - WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); + /* + * Directory inode is always on overlay st_dev. + * Non-dir with ovl_same_dev() could be on pseudo st_dev in case + * of xino bits overflow. + */ + WARN_ON_ONCE(S_ISDIR(stat.mode) && + dir->d_sb->s_dev != stat.dev); ino = stat.ino; } else if (xinobits && !OVL_TYPE_UPPER(type)) { ino = ovl_remap_lower_ino(ino, xinobits, -- cgit v1.2.3-59-g8ed1b From 1bd0a3aea4357e1dce8b3f0f889fd3fe756353e6 Mon Sep 17 00:00:00 2001 From: lijiazi Date: Mon, 16 Dec 2019 19:12:32 +0800 Subject: ovl: use pr_fmt auto generate prefix Use pr_fmt auto generate "overlayfs: " prefix. Signed-off-by: lijiazi Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/dir.c | 10 +++--- fs/overlayfs/export.c | 12 +++---- fs/overlayfs/inode.c | 6 ++-- fs/overlayfs/namei.c | 26 +++++++-------- fs/overlayfs/overlayfs.h | 3 ++ fs/overlayfs/readdir.c | 8 ++--- fs/overlayfs/super.c | 85 ++++++++++++++++++++++++------------------------ fs/overlayfs/util.c | 14 ++++---- 9 files changed, 85 insertions(+), 81 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 6220642fe113..b168c65666e2 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -24,7 +24,7 @@ static int ovl_ccup_set(const char *buf, const struct kernel_param *param) { - pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n"); + pr_warn("\"check_copy_up\" module option is obsolete\n"); return 0; } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 29abdb1d3b5c..8e57d5372b8f 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -35,7 +35,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) dput(wdentry); if (err) { - pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", + pr_err("cleanup of '%pd2' failed (%i)\n", wdentry, err); } @@ -53,7 +53,7 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir) temp = lookup_one_len(name, workdir, strlen(name)); if (!IS_ERR(temp) && temp->d_inode) { - pr_err("overlayfs: workdir/%s already exists\n", name); + pr_err("workdir/%s already exists\n", name); dput(temp); temp = ERR_PTR(-EIO); } @@ -134,7 +134,7 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, d = lookup_one_len(dentry->d_name.name, dentry->d_parent, dentry->d_name.len); if (IS_ERR(d)) { - pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n", + pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", dentry, err); return PTR_ERR(d); } @@ -267,7 +267,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, d_instantiate(dentry, inode); if (inode != oip.newinode) { - pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n", + pr_warn_ratelimited("newly created inode found in cache (%pd2)\n", dentry); } @@ -1009,7 +1009,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) spin_unlock(&dentry->d_lock); } else { kfree(redirect); - pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n", + pr_warn_ratelimited("failed to set redirect (%i)\n", err); /* Fall back to userspace copy-up */ err = -EXDEV; diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 70e55588aedc..2fe724294aeb 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -30,7 +30,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry) } if (err) { - pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n", + pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n", dentry, err); } @@ -244,7 +244,7 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", + pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", dentry, err, buflen, fh ? (int)fh->fb.len : 0, fh ? fh->fb.type : 0); goto out; @@ -406,7 +406,7 @@ out: return this; fail: - pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", real, layer->idx, connected, err); this = ERR_PTR(err); goto out; @@ -631,7 +631,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, return connected; fail: - pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", real, layer->idx, connected, err); dput(connected); return ERR_PTR(err); @@ -822,7 +822,7 @@ out: return dentry; out_err: - pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n", + pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n", fh_len, fh_type, flags, err); dentry = ERR_PTR(err); goto out; @@ -831,7 +831,7 @@ out_err: static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n"); + pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n"); return ERR_PTR(-EACCES); } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b045cf1826fc..bfebef7638ca 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -100,7 +100,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, * persistent for a given layer configuration. */ if (stat->ino >> shift) { - pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", + pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", dentry, stat->ino, xinobits); } else { if (lower_layer) @@ -698,7 +698,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry, return nlink; fail: - pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", + pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n", upperdentry, err); return fallback; } @@ -969,7 +969,7 @@ out: return inode; out_err: - pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err); + pr_warn_ratelimited("failed to get inode (%i)\n", err); inode = ERR_PTR(err); goto out; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 76ff66339173..205163f2d3b6 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -141,10 +141,10 @@ out: return NULL; fail: - pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + pr_warn_ratelimited("failed to get origin (%i)\n", res); goto out; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh); goto out; } @@ -360,7 +360,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, return 0; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", + pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); dput(origin); @@ -449,7 +449,7 @@ out: fail: inode = d_inode(real); - pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n", + pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n", is_upper ? "upper" : "origin", real, inode ? inode->i_ino : 0, err); goto out; @@ -475,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) return upper ?: ERR_PTR(-ESTALE); if (!d_is_dir(upper)) { - pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n", + pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n", index, upper); dput(upper); return ERR_PTR(-EIO); @@ -589,12 +589,12 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", + pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n", index, d_inode(index)->i_mode & S_IFMT, err); goto out; orphan: - pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n", + pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n", index, d_inode(index)->i_mode & S_IFMT, d_inode(index)->i_nlink); err = -ENOENT; @@ -696,7 +696,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, index = NULL; goto out; } - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" + pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, err); @@ -723,13 +723,13 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, * unlinked, which means that finding a lower origin on lookup * whose index is a whiteout should be treated as an error. */ - pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", + pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", index, d_inode(index)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); goto fail; } else if (is_dir && verify) { if (!upper) { - pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", + pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", origin, index); goto fail; } @@ -738,7 +738,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, err = ovl_verify_upper(index, upper, false); if (err) { if (err == -ESTALE) { - pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", + pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", upper, origin, index); } goto fail; @@ -967,7 +967,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, */ err = -EPERM; if (d.redirect && !ofs->config.redirect_follow) { - pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n", + pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", dentry); goto out_put; } @@ -994,7 +994,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, err = -EPERM; if (!ofs->config.metacopy) { - pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n", + pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry); goto out_put; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f283b1d69a9e..fea79f7680c8 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -9,6 +9,9 @@ #include #include "ovl_entry.h" +#undef pr_fmt +#define pr_fmt(fmt) "overlayfs: " fmt + enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), __OVL_PATH_MERGE = (1 << 1), diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 7255e6a5838f..a9816f25a1bb 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -441,7 +441,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, const char *name, int namelen) { if (ino >> (64 - xinobits)) { - pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", + pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", namelen, name, ino, xinobits); return ino; } @@ -524,7 +524,7 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n", + pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n", p->name, err); goto out; } @@ -971,7 +971,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) dentry = lookup_one_len(p->name, upper, p->len); if (IS_ERR(dentry)) { - pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", + pr_err("lookup '%s/%.*s' failed (%i)\n", upper->d_name.name, p->len, p->name, (int) PTR_ERR(dentry)); continue; @@ -1153,6 +1153,6 @@ next: out: ovl_cache_free(&list); if (err) - pr_err("overlayfs: failed index dir cleanup (%i)\n", err); + pr_err("failed index dir cleanup (%i)\n", err); return err; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 7621ff176d15..cce483a1d0ff 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -462,7 +462,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) if (ovl_redirect_always_follow) config->redirect_follow = true; } else if (strcmp(mode, "nofollow") != 0) { - pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n", + pr_err("bad mount option \"redirect_dir=%s\"\n", mode); return -EINVAL; } @@ -560,14 +560,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) break; default: - pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); + pr_err("unrecognized mount option \"%s\" or missing value\n", + p); return -EINVAL; } } /* Workdir is useless in non-upper mount */ if (!config->upperdir && config->workdir) { - pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", + pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", config->workdir); kfree(config->workdir); config->workdir = NULL; @@ -587,7 +588,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) /* Resolve metacopy -> redirect_dir dependency */ if (config->metacopy && !config->redirect_dir) { if (metacopy_opt && redirect_opt) { - pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n", + pr_err("conflicting options: metacopy=on,redirect_dir=%s\n", config->redirect_mode); return -EINVAL; } @@ -596,7 +597,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) * There was an explicit redirect_dir=... that resulted * in this conflict. */ - pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n", + pr_info("disabling metacopy due to redirect_dir=%s\n", config->redirect_mode); config->metacopy = false; } else { @@ -692,7 +693,7 @@ out_unlock: out_dput: dput(work); out_err: - pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", + pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n", ofs->config.workdir, name, -err); work = NULL; goto out_unlock; @@ -716,21 +717,21 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) int err = -EINVAL; if (!*name) { - pr_err("overlayfs: empty lowerdir\n"); + pr_err("empty lowerdir\n"); goto out; } err = kern_path(name, LOOKUP_FOLLOW, path); if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + pr_err("failed to resolve '%s': %i\n", name, err); goto out; } err = -EINVAL; if (ovl_dentry_weird(path->dentry)) { - pr_err("overlayfs: filesystem on '%s' not supported\n", name); + pr_err("filesystem on '%s' not supported\n", name); goto out_put; } if (!d_is_dir(path->dentry)) { - pr_err("overlayfs: '%s' not a directory\n", name); + pr_err("'%s' not a directory\n", name); goto out_put; } return 0; @@ -752,7 +753,7 @@ static int ovl_mount_dir(const char *name, struct path *path) if (!err) if (ovl_dentry_remote(path->dentry)) { - pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", + pr_err("filesystem on '%s' not supported as upperdir\n", tmp); path_put_init(path); err = -EINVAL; @@ -769,7 +770,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, int err = vfs_statfs(path, &statfs); if (err) - pr_err("overlayfs: statfs failed on '%s'\n", name); + pr_err("statfs failed on '%s'\n", name); else ofs->namelen = max(ofs->namelen, statfs.f_namelen); @@ -804,7 +805,7 @@ static int ovl_lower_dir(const char *name, struct path *path, (ofs->config.index && ofs->config.upperdir)) && !fh_type) { ofs->config.index = false; ofs->config.nfs_export = false; - pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", + pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", name); } @@ -996,7 +997,7 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, err = PTR_ERR_OR_ZERO(trap); if (err) { if (err == -ELOOP) - pr_err("overlayfs: conflicting %s path\n", name); + pr_err("conflicting %s path\n", name); return err; } @@ -1013,11 +1014,11 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) { if (ofs->config.index) { - pr_err("overlayfs: %s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", + pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", name); return -EBUSY; } else { - pr_warn("overlayfs: %s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", + pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", name); return 0; } @@ -1035,7 +1036,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, /* Upper fs should not be r/o */ if (sb_rdonly(upperpath->mnt->mnt_sb)) { - pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); + pr_err("upper fs is r/o, try multi-lower layers mount\n"); err = -EINVAL; goto out; } @@ -1052,7 +1053,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, upper_mnt = clone_private_mount(upperpath); err = PTR_ERR(upper_mnt); if (IS_ERR(upper_mnt)) { - pr_err("overlayfs: failed to clone upperpath\n"); + pr_err("failed to clone upperpath\n"); goto out; } @@ -1108,7 +1109,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, * kernel upgrade. So warn instead of erroring out. */ if (!err) - pr_warn("overlayfs: upper fs needs to support d_type.\n"); + pr_warn("upper fs needs to support d_type.\n"); /* Check if upper/work fs supports O_TMPFILE */ temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); @@ -1116,7 +1117,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, if (ofs->tmpfile) dput(temp); else - pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + pr_warn("upper fs does not support tmpfile.\n"); /* * Check if upper/work fs supports trusted.overlay.* xattr @@ -1126,7 +1127,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, ofs->noxattr = true; ofs->config.index = false; ofs->config.metacopy = false; - pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); + pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); err = 0; } else { vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); @@ -1136,7 +1137,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); if (ofs->config.index && !fh_type) { ofs->config.index = false; - pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); + pr_warn("upper fs does not support file handles, falling back to index=off.\n"); } /* Check if upper fs has 32bit inode numbers */ @@ -1145,7 +1146,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, /* NFS export of r/w mount depends on index */ if (ofs->config.nfs_export && !ofs->config.index) { - pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); + pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } out: @@ -1165,11 +1166,11 @@ static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, err = -EINVAL; if (upperpath->mnt != workpath.mnt) { - pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); + pr_err("workdir and upperdir must reside under the same mount\n"); goto out; } if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { - pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); + pr_err("workdir and upperdir must be separate subtrees\n"); goto out; } @@ -1210,7 +1211,7 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, true); if (err) { - pr_err("overlayfs: failed to verify upper root origin\n"); + pr_err("failed to verify upper root origin\n"); goto out; } @@ -1233,18 +1234,18 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, upperpath->dentry, true, false); if (err) - pr_err("overlayfs: failed to verify index dir 'origin' xattr\n"); + pr_err("failed to verify index dir 'origin' xattr\n"); } err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); if (err) - pr_err("overlayfs: failed to verify index dir 'upper' xattr\n"); + pr_err("failed to verify index dir 'upper' xattr\n"); /* Cleanup bad/stale/orphan index entries */ if (!err) err = ovl_indexdir_cleanup(ofs); } if (err || !ofs->indexdir) - pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); + pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); out: mnt_drop_write(mnt); @@ -1297,7 +1298,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) if (ofs->config.index || ofs->config.nfs_export) { ofs->config.index = false; ofs->config.nfs_export = false; - pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", + pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", uuid_is_null(&sb->s_uuid) ? "null" : "conflicting", path->dentry); @@ -1306,7 +1307,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) err = get_anon_bdev(&dev); if (err) { - pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); + pr_err("failed to get anonymous bdev for lowerpath\n"); return err; } @@ -1357,7 +1358,7 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, mnt = clone_private_mount(&stack[i]); err = PTR_ERR(mnt); if (IS_ERR(mnt)) { - pr_err("overlayfs: failed to clone lowerpath\n"); + pr_err("failed to clone lowerpath\n"); iput(trap); goto out; } @@ -1401,7 +1402,7 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, } if (ofs->xino_bits) { - pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n", + pr_info("\"xino\" feature enabled using %d upper inode bits.\n", ofs->xino_bits); } @@ -1428,15 +1429,15 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); if (stacklen > OVL_MAX_STACK) { - pr_err("overlayfs: too many lower directories, limit is %d\n", + pr_err("too many lower directories, limit is %d\n", OVL_MAX_STACK); goto out_err; } else if (!ofs->config.upperdir && stacklen == 1) { - pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); + pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); goto out_err; } else if (!ofs->config.upperdir && ofs->config.nfs_export && ofs->config.redirect_follow) { - pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); + pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } @@ -1459,7 +1460,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, err = -EINVAL; sb->s_stack_depth++; if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { - pr_err("overlayfs: maximum fs stacking depth exceeded\n"); + pr_err("maximum fs stacking depth exceeded\n"); goto out_err; } @@ -1515,7 +1516,7 @@ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, while (!err && parent != next) { if (ovl_lookup_trap_inode(sb, parent)) { err = -ELOOP; - pr_err("overlayfs: overlapping %s path\n", name); + pr_err("overlapping %s path\n", name); } else if (ovl_is_inuse(parent)) { err = ovl_report_in_use(ofs, name); } @@ -1595,7 +1596,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; if (!ofs->config.lowerdir) { if (!silent) - pr_err("overlayfs: missing 'lowerdir'\n"); + pr_err("missing 'lowerdir'\n"); goto out_err; } @@ -1610,7 +1611,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (ofs->config.upperdir) { if (!ofs->config.workdir) { - pr_err("overlayfs: missing 'workdir'\n"); + pr_err("missing 'workdir'\n"); goto out_err; } @@ -1660,13 +1661,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ofs->indexdir) { ofs->config.index = false; if (ofs->upper_mnt && ofs->config.nfs_export) { - pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n"); + pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } } if (ofs->config.metacopy && ofs->config.nfs_export) { - pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); + pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index f5678a3f8350..cb4a5eaf0ba5 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -576,7 +576,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, err = ovl_do_setxattr(upperdentry, name, value, size, 0); if (err == -EOPNOTSUPP) { - pr_warn("overlayfs: cannot set %s xattr on upper\n", name); + pr_warn("cannot set %s xattr on upper\n", name); ofs->noxattr = true; return xerr; } @@ -700,7 +700,7 @@ static void ovl_cleanup_index(struct dentry *dentry) inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { - pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", + pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", upperdentry, inode->i_ino, inode->i_nlink); /* * We either have a bug with persistent union nlink or a lower @@ -739,7 +739,7 @@ out: return; fail: - pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err); + pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err); goto out; } @@ -830,7 +830,7 @@ int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) err_unlock: unlock_rename(workdir, upperdir); err: - pr_err("overlayfs: failed to lock workdir+upperdir\n"); + pr_err("failed to lock workdir+upperdir\n"); return -EIO; } @@ -852,7 +852,7 @@ int ovl_check_metacopy_xattr(struct dentry *dentry) return 1; out: - pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res); + pr_warn_ratelimited("failed to get metacopy (%i)\n", res); return res; } @@ -899,7 +899,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value, return res; fail: - pr_warn_ratelimited("overlayfs: failed to get xattr %s: err=%zi)\n", + pr_warn_ratelimited("failed to get xattr %s: err=%zi)\n", name, res); kfree(buf); return res; @@ -931,7 +931,7 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding) return buf; invalid: - pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); + pr_warn_ratelimited("invalid redirect (%s)\n", buf); res = -EINVAL; kfree(buf); return ERR_PTR(res); -- cgit v1.2.3-59-g8ed1b From b1f9d3858f724ed45b279b689fb5b400d91352e3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 21 Dec 2019 11:42:29 +0200 Subject: ovl: use ovl_inode_lock in ovl_llseek() In ovl_llseek() we use the overlay inode rwsem to protect against concurrent modifications to real file f_pos, because we copy the overlay file f_pos to/from the real file f_pos. This caused a lockdep warning of locking order violation when the ovl_llseek() operation was called on a lower nested overlay layer while the upper layer fs sb_writers is held (with patch improving copy-up efficiency for big sparse file). Use the internal ovl_inode_lock() instead of the overlay inode rwsem in those cases. It is meant to be used for protecting against concurrent changes to overlay inode internal state changes. The locking order rules are documented to explain this case. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 4 ++-- fs/overlayfs/inode.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index e235a635d9ec..859efeaaefab 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -171,7 +171,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) * limitations that are more strict than ->s_maxbytes for specific * files, so we use the real file to perform seeks. */ - inode_lock(inode); + ovl_inode_lock(inode); real.file->f_pos = file->f_pos; old_cred = ovl_override_creds(inode->i_sb); @@ -179,7 +179,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) revert_creds(old_cred); file->f_pos = real.file->f_pos; - inode_unlock(inode); + ovl_inode_unlock(inode); fdput(real); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index bfebef7638ca..13981b0a8235 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -527,6 +527,27 @@ static const struct address_space_operations ovl_aops = { * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) * [...] &type->i_mutex_dir_key (stack_depth=0) + * + * Locking order w.r.t ovl_want_write() is important for nested overlayfs. + * + * This chain is valid: + * - inode->i_rwsem (inode_lock[2]) + * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) + * - OVL_I(inode)->lock (ovl_inode_lock[2]) + * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) + * + * And this chain is valid: + * - inode->i_rwsem (inode_lock[2]) + * - OVL_I(inode)->lock (ovl_inode_lock[2]) + * - lowerinode->i_rwsem (inode_lock[1]) + * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) + * + * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is + * held, because it is in reverse order of the non-nested case using the same + * upper fs: + * - inode->i_rwsem (inode_lock[1]) + * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) + * - OVL_I(inode)->lock (ovl_inode_lock[1]) */ #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH -- cgit v1.2.3-59-g8ed1b From b504c6540d1752c73e16548062c49bc9f447cb12 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 1 Nov 2019 20:35:51 +0800 Subject: ovl: improving copy-up efficiency for big sparse file Current copy-up is not efficient for big sparse file, It's not only slow but also wasting more disk space when the target lower file has huge hole inside. This patch tries to recognize file hole and skip it during copy-up. Detail logic of hole detection as below: When we detect next data position is larger than current position we will skip that hole, otherwise we copy data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually, it may not recognize all kind of holes and sometimes only skips partial of hole area. However, it will be enough for most of the use cases. Additionally, this optimization relies on lseek(2) SEEK_DATA implementation, so for some specific filesystems which do not support this feature will behave as before on copy-up. Reviewed-by: Amir Goldstein Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index b168c65666e2..9fc47c2e078d 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -123,6 +123,9 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) loff_t old_pos = 0; loff_t new_pos = 0; loff_t cloned; + loff_t data_pos = -1; + loff_t hole_len; + bool skip_hole = false; int error = 0; if (len == 0) @@ -144,7 +147,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) goto out; /* Couldn't clone, so now we try to copy the data */ - /* FIXME: copy up sparse files efficiently */ + /* Check if lower fs supports seek operation */ + if (old_file->f_mode & FMODE_LSEEK && + old_file->f_op->llseek) + skip_hole = true; + while (len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; long bytes; @@ -157,6 +164,36 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) break; } + /* + * Fill zero for hole will cost unnecessary disk space + * and meanwhile slow down the copy-up speed, so we do + * an optimization for hole during copy-up, it relies + * on SEEK_DATA implementation in lower fs so if lower + * fs does not support it, copy-up will behave as before. + * + * Detail logic of hole detection as below: + * When we detect next data position is larger than current + * position we will skip that hole, otherwise we copy + * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually, + * it may not recognize all kind of holes and sometimes + * only skips partial of hole area. However, it will be + * enough for most of the use cases. + */ + + if (skip_hole && data_pos < old_pos) { + data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA); + if (data_pos > old_pos) { + hole_len = data_pos - old_pos; + len -= hole_len; + old_pos = new_pos = data_pos; + continue; + } else if (data_pos == -ENXIO) { + break; + } else if (data_pos < 0) { + skip_hole = false; + } + } + bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); @@ -480,7 +517,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) } inode_lock(temp->d_inode); - if (c->metacopy) + if (S_ISREG(c->stat.mode)) err = ovl_set_size(temp, &c->stat); if (!err) err = ovl_set_attr(temp, &c->stat); -- cgit v1.2.3-59-g8ed1b From 94375f9d5103c2eb2f905381993a2fb70c297364 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2019 14:12:40 +0200 Subject: ovl: generalize the lower_layers[] array Rename lower_layers[] array to layers[], extend its size by one and initialize layers[0] with upper layer values. Lower layers are now addressed with index 1..numlower. layers[0] is reserved even with lower only overlay. [SzM: replace ofs->numlower with ofs->numlayer, the latter's value is incremented by one] Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 6 ++---- fs/overlayfs/namei.c | 12 ++++++------ fs/overlayfs/ovl_entry.h | 4 ++-- fs/overlayfs/super.c | 44 +++++++++++++++++++++++++------------------- 4 files changed, 35 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 2fe724294aeb..d01f938fcc5c 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -424,7 +424,6 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, struct ovl_layer *layer) { struct ovl_fs *ofs = sb->s_fs_info; - struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; struct dentry *index = NULL; struct dentry *this = NULL; struct inode *inode; @@ -466,7 +465,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, * recursive call walks back from indexed upper to the topmost * connected/hashed upper parent (or up to root). */ - this = ovl_lookup_real(sb, upper, &upper_layer); + this = ovl_lookup_real(sb, upper, &ofs->layers[0]); dput(upper); } @@ -646,8 +645,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb, struct dentry *index) { struct ovl_fs *ofs = sb->s_fs_info; - struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; - struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer; + struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer; struct dentry *real = upper ?: (index ?: lowerpath->dentry); /* diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 205163f2d3b6..ed9e129fae04 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -322,16 +322,16 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *origin = NULL; int i; - for (i = 0; i < ofs->numlower; i++) { + for (i = 1; i < ofs->numlayer; i++) { /* * If lower fs uuid is not unique among lower fs we cannot match * fh->uuid to layer. */ - if (ofs->lower_layers[i].fsid && - ofs->lower_layers[i].fs->bad_uuid) + if (ofs->layers[i].fsid && + ofs->layers[i].fs->bad_uuid) continue; - origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt, + origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt, connected); if (origin) break; @@ -354,7 +354,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, } **stackp = (struct ovl_path){ .dentry = origin, - .layer = &ofs->lower_layers[i] + .layer = &ofs->layers[i] }; return 0; @@ -885,7 +885,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!d.stop && poe->numlower) { err = -ENOMEM; - stack = kcalloc(ofs->numlower, sizeof(struct ovl_path), + stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path), GFP_KERNEL); if (!stack) goto out_put_upper; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 28348c44ea5b..64baf04cdede 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -45,10 +45,10 @@ struct ovl_path { /* private information held for overlayfs's superblock */ struct ovl_fs { struct vfsmount *upper_mnt; - unsigned int numlower; + unsigned int numlayer; /* Number of unique lower sb that differ from upper sb */ unsigned int numlowerfs; - struct ovl_layer *lower_layers; + struct ovl_layer *layers; struct ovl_sb *lower_fs; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index cce483a1d0ff..374a07633dca 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -224,13 +224,13 @@ static void ovl_free_fs(struct ovl_fs *ofs) if (ofs->upperdir_locked) ovl_inuse_unlock(ofs->upper_mnt->mnt_root); mntput(ofs->upper_mnt); - for (i = 0; i < ofs->numlower; i++) { - iput(ofs->lower_layers[i].trap); - mntput(ofs->lower_layers[i].mnt); + for (i = 1; i < ofs->numlayer; i++) { + iput(ofs->layers[i].trap); + mntput(ofs->layers[i].mnt); } for (i = 0; i < ofs->numlowerfs; i++) free_anon_bdev(ofs->lower_fs[i].pseudo_dev); - kfree(ofs->lower_layers); + kfree(ofs->layers); kfree(ofs->lower_fs); kfree(ofs->config.lowerdir); @@ -1319,16 +1319,16 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) return ofs->numlowerfs; } -static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, - struct path *stack, unsigned int numlower) +static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, + struct path *stack, unsigned int numlower) { int err; unsigned int i; err = -ENOMEM; - ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer), - GFP_KERNEL); - if (ofs->lower_layers == NULL) + ofs->layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), + GFP_KERNEL); + if (ofs->layers == NULL) goto out; ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb), @@ -1336,6 +1336,12 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, if (ofs->lower_fs == NULL) goto out; + /* idx 0 is reserved for upper fs even with lower only overlay */ + ofs->layers[0].mnt = ofs->upper_mnt; + ofs->layers[0].idx = 0; + ofs->layers[0].fsid = 0; + ofs->numlayer = 1; + for (i = 0; i < numlower; i++) { struct vfsmount *mnt; struct inode *trap; @@ -1369,15 +1375,15 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, */ mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; - ofs->lower_layers[ofs->numlower].trap = trap; - ofs->lower_layers[ofs->numlower].mnt = mnt; - ofs->lower_layers[ofs->numlower].idx = i + 1; - ofs->lower_layers[ofs->numlower].fsid = fsid; + ofs->layers[ofs->numlayer].trap = trap; + ofs->layers[ofs->numlayer].mnt = mnt; + ofs->layers[ofs->numlayer].idx = ofs->numlayer; + ofs->layers[ofs->numlayer].fsid = fsid; if (fsid) { - ofs->lower_layers[ofs->numlower].fs = + ofs->layers[ofs->numlayer].fs = &ofs->lower_fs[fsid - 1]; } - ofs->numlower++; + ofs->numlayer++; } /* @@ -1464,7 +1470,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, goto out_err; } - err = ovl_get_lower_layers(sb, ofs, stack, numlower); + err = ovl_get_layers(sb, ofs, stack, numlower); if (err) goto out_err; @@ -1475,7 +1481,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = dget(stack[i].dentry); - oe->lowerstack[i].layer = &ofs->lower_layers[i]; + oe->lowerstack[i].layer = &ofs->layers[i+1]; } if (remote) @@ -1556,9 +1562,9 @@ static int ovl_check_overlapping_layers(struct super_block *sb, return err; } - for (i = 0; i < ofs->numlower; i++) { + for (i = 1; i < ofs->numlayer; i++) { err = ovl_check_layer(sb, ofs, - ofs->lower_layers[i].mnt->mnt_root, + ofs->layers[i].mnt->mnt_root, "lowerdir"); if (err) return err; -- cgit v1.2.3-59-g8ed1b From 0f831ec85eda1ae27490baba106aba632c1d8e94 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 16 Nov 2019 18:14:41 +0200 Subject: ovl: simplify ovl_same_sb() helper No code uses the sb returned from this helper, so make it retrun a boolean and rename it to ovl_same_fs(). The xino mode is irrelevant when all layers are on same fs, so instead of describing samefs with mode OVL_XINO_OFF, use a new xino_mode state, which is 0 in the case of samefs, -1 in the case of xino=off and > 0 with xino enabled. Create a new helper ovl_same_dev(), to use instead of the common check for (ovl_same_fs() || xinobits). Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 8 ++++---- fs/overlayfs/overlayfs.h | 17 +++++++++++++---- fs/overlayfs/ovl_entry.h | 9 +++++++-- fs/overlayfs/readdir.c | 4 ++-- fs/overlayfs/super.c | 21 +++++++++++---------- fs/overlayfs/util.c | 12 ------------ 6 files changed, 37 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 13981b0a8235..cf51fa1e45a8 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -78,7 +78,7 @@ out: static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, struct ovl_layer *lower_layer) { - bool samefs = ovl_same_sb(dentry->d_sb); + bool samefs = ovl_same_fs(dentry->d_sb); unsigned int xinobits = ovl_xino_bits(dentry->d_sb); if (samefs) { @@ -146,7 +146,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, struct path realpath; const struct cred *old_cred; bool is_dir = S_ISDIR(dentry->d_inode->i_mode); - bool samefs = ovl_same_sb(dentry->d_sb); + bool samefs = ovl_same_fs(dentry->d_sb); struct ovl_layer *lower_layer = NULL; int err; bool metacopy_blocks = false; @@ -168,7 +168,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, * If lower filesystem supports NFS file handles, this also guaranties * persistent st_ino across mount cycle. */ - if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { + if (!is_dir || ovl_same_dev(dentry->d_sb)) { if (!OVL_TYPE_UPPER(type)) { lower_layer = ovl_layer_lower(dentry); } else if (OVL_TYPE_ORIGIN(type)) { @@ -586,7 +586,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). */ - if (ovl_same_sb(inode->i_sb) || xinobits) { + if (ovl_same_dev(inode->i_sb)) { inode->i_ino = ino; if (xinobits && fsid && !(ino >> (64 - xinobits))) inode->i_ino |= (unsigned long)fsid << (64 - xinobits); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index fea79f7680c8..e4ae3a459145 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -224,7 +224,6 @@ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); -struct super_block *ovl_same_sb(struct super_block *sb); int ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); bool ovl_index_all(struct super_block *sb); @@ -302,11 +301,21 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); } -static inline unsigned int ovl_xino_bits(struct super_block *sb) +/* All layers on same fs? */ +static inline bool ovl_same_fs(struct super_block *sb) { - struct ovl_fs *ofs = sb->s_fs_info; + return OVL_FS(sb)->xino_mode == 0; +} - return ofs->xino_bits; +/* All overlay inodes have same st_dev? */ +static inline bool ovl_same_dev(struct super_block *sb) +{ + return OVL_FS(sb)->xino_mode >= 0; +} + +static inline unsigned int ovl_xino_bits(struct super_block *sb) +{ + return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0; } static inline int ovl_inode_lock(struct inode *inode) diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 64baf04cdede..9a785702a2a4 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -71,10 +71,15 @@ struct ovl_fs { struct inode *workbasedir_trap; struct inode *workdir_trap; struct inode *indexdir_trap; - /* Inode numbers in all layers do not use the high xino_bits */ - unsigned int xino_bits; + /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */ + int xino_mode; }; +static inline struct ovl_fs *OVL_FS(struct super_block *sb) +{ + return (struct ovl_fs *)sb->s_fs_info; +} + /* private information held for every overlayfs dentry */ struct ovl_entry { union { diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index a9816f25a1bb..c8e478fa96a5 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -469,7 +469,7 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) int xinobits = ovl_xino_bits(dir->d_sb); int err = 0; - if (!ovl_same_sb(dir->d_sb) && !xinobits) + if (!ovl_same_dev(dir->d_sb)) goto out; if (p->name[0] == '.') { @@ -744,7 +744,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) * entries. */ if (ovl_xino_bits(dentry->d_sb) || - (ovl_same_sb(dentry->d_sb) && + (ovl_same_fs(dentry->d_sb) && (ovl_is_impure_dir(file) || OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { return ovl_iterate_real(file, ctx); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 374a07633dca..8f08313eb897 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -358,7 +358,7 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) if (ofs->config.nfs_export != ovl_nfs_export_def) seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? "on" : "off"); - if (ofs->config.xino != ovl_xino_def()) + if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb)) seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", @@ -811,7 +811,7 @@ static int ovl_lower_dir(const char *name, struct path *path, /* Check if lower fs has 32bit inode numbers */ if (fh_type != FILEID_INO32_GEN) - ofs->xino_bits = 0; + ofs->xino_mode = -1; return 0; @@ -1142,7 +1142,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, /* Check if upper fs has 32bit inode numbers */ if (fh_type != FILEID_INO32_GEN) - ofs->xino_bits = 0; + ofs->xino_mode = -1; /* NFS export of r/w mount depends on index */ if (ofs->config.nfs_export && !ofs->config.index) { @@ -1395,21 +1395,22 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, * inode number. */ if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) { - ofs->xino_bits = 0; - ofs->config.xino = OVL_XINO_OFF; - } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) { + if (ofs->config.xino == OVL_XINO_ON) + pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); + ofs->xino_mode = 0; + } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { /* * This is a roundup of number of bits needed for numlowerfs+1 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for * upper fs even with non upper overlay. */ BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); - ofs->xino_bits = ilog2(ofs->numlowerfs) + 1; + ofs->xino_mode = ilog2(ofs->numlowerfs) + 1; } - if (ofs->xino_bits) { + if (ofs->xino_mode > 0) { pr_info("\"xino\" feature enabled using %d upper inode bits.\n", - ofs->xino_bits); + ofs->xino_mode); } err = 0; @@ -1610,7 +1611,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ if (ofs->config.xino != OVL_XINO_OFF) - ofs->xino_bits = BITS_PER_LONG - 32; + ofs->xino_mode = BITS_PER_LONG - 32; /* alloc/destroy_inode needed for setting up traps in inode cache */ sb->s_op = &ovl_super_operations; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index cb4a5eaf0ba5..df503a8c6bcf 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -40,18 +40,6 @@ const struct cred *ovl_override_creds(struct super_block *sb) return override_creds(ofs->creator_cred); } -struct super_block *ovl_same_sb(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - if (!ofs->numlowerfs) - return ofs->upper_mnt->mnt_sb; - else if (ofs->numlowerfs == 1 && !ofs->upper_mnt) - return ofs->lower_fs[0].sb; - else - return NULL; -} - /* * Check if underlying fs supports file handles and try to determine encoding * type, in order to deduce maximum inode number used by fs. -- cgit v1.2.3-59-g8ed1b From 07f1e59637a8e5a8bddba5da7567d46635da510f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 14 Jan 2020 21:59:22 +0200 Subject: ovl: generalize the lower_fs[] array Rename lower_fs[] array to fs[], extend its size by one and use index fsid (instead of fsid-1) to access the fs[] array. Initialize fs[0] with upper fs values. fsid 0 is reserved even with lower only overlay, so fs[0] remains null in this case. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 21 +++++++--------- fs/overlayfs/ovl_entry.h | 6 ++--- fs/overlayfs/super.c | 65 +++++++++++++++++++++++++----------------------- 3 files changed, 46 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index cf51fa1e45a8..60f7b8f092f2 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -75,8 +75,7 @@ out: return err; } -static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, - struct ovl_layer *lower_layer) +static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) { bool samefs = ovl_same_fs(dentry->d_sb); unsigned int xinobits = ovl_xino_bits(dentry->d_sb); @@ -103,9 +102,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", dentry, stat->ino, xinobits); } else { - if (lower_layer) - stat->ino |= ((u64)lower_layer->fsid) << shift; - + stat->ino |= ((u64)fsid) << shift; stat->dev = dentry->d_sb->s_dev; return 0; } @@ -124,7 +121,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, */ stat->dev = dentry->d_sb->s_dev; stat->ino = dentry->d_inode->i_ino; - } else if (lower_layer && lower_layer->fsid) { + } else if (fsid) { /* * For non-samefs setup, if we cannot map all layers st_ino * to a unified address space, we need to make sure that st_dev @@ -132,7 +129,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, * lower layers use the unique anonymous bdev assigned to the * lower fs. */ - stat->dev = lower_layer->fs->pseudo_dev; + stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev; } return 0; @@ -147,7 +144,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, const struct cred *old_cred; bool is_dir = S_ISDIR(dentry->d_inode->i_mode); bool samefs = ovl_same_fs(dentry->d_sb); - struct ovl_layer *lower_layer = NULL; + int fsid = 0; int err; bool metacopy_blocks = false; @@ -170,7 +167,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, */ if (!is_dir || ovl_same_dev(dentry->d_sb)) { if (!OVL_TYPE_UPPER(type)) { - lower_layer = ovl_layer_lower(dentry); + fsid = ovl_layer_lower(dentry)->fsid; } else if (OVL_TYPE_ORIGIN(type)) { struct kstat lowerstat; u32 lowermask = STATX_INO | STATX_BLOCKS | @@ -200,13 +197,13 @@ int ovl_getattr(const struct path *path, struct kstat *stat, if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || (!ovl_verify_lower(dentry->d_sb) && (is_dir || lowerstat.nlink == 1))) { - lower_layer = ovl_layer_lower(dentry); + fsid = ovl_layer_lower(dentry)->fsid; /* * Cannot use origin st_dev;st_ino because * origin inode content may differ from overlay * inode content. */ - if (samefs || lower_layer->fsid) + if (samefs || fsid) stat->ino = lowerstat.ino; } @@ -241,7 +238,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, } } - err = ovl_map_dev_ino(dentry, stat, lower_layer); + err = ovl_map_dev_ino(dentry, stat, fsid); if (err) goto out; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 9a785702a2a4..d9fbebdb6076 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -46,10 +46,10 @@ struct ovl_path { struct ovl_fs { struct vfsmount *upper_mnt; unsigned int numlayer; - /* Number of unique lower sb that differ from upper sb */ - unsigned int numlowerfs; + /* Number of unique fs among layers including upper fs */ + unsigned int numfs; struct ovl_layer *layers; - struct ovl_sb *lower_fs; + struct ovl_sb *fs; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; /* workdir is the 'work' directory under workbasedir */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 8f08313eb897..d7c406d8c793 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -228,10 +228,11 @@ static void ovl_free_fs(struct ovl_fs *ofs) iput(ofs->layers[i].trap); mntput(ofs->layers[i].mnt); } - for (i = 0; i < ofs->numlowerfs; i++) - free_anon_bdev(ofs->lower_fs[i].pseudo_dev); kfree(ofs->layers); - kfree(ofs->lower_fs); + /* fs[0].pseudo_dev is either null or real upper st_dev */ + for (i = 1; i < ofs->numfs; i++) + free_anon_bdev(ofs->fs[i].pseudo_dev); + kfree(ofs->fs); kfree(ofs->config.lowerdir); kfree(ofs->config.upperdir); @@ -1259,7 +1260,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) if (!ofs->config.nfs_export && !ofs->upper_mnt) return true; - for (i = 0; i < ofs->numlowerfs; i++) { + for (i = 1; i < ofs->numfs; i++) { /* * We use uuid to associate an overlay lower file handle with a * lower layer, so we can accept lower fs with null uuid as long @@ -1267,8 +1268,8 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * if we detect multiple lower fs with the same uuid, we * disable lower file handle decoding on all of them. */ - if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) { - ofs->lower_fs[i].bad_uuid = true; + if (uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { + ofs->fs[i].bad_uuid = true; return false; } } @@ -1284,13 +1285,9 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) int err; bool bad_uuid = false; - /* fsid 0 is reserved for upper fs even with non upper overlay */ - if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb) - return 0; - - for (i = 0; i < ofs->numlowerfs; i++) { - if (ofs->lower_fs[i].sb == sb) - return i + 1; + for (i = 0; i < ofs->numfs; i++) { + if (ofs->fs[i].sb == sb) + return i; } if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { @@ -1311,12 +1308,11 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) return err; } - ofs->lower_fs[ofs->numlowerfs].sb = sb; - ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev; - ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid; - ofs->numlowerfs++; + ofs->fs[ofs->numfs].sb = sb; + ofs->fs[ofs->numfs].pseudo_dev = dev; + ofs->fs[ofs->numfs].bad_uuid = bad_uuid; - return ofs->numlowerfs; + return ofs->numfs++; } static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, @@ -1331,17 +1327,27 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, if (ofs->layers == NULL) goto out; - ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb), - GFP_KERNEL); - if (ofs->lower_fs == NULL) + ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL); + if (ofs->fs == NULL) goto out; - /* idx 0 is reserved for upper fs even with lower only overlay */ + /* idx/fsid 0 are reserved for upper fs even with lower only overlay */ + ofs->numfs++; + ofs->layers[0].mnt = ofs->upper_mnt; ofs->layers[0].idx = 0; ofs->layers[0].fsid = 0; ofs->numlayer = 1; + /* + * All lower layers that share the same fs as upper layer, use the real + * upper st_dev. + */ + if (ofs->upper_mnt) { + ofs->fs[0].sb = ofs->upper_mnt->mnt_sb; + ofs->fs[0].pseudo_dev = ofs->upper_mnt->mnt_sb->s_dev; + } + for (i = 0; i < numlower; i++) { struct vfsmount *mnt; struct inode *trap; @@ -1379,10 +1385,7 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, ofs->layers[ofs->numlayer].mnt = mnt; ofs->layers[ofs->numlayer].idx = ofs->numlayer; ofs->layers[ofs->numlayer].fsid = fsid; - if (fsid) { - ofs->layers[ofs->numlayer].fs = - &ofs->lower_fs[fsid - 1]; - } + ofs->layers[ofs->numlayer].fs = &ofs->fs[fsid]; ofs->numlayer++; } @@ -1394,18 +1397,18 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, * bits reserved for fsid, it emits a warning and uses the original * inode number. */ - if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) { + if (ofs->numfs - !ofs->upper_mnt == 1) { if (ofs->config.xino == OVL_XINO_ON) pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); ofs->xino_mode = 0; } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { /* - * This is a roundup of number of bits needed for numlowerfs+1 - * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for - * upper fs even with non upper overlay. + * This is a roundup of number of bits needed for encoding + * fsid, where fsid 0 is reserved for upper fs even with + * lower only overlay. */ BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); - ofs->xino_mode = ilog2(ofs->numlowerfs) + 1; + ofs->xino_mode = ilog2(ofs->numfs - 1) + 1; } if (ofs->xino_mode > 0) { -- cgit v1.2.3-59-g8ed1b From 1b81dddd354cf304574d79004400a6385613ae4e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 16 Nov 2019 18:52:20 +0200 Subject: ovl: fix corner case of conflicting lower layer uuid This fixes ovl_lower_uuid_ok() to correctly detect the corner case: - two filesystems, A and B, both have null uuid - upper layer is on A - lower layer 1 is also on A - lower layer 2 is on B In this case, bad_uuid would not have been set for B, because the check only involved the list of lower fs. Hence we'll try to decode a layer 2 origin on layer 1 and fail. We check for conflicting (and null) uuid among all lower layers, including those layers that are on the same fs as the upper layer. Reported-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/ovl_entry.h | 2 ++ fs/overlayfs/super.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index d9fbebdb6076..c9324ad47e15 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -24,6 +24,8 @@ struct ovl_sb { dev_t pseudo_dev; /* Unusable (conflicting) uuid */ bool bad_uuid; + /* Used as a lower layer (but maybe also as upper) */ + bool is_lower; }; struct ovl_layer { diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d7c406d8c793..7cf543608b9f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1260,7 +1260,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) if (!ofs->config.nfs_export && !ofs->upper_mnt) return true; - for (i = 1; i < ofs->numfs; i++) { + for (i = 0; i < ofs->numfs; i++) { /* * We use uuid to associate an overlay lower file handle with a * lower layer, so we can accept lower fs with null uuid as long @@ -1268,7 +1268,8 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * if we detect multiple lower fs with the same uuid, we * disable lower file handle decoding on all of them. */ - if (uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { + if (ofs->fs[i].is_lower && + uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { ofs->fs[i].bad_uuid = true; return false; } @@ -1342,10 +1343,12 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, /* * All lower layers that share the same fs as upper layer, use the real * upper st_dev. + * is_lower will be set if upper fs is shared with a lower layer. */ if (ofs->upper_mnt) { ofs->fs[0].sb = ofs->upper_mnt->mnt_sb; ofs->fs[0].pseudo_dev = ofs->upper_mnt->mnt_sb->s_dev; + ofs->fs[0].is_lower = false; } for (i = 0; i < numlower; i++) { @@ -1387,6 +1390,7 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, ofs->layers[ofs->numlayer].fsid = fsid; ofs->layers[ofs->numlayer].fs = &ofs->fs[fsid]; ofs->numlayer++; + ofs->fs[fsid].is_lower = true; } /* -- cgit v1.2.3-59-g8ed1b From b7bf9908e17c4dc4b80876f299ac03ddf188efd9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 14 Jan 2020 22:17:25 +0200 Subject: ovl: fix corner case of non-constant st_dev;st_ino On non-samefs overlay without xino, non pure upper inodes should use a pseudo_dev assigned to each unique lower fs, but if lower layer is on the same fs and upper layer, it has no pseudo_dev assigned. In this overlay layers setup: - two filesystems, A and B - upper layer is on A - lower layer 1 is also on A - lower layer 2 is on B Non pure upper overlay inode, whose origin is in layer 1 will have the st_dev;st_ino values of the real lower inode before copy up and the st_dev;st_ino values of the real upper inode after copy up. Fix this inconsitency by assigning a unique pseudo_dev also for upper fs, that will be used as st_dev value along with the lower inode st_dev for overlay inodes in the case above. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 16 ++++------------ fs/overlayfs/super.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 60f7b8f092f2..79e8994e3bc1 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -121,13 +121,12 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) */ stat->dev = dentry->d_sb->s_dev; stat->ino = dentry->d_inode->i_ino; - } else if (fsid) { + } else { /* * For non-samefs setup, if we cannot map all layers st_ino * to a unified address space, we need to make sure that st_dev - * is unique per lower fs. Upper layer uses real st_dev and - * lower layers use the unique anonymous bdev assigned to the - * lower fs. + * is unique per underlying fs, so we use the unique anonymous + * bdev assigned to the underlying fs. */ stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev; } @@ -143,7 +142,6 @@ int ovl_getattr(const struct path *path, struct kstat *stat, struct path realpath; const struct cred *old_cred; bool is_dir = S_ISDIR(dentry->d_inode->i_mode); - bool samefs = ovl_same_fs(dentry->d_sb); int fsid = 0; int err; bool metacopy_blocks = false; @@ -198,13 +196,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, (!ovl_verify_lower(dentry->d_sb) && (is_dir || lowerstat.nlink == 1))) { fsid = ovl_layer_lower(dentry)->fsid; - /* - * Cannot use origin st_dev;st_ino because - * origin inode content may differ from overlay - * inode content. - */ - if (samefs || fsid) - stat->ino = lowerstat.ino; + stat->ino = lowerstat.ino; } /* diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 7cf543608b9f..dcdcb4e3003a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -229,8 +229,7 @@ static void ovl_free_fs(struct ovl_fs *ofs) mntput(ofs->layers[i].mnt); } kfree(ofs->layers); - /* fs[0].pseudo_dev is either null or real upper st_dev */ - for (i = 1; i < ofs->numfs; i++) + for (i = 0; i < ofs->numfs; i++) free_anon_bdev(ofs->fs[i].pseudo_dev); kfree(ofs->fs); @@ -1341,13 +1340,19 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, ofs->numlayer = 1; /* - * All lower layers that share the same fs as upper layer, use the real - * upper st_dev. + * All lower layers that share the same fs as upper layer, use the same + * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower + * only overlay to simplify ovl_fs_free(). * is_lower will be set if upper fs is shared with a lower layer. */ + err = get_anon_bdev(&ofs->fs[0].pseudo_dev); + if (err) { + pr_err("failed to get anonymous bdev for upper fs\n"); + goto out; + } + if (ofs->upper_mnt) { ofs->fs[0].sb = ofs->upper_mnt->mnt_sb; - ofs->fs[0].pseudo_dev = ofs->upper_mnt->mnt_sb->s_dev; ofs->fs[0].is_lower = false; } -- cgit v1.2.3-59-g8ed1b From 1346416564f06444ca59778c05607bedb91a991a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Jan 2020 09:46:45 +0100 Subject: ovl: layer is const The ovl_layer struct is never modified except at initialization. Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 12 ++++++------ fs/overlayfs/overlayfs.h | 2 +- fs/overlayfs/ovl_entry.h | 4 ++-- fs/overlayfs/readdir.c | 2 +- fs/overlayfs/super.c | 23 ++++++++++++----------- fs/overlayfs/util.c | 2 +- 6 files changed, 23 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index d01f938fcc5c..6f54d70cef27 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -358,7 +358,7 @@ static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx) */ static struct dentry *ovl_lookup_real_one(struct dentry *connected, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct inode *dir = d_inode(connected); struct dentry *this, *parent = NULL; @@ -414,14 +414,14 @@ fail: static struct dentry *ovl_lookup_real(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer); + const struct ovl_layer *layer); /* * Lookup an indexed or hashed overlay dentry by real inode. */ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct ovl_fs *ofs = sb->s_fs_info; struct dentry *index = NULL; @@ -486,7 +486,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, */ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct dentry *next, *parent = NULL; struct dentry *ancestor = ERR_PTR(-EIO); @@ -539,7 +539,7 @@ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb, */ static struct dentry *ovl_lookup_real(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct dentry *connected; int err = 0; @@ -645,7 +645,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb, struct dentry *index) { struct ovl_fs *ofs = sb->s_fs_info; - struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer; + const struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer; struct dentry *real = upper ?: (index ?: lowerpath->dentry); /* diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e4ae3a459145..dabfa0d559fb 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -239,7 +239,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_lowerdata(struct dentry *dentry); -struct ovl_layer *ovl_layer_lower(struct dentry *dentry); +const struct ovl_layer *ovl_layer_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index c9324ad47e15..89015ea822e7 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -40,7 +40,7 @@ struct ovl_layer { }; struct ovl_path { - struct ovl_layer *layer; + const struct ovl_layer *layer; struct dentry *dentry; }; @@ -50,7 +50,7 @@ struct ovl_fs { unsigned int numlayer; /* Number of unique fs among layers including upper fs */ unsigned int numfs; - struct ovl_layer *layers; + const struct ovl_layer *layers; struct ovl_sb *fs; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index c8e478fa96a5..40ac9ce2465a 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -691,7 +691,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) int err; struct ovl_dir_file *od = file->private_data; struct dentry *dir = file->f_path.dentry; - struct ovl_layer *lower_layer = ovl_layer_lower(dir); + const struct ovl_layer *lower_layer = ovl_layer_lower(dir); struct ovl_readdir_translate rdt = { .ctx.actor = ovl_fill_real, .orig_ctx = ctx, diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index dcdcb4e3003a..c795b7498ad2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1320,12 +1320,13 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, { int err; unsigned int i; + struct ovl_layer *layers; err = -ENOMEM; - ofs->layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), - GFP_KERNEL); - if (ofs->layers == NULL) + layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL); + if (!layers) goto out; + ofs->layers = layers; ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL); if (ofs->fs == NULL) @@ -1334,9 +1335,9 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, /* idx/fsid 0 are reserved for upper fs even with lower only overlay */ ofs->numfs++; - ofs->layers[0].mnt = ofs->upper_mnt; - ofs->layers[0].idx = 0; - ofs->layers[0].fsid = 0; + layers[0].mnt = ofs->upper_mnt; + layers[0].idx = 0; + layers[0].fsid = 0; ofs->numlayer = 1; /* @@ -1389,11 +1390,11 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, */ mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; - ofs->layers[ofs->numlayer].trap = trap; - ofs->layers[ofs->numlayer].mnt = mnt; - ofs->layers[ofs->numlayer].idx = ofs->numlayer; - ofs->layers[ofs->numlayer].fsid = fsid; - ofs->layers[ofs->numlayer].fs = &ofs->fs[fsid]; + layers[ofs->numlayer].trap = trap; + layers[ofs->numlayer].mnt = mnt; + layers[ofs->numlayer].idx = ofs->numlayer; + layers[ofs->numlayer].fsid = fsid; + layers[ofs->numlayer].fs = &ofs->fs[fsid]; ofs->numlayer++; ofs->fs[fsid].is_lower = true; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index df503a8c6bcf..ea005085803f 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -186,7 +186,7 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry) return oe->numlower ? oe->lowerstack[0].dentry : NULL; } -struct ovl_layer *ovl_layer_lower(struct dentry *dentry) +const struct ovl_layer *ovl_layer_lower(struct dentry *dentry) { struct ovl_entry *oe = dentry->d_fsdata; -- cgit v1.2.3-59-g8ed1b From 5dcdc43e24a1810d3c3f4959af3d0c8e0026d863 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Wed, 20 Nov 2019 17:45:25 +0800 Subject: vfs: add vfs_iocb_iter_[read|write] helper functions This doesn't cause any behavior changes and will be used by overlay async IO implementation. Signed-off-by: Jiufei Xue Signed-off-by: Miklos Szeredi --- fs/read_write.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 16 ++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index 5bbf587f5bc1..bc75493e747b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -939,6 +939,34 @@ out: return ret; } +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, + struct iov_iter *iter) +{ + size_t tot_len; + ssize_t ret = 0; + + if (!file->f_op->read_iter) + return -EINVAL; + if (!(file->f_mode & FMODE_READ)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_READ)) + return -EINVAL; + + tot_len = iov_iter_count(iter); + if (!tot_len) + goto out; + ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); + if (ret < 0) + return ret; + + ret = call_read_iter(file, iocb, iter); +out: + if (ret >= 0) + fsnotify_access(file); + return ret; +} +EXPORT_SYMBOL(vfs_iocb_iter_read); + ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { @@ -975,6 +1003,34 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, return ret; } +ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, + struct iov_iter *iter) +{ + size_t tot_len; + ssize_t ret = 0; + + if (!file->f_op->write_iter) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_WRITE)) + return -EINVAL; + + tot_len = iov_iter_count(iter); + if (!tot_len) + return 0; + ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); + if (ret < 0) + return ret; + + ret = call_write_iter(file, iocb, iter); + if (ret > 0) + fsnotify_modify(file); + + return ret; +} +EXPORT_SYMBOL(vfs_iocb_iter_write); + ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..28315a286bab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2078,6 +2078,18 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) }; } +static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, + struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + .ki_flags = kiocb_src->ki_flags, + .ki_hint = kiocb_src->ki_hint, + .ki_ioprio = kiocb_src->ki_ioprio, + .ki_pos = kiocb_src->ki_pos, + }; +} + /* * Inode state bits. Protected by inode->i_lock * @@ -3108,6 +3120,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, + struct iov_iter *iter); +ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, + struct iov_iter *iter); /* fs/block_dev.c */ extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); -- cgit v1.2.3-59-g8ed1b From 2406a307ac7ddfd7effeeaff6947149ec6a95b4e Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Wed, 20 Nov 2019 17:45:26 +0800 Subject: ovl: implement async IO routines A performance regression was observed since linux v4.19 with aio test using fio with iodepth 128 on overlayfs. The queue depth of the device was always 1 which is unexpected. After investigation, it was found that commit 16914e6fc7e1 ("ovl: add ovl_read_iter()") and commit 2a92e07edc5e ("ovl: add ovl_write_iter()") resulted in vfs_iter_{read,write} being called on underlying filesystem, which always results in syncronous IO. Implement async IO for stacked reading and writing. This resolves the performance regresion. This is implemented by allocating a new kiocb for submitting the AIO request on the underlying filesystem. When the request is completed, the new kiocb is freed and the completion callback is called on the original iocb. Signed-off-by: Jiufei Xue Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 109 ++++++++++++++++++++++++++++++++++++++++++----- fs/overlayfs/overlayfs.h | 2 + fs/overlayfs/super.c | 14 ++++-- 3 files changed, 110 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 859efeaaefab..4d1796c37dfd 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -11,6 +11,14 @@ #include #include "overlayfs.h" +struct ovl_aio_req { + struct kiocb iocb; + struct kiocb *orig_iocb; + struct fd fd; +}; + +static struct kmem_cache *ovl_aio_request_cachep; + static char ovl_whatisit(struct inode *inode, struct inode *realinode) { if (realinode != ovl_inode_upper(inode)) @@ -225,6 +233,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) return flags; } +static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) +{ + struct kiocb *iocb = &aio_req->iocb; + struct kiocb *orig_iocb = aio_req->orig_iocb; + + if (iocb->ki_flags & IOCB_WRITE) { + struct inode *inode = file_inode(orig_iocb->ki_filp); + + file_end_write(iocb->ki_filp); + ovl_copyattr(ovl_inode_real(inode), inode); + } + + orig_iocb->ki_pos = iocb->ki_pos; + fdput(aio_req->fd); + kmem_cache_free(ovl_aio_request_cachep, aio_req); +} + +static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) +{ + struct ovl_aio_req *aio_req = container_of(iocb, + struct ovl_aio_req, iocb); + struct kiocb *orig_iocb = aio_req->orig_iocb; + + ovl_aio_cleanup_handler(aio_req); + orig_iocb->ki_complete(orig_iocb, res, res2); +} + static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -240,10 +275,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) return ret; old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); + if (is_sync_kiocb(iocb)) { + ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, + ovl_iocb_to_rwf(iocb)); + } else { + struct ovl_aio_req *aio_req; + + ret = -ENOMEM; + aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); + if (!aio_req) + goto out; + + aio_req->fd = real; + real.flags = 0; + aio_req->orig_iocb = iocb; + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } +out: revert_creds(old_cred); - ovl_file_accessed(file); fdput(real); @@ -274,15 +327,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out_unlock; old_cred = ovl_override_creds(file_inode(file)->i_sb); - file_start_write(real.file); - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); - file_end_write(real.file); + if (is_sync_kiocb(iocb)) { + file_start_write(real.file); + ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, + ovl_iocb_to_rwf(iocb)); + file_end_write(real.file); + /* Update size */ + ovl_copyattr(ovl_inode_real(inode), inode); + } else { + struct ovl_aio_req *aio_req; + + ret = -ENOMEM; + aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); + if (!aio_req) + goto out; + + file_start_write(real.file); + aio_req->fd = real; + real.flags = 0; + aio_req->orig_iocb = iocb; + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } +out: revert_creds(old_cred); - - /* Update size */ - ovl_copyattr(ovl_inode_real(inode), inode); - fdput(real); out_unlock: @@ -651,3 +722,19 @@ const struct file_operations ovl_file_operations = { .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, }; + +int __init ovl_aio_request_cache_init(void) +{ + ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req", + sizeof(struct ovl_aio_req), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ovl_aio_request_cachep) + return -ENOMEM; + + return 0; +} + +void ovl_aio_request_cache_destroy(void) +{ + kmem_cache_destroy(ovl_aio_request_cachep); +} diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index dabfa0d559fb..3623d28aa4fa 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -450,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ extern const struct file_operations ovl_file_operations; +int __init ovl_aio_request_cache_init(void); +void ovl_aio_request_cache_destroy(void); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index c795b7498ad2..319fe0d355b0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1770,9 +1770,15 @@ static int __init ovl_init(void) if (ovl_inode_cachep == NULL) return -ENOMEM; - err = register_filesystem(&ovl_fs_type); - if (err) - kmem_cache_destroy(ovl_inode_cachep); + err = ovl_aio_request_cache_init(); + if (!err) { + err = register_filesystem(&ovl_fs_type); + if (!err) + return 0; + + ovl_aio_request_cache_destroy(); + } + kmem_cache_destroy(ovl_inode_cachep); return err; } @@ -1787,7 +1793,7 @@ static void __exit ovl_exit(void) */ rcu_barrier(); kmem_cache_destroy(ovl_inode_cachep); - + ovl_aio_request_cache_destroy(); } module_init(ovl_init); -- cgit v1.2.3-59-g8ed1b From 1a980b8cbf0059a5308eea61522f232fd03002e2 Mon Sep 17 00:00:00 2001 From: Murphy Zhou Date: Fri, 17 Jan 2020 20:49:29 +0800 Subject: ovl: add splice file read write helper Now overlayfs falls back to use default file splice read and write, which is not compatiple with overlayfs, returning EFAULT. xfstests generic/591 can reproduce part of this. Tested this patch with xfstests auto group tests. Signed-off-by: Murphy Zhou Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'fs') diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 4d1796c37dfd..67ef4c8cfdc7 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -9,6 +9,9 @@ #include #include #include +#include +#include +#include #include "overlayfs.h" struct ovl_aio_req { @@ -362,6 +365,48 @@ out_unlock: return ret; } +static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + ssize_t ret; + struct fd real; + const struct cred *old_cred; + + ret = ovl_real_fdget(in, &real); + if (ret) + return ret; + + old_cred = ovl_override_creds(file_inode(in)->i_sb); + ret = generic_file_splice_read(real.file, ppos, pipe, len, flags); + revert_creds(old_cred); + + ovl_file_accessed(in); + fdput(real); + return ret; +} + +static ssize_t +ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct fd real; + const struct cred *old_cred; + ssize_t ret; + + ret = ovl_real_fdget(out, &real); + if (ret) + return ret; + + old_cred = ovl_override_creds(file_inode(out)->i_sb); + ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); + revert_creds(old_cred); + + ovl_file_accessed(out); + fdput(real); + return ret; +} + static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fd real; @@ -718,6 +763,8 @@ const struct file_operations ovl_file_operations = { .fadvise = ovl_fadvise, .unlocked_ioctl = ovl_ioctl, .compat_ioctl = ovl_compat_ioctl, + .splice_read = ovl_splice_read, + .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, -- cgit v1.2.3-59-g8ed1b From a4ac9d45c0cd14a2adc872186431c79804b77dbf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 3 Feb 2020 11:41:53 +0100 Subject: ovl: fix lseek overflow on 32bit ovl_lseek() is using ssize_t to return the value from vfs_llseek(). On a 32-bit kernel ssize_t is a 32-bit signed int, which overflows above 2 GB. Assign the return value of vfs_llseek() to loff_t to fix this. Reported-by: Boris Gjenero Fixes: 9e46b840c705 ("ovl: support stacked SEEK_HOLE/SEEK_DATA") Cc: # v4.19 Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 67ef4c8cfdc7..a5317216de73 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -157,7 +157,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) struct inode *inode = file_inode(file); struct fd real; const struct cred *old_cred; - ssize_t ret; + loff_t ret; /* * The two special cases below do not need to involve real fs, -- cgit v1.2.3-59-g8ed1b