diff options
Diffstat (limited to 'fs/overlayfs/readdir.c')
-rw-r--r-- | fs/overlayfs/readdir.c | 261 |
1 files changed, 169 insertions, 92 deletions
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 40ac9ce2465a..2b210640036c 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -170,7 +170,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, return p; } -static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, +static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -179,22 +179,22 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, struct ovl_cache_entry *p; if (ovl_cache_entry_find_link(name, len, &newp, &parent)) - return 0; + return true; p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) { rdd->err = -ENOMEM; - return -ENOMEM; + return false; } list_add_tail(&p->l_node, rdd->list); rb_link_node(&p->node, parent, newp); rb_insert_color(&p->node, rdd->root); - return 0; + return true; } -static int ovl_fill_lowest(struct ovl_readdir_data *rdd, +static bool ovl_fill_lowest(struct ovl_readdir_data *rdd, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -211,7 +211,7 @@ static int ovl_fill_lowest(struct ovl_readdir_data *rdd, list_add_tail(&p->l_node, &rdd->middle); } - return rdd->err; + return rdd->err == 0; } void ovl_cache_free(struct list_head *list) @@ -250,7 +250,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) } } -static int ovl_fill_merge(struct dir_context *ctx, const char *name, +static bool ovl_fill_merge(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -264,11 +264,11 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type); } -static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd) { int err; struct ovl_cache_entry *p; - 
struct dentry *dentry; + struct dentry *dentry, *dir = path->dentry; const struct cred *old_cred; old_cred = ovl_override_creds(rdd->dentry->d_sb); @@ -278,7 +278,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) while (rdd->first_maybe_whiteout) { p = rdd->first_maybe_whiteout; rdd->first_maybe_whiteout = p->next_maybe_whiteout; - dentry = lookup_one_len(p->name, dir, p->len); + dentry = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len); if (!IS_ERR(dentry)) { p->is_whiteout = ovl_is_whiteout(dentry); dput(dentry); @@ -291,13 +291,13 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) return err; } -static inline int ovl_dir_read(struct path *realpath, +static inline int ovl_dir_read(const struct path *realpath, struct ovl_readdir_data *rdd) { struct file *realfile; int err; - realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); + realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE); if (IS_ERR(realfile)) return PTR_ERR(realfile); @@ -312,25 +312,13 @@ static inline int ovl_dir_read(struct path *realpath, } while (!err && rdd->count); if (!err && rdd->first_maybe_whiteout && rdd->dentry) - err = ovl_check_whiteouts(realpath->dentry, rdd); + err = ovl_check_whiteouts(realpath, rdd); fput(realfile); return err; } -/* - * Can we iterate real dir directly? - * - * Non-merge dir may contain whiteouts from a time it was a merge upper, before - * lower dir was removed under it and possibly before it was rotated from upper - * to lower layer. 
- */ -static bool ovl_dir_is_real(struct dentry *dir) -{ - return !ovl_test_flag(OVL_WHITEOUTS, d_inode(dir)); -} - static void ovl_dir_reset(struct file *file) { struct ovl_dir_file *od = file->private_data; @@ -438,15 +426,23 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) /* Map inode number to lower fs unique range */ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, - const char *name, int namelen) + const char *name, int namelen, bool warn) { - if (ino >> (64 - xinobits)) { - pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", - namelen, name, ino, xinobits); + unsigned int xinoshift = 64 - xinobits; + + if (unlikely(ino >> xinoshift)) { + if (warn) { + pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", + namelen, name, ino, xinobits); + } return ino; } - return ino | ((u64)fsid) << (64 - xinobits); + /* + * The lowest xinobit is reserved for mapping the non-persistent inode + * numbers range, but this range is only exposed via st_ino, not here. + */ + return ino | ((u64)fsid) << (xinoshift + 1); } /* @@ -459,7 +455,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, * copy up origin, call vfs_getattr() on the overlay entry to make * sure that d_ino will be consistent with st_ino from stat(2). 
*/ -static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) +static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p) { struct dentry *dir = path->dentry; @@ -483,8 +479,10 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) goto get; } } - this = lookup_one_len(p->name, dir, p->len); + this = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { + /* Mark a stale entry */ + p->is_whiteout = true; if (IS_ERR(this)) { err = PTR_ERR(this); this = NULL; @@ -515,7 +513,8 @@ get: } else if (xinobits && !OVL_TYPE_UPPER(type)) { ino = ovl_remap_lower_ino(ino, xinobits, ovl_layer_lower(this)->fsid, - p->name, p->len); + p->name, p->len, + ovl_xino_warn(dir->d_sb)); } out: @@ -529,7 +528,7 @@ fail: goto out; } -static int ovl_fill_plain(struct dir_context *ctx, const char *name, +static bool ovl_fill_plain(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -541,14 +540,14 @@ static int ovl_fill_plain(struct dir_context *ctx, const char *name, p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) { rdd->err = -ENOMEM; - return -ENOMEM; + return false; } list_add_tail(&p->l_node, rdd->list); - return 0; + return true; } -static int ovl_dir_read_impure(struct path *path, struct list_head *list, +static int ovl_dir_read_impure(const struct path *path, struct list_head *list, struct rb_root *root) { int err; @@ -593,10 +592,11 @@ static int ovl_dir_read_impure(struct path *path, struct list_head *list, return 0; } -static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) +static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path) { int res; struct dentry *dentry = path->dentry; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct ovl_dir_cache *cache; cache = ovl_dir_cache(d_inode(dentry)); @@ -623,8 +623,8 @@ static struct ovl_dir_cache 
*ovl_cache_get_impure(struct path *path) * Removing the "impure" xattr is best effort. */ if (!ovl_want_write(dentry)) { - ovl_do_removexattr(ovl_dentry_upper(dentry), - OVL_XATTR_IMPURE); + ovl_removexattr(ofs, ovl_dentry_upper(dentry), + OVL_XATTR_IMPURE); ovl_drop_write(dentry); } ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); @@ -645,9 +645,10 @@ struct ovl_readdir_translate { u64 parent_ino; int fsid; int xinobits; + bool xinowarn; }; -static int ovl_fill_real(struct dir_context *ctx, const char *name, +static bool ovl_fill_real(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -665,7 +666,7 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name, ino = p->ino; } else if (rdt->xinobits) { ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid, - name, namelen); + name, namelen, rdt->xinowarn); } return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); @@ -696,6 +697,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) .ctx.actor = ovl_fill_real, .orig_ctx = ctx, .xinobits = ovl_xino_bits(dir->d_sb), + .xinowarn = ovl_xino_warn(dir->d_sb), }; if (rdt.xinobits && lower_layer) @@ -732,8 +734,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) struct ovl_dir_file *od = file->private_data; struct dentry *dentry = file->f_path.dentry; struct ovl_cache_entry *p; + const struct cred *old_cred; int err; + old_cred = ovl_override_creds(dentry->d_sb); if (!ctx->pos) ovl_dir_reset(file); @@ -747,17 +751,20 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) (ovl_same_fs(dentry->d_sb) && (ovl_is_impure_dir(file) || OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { - return ovl_iterate_real(file, ctx); + err = ovl_iterate_real(file, ctx); + } else { + err = iterate_dir(od->realfile, ctx); } - return iterate_dir(od->realfile, ctx); + goto out; } if (!od->cache) { struct ovl_dir_cache *cache; cache = ovl_cache_get(dentry); + err = 
PTR_ERR(cache); if (IS_ERR(cache)) - return PTR_ERR(cache); + goto out; od->cache = cache; ovl_seek_cursor(od, ctx->pos); @@ -769,15 +776,21 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) if (!p->ino) { err = ovl_cache_update_ino(&file->f_path, p); if (err) - return err; + goto out; } + } + /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; } od->cursor = p->l_node.next; ctx->pos++; } - return 0; + err = 0; +out: + revert_creds(old_cred); + return err; } static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) @@ -820,47 +833,77 @@ out_unlock: return res; } -static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, - int datasync) +static struct file *ovl_dir_open_realfile(const struct file *file, + const struct path *realpath) { + struct file *res; + const struct cred *old_cred; + + old_cred = ovl_override_creds(file_inode(file)->i_sb); + res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE)); + revert_creds(old_cred); + + return res; +} + +/* + * Like ovl_real_fdget(), returns upperfile if dir was copied up since open. + * Unlike ovl_real_fdget(), this caches upperfile in file->private_data. + * + * TODO: use same abstract type for file->private_data of dir and file so + * upperfile could also be cached for files as well. + */ +struct file *ovl_dir_real_file(const struct file *file, bool want_upper) +{ + struct ovl_dir_file *od = file->private_data; struct dentry *dentry = file->f_path.dentry; - struct file *realfile = od->realfile; + struct file *old, *realfile = od->realfile; - /* Nothing to sync for lower */ if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) - return 0; + return want_upper ? 
NULL : realfile; /* * Need to check if we started out being a lower dir, but got copied up */ if (!od->is_upper) { - struct inode *inode = file_inode(file); - realfile = READ_ONCE(od->upperfile); if (!realfile) { struct path upperpath; ovl_path_upper(dentry, &upperpath); - realfile = ovl_path_open(&upperpath, O_RDONLY); - - inode_lock(inode); - if (!od->upperfile) { - if (IS_ERR(realfile)) { - inode_unlock(inode); - return PTR_ERR(realfile); - } - smp_store_release(&od->upperfile, realfile); - } else { - /* somebody has beaten us to it */ - if (!IS_ERR(realfile)) - fput(realfile); - realfile = od->upperfile; + realfile = ovl_dir_open_realfile(file, &upperpath); + if (IS_ERR(realfile)) + return realfile; + + old = cmpxchg_release(&od->upperfile, NULL, realfile); + if (old) { + fput(realfile); + realfile = old; } - inode_unlock(inode); } } + return realfile; +} + +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct file *realfile; + int err; + + err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb)); + if (err <= 0) + return err; + + realfile = ovl_dir_real_file(file, true); + err = PTR_ERR_OR_ZERO(realfile); + + /* Nothing to sync for lower */ + if (!realfile || err) + return err; + return vfs_fsync_range(realfile, start, end, datasync); } @@ -893,7 +936,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file) return -ENOMEM; type = ovl_path_real(file->f_path.dentry, &realpath); - realfile = ovl_path_open(&realpath, file->f_flags); + realfile = ovl_dir_open_realfile(file, &realpath); if (IS_ERR(realfile)) { kfree(od); return PTR_ERR(realfile); @@ -958,7 +1001,8 @@ del_entry: return err; } -void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) +void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, + struct list_head *list) { struct ovl_cache_entry *p; @@ -969,7 +1013,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) if (WARN_ON(!p->is_whiteout || 
!p->is_upper)) continue; - dentry = lookup_one_len(p->name, upper, p->len); + dentry = ovl_lookup_upper(ofs, p->name, upper, p->len); if (IS_ERR(dentry)) { pr_err("lookup '%s/%.*s' failed (%i)\n", upper->d_name.name, p->len, p->name, @@ -977,13 +1021,13 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) continue; } if (dentry->d_inode) - ovl_cleanup(upper->d_inode, dentry); + ovl_cleanup(ofs, upper->d_inode, dentry); dput(dentry); } inode_unlock(upper->d_inode); } -static int ovl_check_d_type(struct dir_context *ctx, const char *name, +static bool ovl_check_d_type(struct dir_context *ctx, const char *name, int namelen, loff_t offset, u64 ino, unsigned int d_type) { @@ -992,19 +1036,19 @@ static int ovl_check_d_type(struct dir_context *ctx, const char *name, /* Even if d_type is not supported, DT_DIR is returned for . and .. */ if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) - return 0; + return true; if (d_type != DT_UNKNOWN) rdd->d_type_supported = true; - return 0; + return true; } /* * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values * if error is encountered. 
*/ -int ovl_check_d_type_supported(struct path *realpath) +int ovl_check_d_type_supported(const struct path *realpath) { int err; struct ovl_readdir_data rdd = { @@ -1019,7 +1063,10 @@ int ovl_check_d_type_supported(struct path *realpath) return rdd.d_type_supported; } -static void ovl_workdir_cleanup_recurse(struct path *path, int level) +#define OVL_INCOMPATDIR_NAME "incompat" + +static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path, + int level) { int err; struct inode *dir = path->dentry->d_inode; @@ -1033,6 +1080,19 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level) .root = &root, .is_lowest = false, }; + bool incompat = false; + + /* + * The "work/incompat" directory is treated specially - if it is not + * empty, instead of printing a generic error and mounting read-only, + * we will error about incompat features and fail the mount. + * + * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name + * starts with '#'. + */ + if (level == 2 && + !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME)) + incompat = true; err = ovl_dir_read(path, &rdd); if (err) @@ -1047,38 +1107,48 @@ static void ovl_workdir_cleanup_recurse(struct path *path, int level) continue; if (p->len == 2 && p->name[1] == '.') continue; + } else if (incompat) { + pr_err("overlay with incompat feature '%s' cannot be mounted\n", + p->name); + err = -EINVAL; + break; } - dentry = lookup_one_len(p->name, path->dentry, p->len); + dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len); if (IS_ERR(dentry)) continue; if (dentry->d_inode) - ovl_workdir_cleanup(dir, path->mnt, dentry, level); + err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level); dput(dentry); + if (err) + break; } inode_unlock(dir); out: ovl_cache_free(&list); + return err; } -void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, - struct dentry *dentry, int level) +int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, + struct 
vfsmount *mnt, struct dentry *dentry, int level) { int err; if (!d_is_dir(dentry) || level > 1) { - ovl_cleanup(dir, dentry); - return; + return ovl_cleanup(ofs, dir, dentry); } - err = ovl_do_rmdir(dir, dentry); + err = ovl_do_rmdir(ofs, dir, dentry); if (err) { struct path path = { .mnt = mnt, .dentry = dentry }; inode_unlock(dir); - ovl_workdir_cleanup_recurse(&path, level + 1); + err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1); inode_lock_nested(dir, I_MUTEX_PARENT); - ovl_cleanup(dir, dentry); + if (!err) + err = ovl_cleanup(ofs, dir, dentry); } + + return err; } int ovl_indexdir_cleanup(struct ovl_fs *ofs) @@ -1087,7 +1157,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) struct dentry *indexdir = ofs->indexdir; struct dentry *index = NULL; struct inode *dir = indexdir->d_inode; - struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir }; + struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; LIST_HEAD(list); struct rb_root root = RB_ROOT; struct ovl_cache_entry *p; @@ -1111,18 +1181,25 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) if (p->len == 2 && p->name[1] == '.') continue; } - index = lookup_one_len(p->name, indexdir, p->len); + index = ovl_lookup_upper(ofs, p->name, indexdir, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; break; } + /* Cleanup leftover from index create/cleanup attempt */ + if (index->d_name.name[0] == '#') { + err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1); + if (err) + break; + goto next; + } err = ovl_verify_index(ofs, index); if (!err) { goto next; } else if (err == -ESTALE) { /* Cleanup stale index entries */ - err = ovl_cleanup(dir, index); + err = ovl_cleanup(ofs, dir, index); } else if (err != -ENOENT) { /* * Abort mount to avoid corrupting the index if @@ -1135,10 +1212,10 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) * Whiteout orphan index to block future open by * handle after overlay nlink dropped to zero. 
*/ - err = ovl_cleanup_and_whiteout(indexdir, dir, index); + err = ovl_cleanup_and_whiteout(ofs, dir, index); } else { /* Cleanup orphan index entries */ - err = ovl_cleanup(dir, index); + err = ovl_cleanup(ofs, dir, index); } if (err) |