From 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Apr 2014 17:08:43 +0200 Subject: vfs: add RENAME_NOREPLACE flag If this flag is specified and the target of the rename exists then the rename syscall fails with EEXIST. The VFS does the existence checking, so it is trivial to enable for most local filesystems. This patch only enables it in ext4. For network filesystems the VFS check is not enough as there may be a race between a remote create and the rename, so these filesystems need to handle this flag in their ->rename() implementations to ensure atomicity. Andy writes about why this is useful: "The trivial answer: to eliminate the race condition from 'mv -i'. Another answer: there's a common pattern to atomically create a file with contents: open a temporary file, write to it, optionally fsync it, close it, then link(2) it to the final name, then unlink the temporary file. The reason to use link(2) is because it won't silently clobber the destination. This is annoying: - It requires an extra system call that shouldn't be necessary. - It doesn't work on (IMO sensible) filesystems that don't support hard links (e.g. vfat). - It's not atomic -- there's an intermediate state where both files exist. - It's ugly. The new rename flag will make this totally sensible. To be fair, on new enough kernels, you can also use O_TMPFILE and linkat to achieve the same thing even more cleanly." Suggested-by: Andy Lutomirski Signed-off-by: Miklos Szeredi Reviewed-by: J. Bruce Fields --- include/uapi/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 6c28b61bb690..9250f4dd7d96 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -35,6 +35,8 @@ #define SEEK_HOLE 4 /* seek to the next hole */ #define SEEK_MAX SEEK_HOLE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ + struct fstrim_range { __u64 start; __u64 len; -- cgit v1.2.3-59-g8ed1b From da1ce0670c14d8380e423a3239e562a1dc15fa9e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Apr 2014 17:08:43 +0200 Subject: vfs: add cross-rename If flags contain RENAME_EXCHANGE then exchange source and destination files. There's no restriction on the type of the files; e.g. a directory can be exchanged with a symlink. Signed-off-by: Miklos Szeredi Reviewed-by: Jan Kara Reviewed-by: J. Bruce Fields --- fs/dcache.c | 50 ++++++++++++++++++----- fs/namei.c | 104 +++++++++++++++++++++++++++++++++--------------- include/linux/dcache.h | 1 + include/uapi/linux/fs.h | 1 + security/security.c | 16 ++++++++ 5 files changed, 131 insertions(+), 41 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/dcache.c b/fs/dcache.c index ca02c13a84aa..66cba5a8a346 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2483,12 +2483,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) dentry->d_name.name = dentry->d_iname; } else { /* - * Both are internal. Just copy target to dentry + * Both are internal. */ - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.len = target->d_name.len; - return; + unsigned int i; + BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); + for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { + swap(((long *) &dentry->d_iname)[i], + ((long *) &target->d_iname)[i]); + } } } swap(dentry->d_name.len, target->d_name.len); @@ -2545,13 +2547,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * __d_move - move a dentry * @dentry: entry to move * @target: new dentry + * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. Caller must hold * rename_lock, the i_mutex of the source and target directories, * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). */ -static void __d_move(struct dentry * dentry, struct dentry * target) +static void __d_move(struct dentry *dentry, struct dentry *target, + bool exchange) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2573,8 +2577,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) __d_drop(dentry); __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); - /* Unhash the target: dput() will then get rid of it */ + /* + * Unhash the target (d_delete() is not usable here). If exchanging + * the two dentries, then rehash onto the other's hash queue. + */ __d_drop(target); + if (exchange) { + __d_rehash(target, + d_hash(dentry->d_parent, dentry->d_name.hash)); + } list_del(&dentry->d_u.d_child); list_del(&target->d_u.d_child); @@ -2601,6 +2612,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) write_seqcount_end(&dentry->d_seq); dentry_unlock_parents_for_move(dentry, target); + if (exchange) + fsnotify_d_move(target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2618,11 +2631,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target) void d_move(struct dentry *dentry, struct dentry *target) { write_seqlock(&rename_lock); - __d_move(dentry, target); + __d_move(dentry, target, false); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); +/* + * d_exchange - exchange two dentries + * @dentry1: first dentry + * @dentry2: second dentry + */ +void d_exchange(struct dentry *dentry1, struct dentry *dentry2) +{ + write_seqlock(&rename_lock); + + WARN_ON(!dentry1->d_inode); + WARN_ON(!dentry2->d_inode); + WARN_ON(IS_ROOT(dentry1)); + WARN_ON(IS_ROOT(dentry2)); + + __d_move(dentry1, dentry2, true); + + write_sequnlock(&rename_lock); +} + /** * d_ancestor - search for an ancestor * @p1: ancestor dentry @@ -2670,7 +2702,7 @@ static struct dentry *__d_unalias(struct inode *inode, m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: if (likely(!d_mountpoint(alias))) { - __d_move(alias, dentry); + __d_move(alias, dentry, false); ret = alias; } out_err: diff --git a/fs/namei.c b/fs/namei.c index 4096d589bb3f..c1178880f23c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4031,6 +4031,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, const unsigned char *old_name; struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; + bool new_is_dir = false; + unsigned max_links = new_dir->i_sb->s_max_links; if (source == target) return 0; @@ -4039,10 +4041,16 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - if (!target) + if (!target) { error = may_create(new_dir, new_dentry); - else - error = may_delete(new_dir, new_dentry, is_dir); + } else { + new_is_dir = d_is_dir(new_dentry); + + if (!(flags & RENAME_EXCHANGE)) + error = may_delete(new_dir, new_dentry, is_dir); + else + error = may_delete(new_dir, new_dentry, new_is_dir); + } if (error) return error; @@ -4056,10 +4064,17 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, * If we are going to change the parent - check write permissions, * we'll need to flip '..'. */ - if (is_dir && new_dir != old_dir) { - error = inode_permission(source, MAY_WRITE); - if (error) - return error; + if (new_dir != old_dir) { + if (is_dir) { + error = inode_permission(source, MAY_WRITE); + if (error) + return error; + } + if ((flags & RENAME_EXCHANGE) && new_is_dir) { + error = inode_permission(target, MAY_WRITE); + if (error) + return error; + } } error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, @@ -4069,7 +4084,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_name = fsnotify_oldname_init(old_dentry->d_name.name); dget(new_dentry); - if (!is_dir) + if (!is_dir || (flags & RENAME_EXCHANGE)) lock_two_nondirectories(source, target); else if (target) mutex_lock(&target->i_mutex); @@ -4078,25 +4093,25 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; - if (is_dir) { - unsigned max_links = new_dir->i_sb->s_max_links; - + if (max_links && new_dir != old_dir) { error = -EMLINK; - if (max_links && !target && new_dir != old_dir && - new_dir->i_nlink >= max_links) + if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) goto out; - - if (target) - shrink_dcache_parent(new_dentry); - } else { + if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && + old_dir->i_nlink >= max_links) + goto out; + } + if (is_dir && !(flags & RENAME_EXCHANGE) && target) + shrink_dcache_parent(new_dentry); + if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) goto out; - if (target) { - error = try_break_deleg(target, delegated_inode); - if (error) - goto out; - } + } + if (target && !new_is_dir) { + error = try_break_deleg(target, delegated_inode); + if (error) + goto out; } if (!flags) { error = old_dir->i_op->rename(old_dir, old_dentry, @@ -4108,22 +4123,31 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) goto out; - if (target) { + if (!(flags & RENAME_EXCHANGE) && target) { if (is_dir) target->i_flags |= S_DEAD; dont_mount(new_dentry); } - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry, new_dentry); + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { + if (!(flags & RENAME_EXCHANGE)) + d_move(old_dentry, new_dentry); + else + d_exchange(old_dentry, new_dentry); + } out: - if (!is_dir) + if (!is_dir || (flags & RENAME_EXCHANGE)) unlock_two_nondirectories(source, target); else if (target) mutex_unlock(&target->i_mutex); dput(new_dentry); - if (!error) + if (!error) { fsnotify_move(old_dir, new_dir, old_name, is_dir, - target, old_dentry); + !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); + if (flags & RENAME_EXCHANGE) { + fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, + new_is_dir, NULL, new_dentry); + } + } fsnotify_oldname_free(old_name); return error; @@ -4143,7 +4167,10 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, bool should_retry = false; int error; - if (flags & ~RENAME_NOREPLACE) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) return -EINVAL; retry: @@ -4180,7 +4207,8 @@ retry: oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; - newnd.flags |= LOOKUP_RENAME_TARGET; + if (!(flags & RENAME_EXCHANGE)) + newnd.flags |= LOOKUP_RENAME_TARGET; retry_deleg: trap = lock_rename(new_dir, old_dir); @@ -4200,12 +4228,23 @@ retry_deleg: error = -EEXIST; if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) goto exit5; + if (flags & RENAME_EXCHANGE) { + error = -ENOENT; + if (d_is_negative(new_dentry)) + goto exit5; + + if (!d_is_dir(new_dentry)) { + error = -ENOTDIR; + if (newnd.last.name[newnd.last.len]) + goto exit5; + } + } /* unless the source is a directory trailing slashes give -ENOTDIR */ if (!d_is_dir(old_dentry)) { error = -ENOTDIR; if (oldnd.last.name[oldnd.last.len]) goto exit5; - if (newnd.last.name[newnd.last.len]) + if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) goto exit5; } /* source should not be ancestor of target */ @@ -4213,7 +4252,8 @@ retry_deleg: if (old_dentry == trap) goto exit5; /* target should not be an ancestor of source */ - error = -ENOTEMPTY; + if (!(flags & RENAME_EXCHANGE)) + error = -ENOTEMPTY; if (new_dentry == trap) goto exit5; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3b50cac7ccb3..3b9bfdb83ba6 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -308,6 +308,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *); /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); +extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 9250f4dd7d96..ca1a11bb4443 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -36,6 +36,7 @@ #define SEEK_MAX SEEK_HOLE #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ struct fstrim_range { __u64 start; diff --git a/security/security.c b/security/security.c index 284fbc99aa9d..8b774f362a3d 100644 --- a/security/security.c +++ b/security/security.c @@ -439,6 +439,14 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, if (unlikely(IS_PRIVATE(old_dentry->d_inode) || (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) return 0; + + if (flags & RENAME_EXCHANGE) { + int err = security_ops->path_rename(new_dir, new_dentry, + old_dir, old_dentry); + if (err) + return err; + } + return security_ops->path_rename(old_dir, old_dentry, new_dir, new_dentry); } @@ -531,6 +539,14 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(IS_PRIVATE(old_dentry->d_inode) || (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) return 0; + + if (flags & RENAME_EXCHANGE) { + int err = security_ops->inode_rename(new_dir, new_dentry, + old_dir, old_dentry); + if (err) + return err; + } + return security_ops->inode_rename(old_dir, old_dentry, new_dir, new_dentry); } -- cgit v1.2.3-59-g8ed1b