From 3658840cd363f2be094f5dfd2f0b174a9055dd0f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:21 +0200 Subject: ext4: Remove ext4 locking of moved directory Remove locking of moved directory in ext4_rename2(). We will take care of it in VFS instead. This effectively reverts commit 0813299c586b ("ext4: Fix possible corruption when moving a directory") and followup fixes. CC: Ted Tso CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-1-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/ext4/namei.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 45b579805c95..0caf6c730ce3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3834,19 +3834,10 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, return retval; } - /* - * We need to protect against old.inode directory getting converted - * from inline directory format into a normal one. - */ - if (S_ISDIR(old.inode->i_mode)) - inode_lock_nested(old.inode, I_MUTEX_NONDIR2); - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); - if (IS_ERR(old.bh)) { - retval = PTR_ERR(old.bh); - goto unlock_moved_dir; - } + if (IS_ERR(old.bh)) + return PTR_ERR(old.bh); /* * Check for inode number is _not_ due to possible IO errors. @@ -4043,10 +4034,6 @@ release_bh: brelse(old.bh); brelse(new.bh); -unlock_moved_dir: - if (S_ISDIR(old.inode->i_mode)) - inode_unlock(old.inode); - return retval; } -- cgit v1.2.3-59-g8ed1b From 7517ce5dc4d6963ef9ace2acb3f081ef8cd8a1e3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:22 +0200 Subject: Revert "udf: Protect rename against modification of moved directory" This reverts commit f950fd0529130a617b3da526da9fb6a896ce87c2. The locking is going to be provided by vfs_rename() in the following patches. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-2-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/udf/namei.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index fd20423d3ed2..fd29a66e7241 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -793,11 +793,6 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (!empty_dir(new_inode)) goto out_oiter; } - /* - * We need to protect against old_inode getting converted from - * ICB to normal directory. - */ - inode_lock_nested(old_inode, I_MUTEX_NONDIR2); retval = udf_fiiter_find_entry(old_inode, &dotdot_name, &diriter); if (retval == -ENOENT) { @@ -806,10 +801,8 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, old_inode->i_ino); retval = -EFSCORRUPTED; } - if (retval) { - inode_unlock(old_inode); + if (retval) goto out_oiter; - } has_diriter = true; tloc = lelb_to_cpu(diriter.fi.icb.extLocation); if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) != @@ -889,7 +882,6 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, udf_dir_entry_len(&diriter.fi)); udf_fiiter_write_fi(&diriter, NULL); udf_fiiter_release(&diriter); - inode_unlock(old_inode); inode_dec_link_count(old_dir); if (new_inode) @@ -901,10 +893,8 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir, } return 0; out_oiter: - if (has_diriter) { + if (has_diriter) udf_fiiter_release(&diriter); - inode_unlock(old_inode); - } udf_fiiter_release(&oiter); return retval; -- cgit v1.2.3-59-g8ed1b From cde3c9d7e2a359e337216855dcb333a19daaa436 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:23 +0200 Subject: Revert "f2fs: fix potential corruption when moving a directory" This reverts commit d94772154e524b329a168678836745d2773a6e02. The locking is going to be provided by VFS. CC: Jaegeuk Kim CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-3-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/f2fs/namei.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 77a71276ecb1..ad597b417fea 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -995,20 +995,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, goto out; } - /* - * Copied from ext4_rename: we need to protect against old.inode - * directory getting converted from inline directory format into - * a normal one. - */ - if (S_ISDIR(old_inode->i_mode)) - inode_lock_nested(old_inode, I_MUTEX_NONDIR2); - err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) err = PTR_ERR(old_page); - goto out_unlock_old; + goto out; } if (S_ISDIR(old_inode->i_mode)) { @@ -1116,9 +1108,6 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, f2fs_unlock_op(sbi); - if (S_ISDIR(old_inode->i_mode)) - inode_unlock(old_inode); - if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); @@ -1133,9 +1122,6 @@ out_dir: f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); -out_unlock_old: - if (S_ISDIR(old_inode->i_mode)) - inode_unlock(old_inode); out: iput(whiteout); return err; -- cgit v1.2.3-59-g8ed1b From f23ce757185319886ca80c4864ce5f81ac6cc9e9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:24 +0200 Subject: fs: Establish locking order for unrelated directories Currently the locking order of inode locks for directories that are not in ancestor relationship is not defined because all operations that needed to lock two directories like this were serialized by sb->s_vfs_rename_mutex. However some filesystems need to lock two subdirectories for RENAME_EXCHANGE operations and for this we need the locking order established even for two tree-unrelated directories. Provide a helper function lock_two_inodes() that establishes lock ordering for any two inodes and use it in lock_two_directories(). CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-4-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/inode.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 2 ++ fs/namei.c | 4 ++-- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 577799b7855f..b9d498032270 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1103,6 +1103,48 @@ void discard_new_inode(struct inode *inode) } EXPORT_SYMBOL(discard_new_inode); +/** + * lock_two_inodes - lock two inodes (may be regular files but also dirs) + * + * Lock any non-NULL argument. The caller must make sure that if he is passing + * in two directories, one is not ancestor of the other. Zero, one or two + * objects may be locked by this function. + * + * @inode1: first inode to lock + * @inode2: second inode to lock + * @subclass1: inode lock subclass for the first lock obtained + * @subclass2: inode lock subclass for the second lock obtained + */ +void lock_two_inodes(struct inode *inode1, struct inode *inode2, + unsigned subclass1, unsigned subclass2) +{ + if (!inode1 || !inode2) { + /* + * Make sure @subclass1 will be used for the acquired lock. + * This is not strictly necessary (no current caller cares) but + * let's keep things consistent. + */ + if (!inode1) + swap(inode1, inode2); + goto lock; + } + + /* + * If one object is directory and the other is not, we must make sure + * to lock directory first as the other object may be its child. + */ + if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) { + if (inode1 > inode2) + swap(inode1, inode2); + } else if (!S_ISDIR(inode1->i_mode)) + swap(inode1, inode2); +lock: + if (inode1) + inode_lock_nested(inode1, subclass1); + if (inode2 && inode2 != inode1) + inode_lock_nested(inode2, subclass2); +} + /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * diff --git a/fs/internal.h b/fs/internal.h index bd3b2810a36b..377030a50aca 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -152,6 +152,8 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry); bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid); +void lock_two_inodes(struct inode *inode1, struct inode *inode2, + unsigned subclass1, unsigned subclass2); /* * fs-writeback.c diff --git a/fs/namei.c b/fs/namei.c index e4fe0879ae55..148570aabe74 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3028,8 +3028,8 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) return p; } - inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); + lock_two_inodes(p1->d_inode, p2->d_inode, + I_MUTEX_PARENT, I_MUTEX_PARENT2); return NULL; } -- cgit v1.2.3-59-g8ed1b From 28eceeda130f5058074dd007d9c59d2e8bc5af2e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:25 +0200 Subject: fs: Lock moved directories When a directory is moved to a different directory, some filesystems (udf, ext4, ocfs2, f2fs, and likely gfs2, reiserfs, and others) need to update their pointer to the parent and this must not race with other operations on the directory. Lock the directories when they are moved. Although not all filesystems need this locking, we perform it in vfs_rename() because getting the lock ordering right is really difficult and we don't want to expose these locking details to filesystems. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-5-jack@suse.cz> Signed-off-by: Christian Brauner --- Documentation/filesystems/directory-locking.rst | 26 +++++++++++++------------ fs/namei.c | 22 +++++++++++++-------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst index 504ba940c36c..dccd61c7c5c3 100644 --- a/Documentation/filesystems/directory-locking.rst +++ b/Documentation/filesystems/directory-locking.rst @@ -22,12 +22,11 @@ exclusive. 3) object removal. Locking rules: caller locks parent, finds victim, locks victim and calls the method. Locks are exclusive. -4) rename() that is _not_ cross-directory. Locking rules: caller locks -the parent and finds source and target. In case of exchange (with -RENAME_EXCHANGE in flags argument) lock both. In any case, -if the target already exists, lock it. If the source is a non-directory, -lock it. If we need to lock both, lock them in inode pointer order. -Then call the method. All locks are exclusive. +4) rename() that is _not_ cross-directory. Locking rules: caller locks the +parent and finds source and target. We lock both (provided they exist). If we +need to lock two inodes of different type (dir vs non-dir), we lock directory +first. If we need to lock two inodes of the same type, lock them in inode +pointer order. Then call the method. All locks are exclusive. NB: we might get away with locking the source (and target in exchange case) shared. @@ -44,15 +43,17 @@ All locks are exclusive. rules: * lock the filesystem - * lock parents in "ancestors first" order. + * lock parents in "ancestors first" order. If one is not ancestor of + the other, lock them in inode pointer order. * find source and target. * if old parent is equal to or is a descendent of target fail with -ENOTEMPTY * if new parent is equal to or is a descendent of source fail with -ELOOP - * If it's an exchange, lock both the source and the target. - * If the target exists, lock it. If the source is a non-directory, - lock it. If we need to lock both, do so in inode pointer order. + * Lock both the source and the target provided they exist. If we + need to lock two inodes of different type (dir vs non-dir), we lock + the directory first. If we need to lock two inodes of the same type, + lock them in inode pointer order. * call the method. All ->i_rwsem are taken exclusive. Again, we might get away with locking @@ -66,8 +67,9 @@ If no directory is its own ancestor, the scheme above is deadlock-free. Proof: - First of all, at any moment we have a partial ordering of the - objects - A < B iff A is an ancestor of B. + First of all, at any moment we have a linear ordering of the + objects - A < B iff (A is an ancestor of B) or (B is not an ancestor + of A and ptr(A) < ptr(B)). That ordering can change. However, the following is true: diff --git a/fs/namei.c b/fs/namei.c index 148570aabe74..6a5e26a529e1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4731,7 +4731,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. * c) we have to lock _four_ objects - parents and victim (if it exists), - * and source (if it is not a directory). + * and source. * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4815,10 +4815,16 @@ int vfs_rename(struct renamedata *rd) take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); - if (!is_dir || (flags & RENAME_EXCHANGE)) - lock_two_nondirectories(source, target); - else if (target) - inode_lock(target); + /* + * Lock all moved children. Moved directories may need to change parent + * pointer so they need the lock to prevent against concurrent + * directory changes moving parent pointer. For regular files we've + * historically always done this. The lockdep locking subclasses are + * somewhat arbitrary but RENAME_EXCHANGE in particular can swap + * regular files and directories so it's difficult to tell which + * subclasses to use. + */ + lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); error = -EPERM; if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) @@ -4866,9 +4872,9 @@ int vfs_rename(struct renamedata *rd) d_exchange(old_dentry, new_dentry); } out: - if (!is_dir || (flags & RENAME_EXCHANGE)) - unlock_two_nondirectories(source, target); - else if (target) + if (source) + inode_unlock(source); + if (target) inode_unlock(target); dput(new_dentry); if (!error) { -- cgit v1.2.3-59-g8ed1b From 2454ad83b90afbc6ed2c22ec1310b624c40bf0d3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:26 +0200 Subject: fs: Restrict lock_two_nondirectories() to non-directory inodes Currently lock_two_nondirectories() is skipping any passed directories. After vfs_rename() uses lock_two_inodes(), all the remaining four users of this function pass only regular files to it. So drop the somewhat unusual "skip directory" logic and instead warn if anybody passes directory to it. This also allows us to use lock_two_inodes() in lock_two_nondirectories() to concentrate the lock ordering logic in less places. Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-6-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/inode.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index b9d498032270..53ae3b76d232 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1148,7 +1148,7 @@ lock: /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * - * Lock any non-NULL argument that is not a directory. + * Lock any non-NULL argument. Passed objects must not be directories. * Zero, one or two objects may be locked by this function. * * @inode1: first inode to lock @@ -1156,13 +1156,9 @@ lock: */ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) { - if (inode1 > inode2) - swap(inode1, inode2); - - if (inode1 && !S_ISDIR(inode1->i_mode)) - inode_lock(inode1); - if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1) - inode_lock_nested(inode2, I_MUTEX_NONDIR2); + WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); + WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); + lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); } EXPORT_SYMBOL(lock_two_nondirectories); @@ -1173,10 +1169,14 @@ EXPORT_SYMBOL(lock_two_nondirectories); */ void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) { - if (inode1 && !S_ISDIR(inode1->i_mode)) + if (inode1) { + WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); inode_unlock(inode1); - if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1) + } + if (inode2 && inode2 != inode1) { + WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); inode_unlock(inode2); + } } EXPORT_SYMBOL(unlock_two_nondirectories); -- cgit v1.2.3-59-g8ed1b