about summary refs log tree commit diff stats
path: root/fs/ext4/namei.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-10-22 10:31:08 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-10-22 10:31:08 -0700
commit: 96485e4462604744d66bf4301557d996d80b85eb (patch)
tree: 4275cc178466bd877980c3380dcfaf9137bf154f /fs/ext4/namei.c
parent: Merge branch 'work.set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs (diff)
parent: ext4: fix invalid inode checksum (diff)
download: linux-dev-96485e4462604744d66bf4301557d996d80b85eb.tar.xz
linux-dev-96485e4462604744d66bf4301557d996d80b85eb.zip
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "The significant new ext4 feature this time around is Harshad's new fast_commit mode. In addition, thanks to Mauricio for fixing a race where mmap'ed pages that are being changed in parallel with a data=journal transaction commit could result in bad checksums in the failure that could cause journal replays to fail. Also notable is Ritesh's buffered write optimization which can result in significant improvements on parallel write workloads. (The kernel test robot reported a 330.6% improvement on fio.write_iops on a 96 core system using DAX) Besides that, we have the usual miscellaneous cleanups and bug fixes" Link: https://lore.kernel.org/r/20200925071217.GO28663@shao2-debian * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (46 commits) ext4: fix invalid inode checksum ext4: add fast commit stats in procfs ext4: add a mount opt to forcefully turn fast commits on ext4: fast commit recovery path jbd2: fast commit recovery path ext4: main fast-commit commit path jbd2: add fast commit machinery ext4 / jbd2: add fast commit initialization ext4: add fast_commit feature and handling for extended mount options doc: update ext4 and journalling docs to include fast commit feature ext4: Detect already used quota file early jbd2: avoid transaction reuse after reformatting ext4: use the normal helper to get the actual inode ext4: fix bs < ps issue reported with dioread_nolock mount opt ext4: data=journal: write-protect pages on j_submit_inode_data_buffers() ext4: data=journal: fixes for ext4_page_mkwrite() jbd2, ext4, ocfs2: introduce/use journal callbacks j_submit|finish_inode_data_buffers() jbd2: introduce/export functions jbd2_journal_submit|finish_inode_data_buffers() ext4: introduce ext4_sb_bread_unmovable() to replace sb_bread_unmovable() ext4: use ext4_sb_bread() instead of sb_bread() ...
Diffstat (limited to 'fs/ext4/namei.c')
-rw-r--r-- fs/ext4/namei.c 206
1 files changed, 130 insertions, 76 deletions
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 0d74615fcce3..5159830dacb8 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2553,7 +2553,7 @@ out:
* for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
* on regular files) and to avoid creating huge/slow non-HTREE directories.
*/
-static void ext4_inc_count(handle_t *handle, struct inode *inode)
+static void ext4_inc_count(struct inode *inode)
{
inc_nlink(inode);
if (is_dx(inode) &&
@@ -2565,7 +2565,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
* If a directory had nlink == 1, then we should let it be 1. This indicates
* directory has >EXT4_LINK_MAX subdirs.
*/
-static void ext4_dec_count(handle_t *handle, struct inode *inode)
+static void ext4_dec_count(struct inode *inode)
{
if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
drop_nlink(inode);
@@ -2610,7 +2610,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
handle_t *handle;
- struct inode *inode;
+ struct inode *inode, *inode_save;
int err, credits, retries = 0;
err = dquot_initialize(dir);
@@ -2628,7 +2628,11 @@ retry:
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
+ inode_save = inode;
+ ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
+ ext4_fc_track_create(inode_save, dentry);
+ iput(inode_save);
}
if (handle)
ext4_journal_stop(handle);
@@ -2643,7 +2647,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
handle_t *handle;
- struct inode *inode;
+ struct inode *inode, *inode_save;
int err, credits, retries = 0;
err = dquot_initialize(dir);
@@ -2660,7 +2664,12 @@ retry:
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &ext4_special_inode_operations;
+ inode_save = inode;
+ ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
+ if (!err)
+ ext4_fc_track_create(inode_save, dentry);
+ iput(inode_save);
}
if (handle)
ext4_journal_stop(handle);
@@ -2739,7 +2748,7 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
return ext4_next_entry(de, blocksize);
}
-static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
+int ext4_init_new_dir(handle_t *handle, struct inode *dir,
struct inode *inode)
{
struct buffer_head *dir_block = NULL;
@@ -2824,7 +2833,9 @@ out_clear_inode:
iput(inode);
goto out_retry;
}
- ext4_inc_count(handle, dir);
+ ext4_fc_track_create(inode, dentry);
+ ext4_inc_count(dir);
+
ext4_update_dx_flag(dir);
err = ext4_mark_inode_dirty(handle, dir);
if (err)
@@ -3162,8 +3173,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
retval = ext4_mark_inode_dirty(handle, inode);
if (retval)
goto end_rmdir;
- ext4_dec_count(handle, dir);
+ ext4_dec_count(dir);
ext4_update_dx_flag(dir);
+ ext4_fc_track_unlink(inode, dentry);
retval = ext4_mark_inode_dirty(handle, dir);
#ifdef CONFIG_UNICODE
@@ -3184,42 +3196,32 @@ end_rmdir:
return retval;
}
-static int ext4_unlink(struct inode *dir, struct dentry *dentry)
+int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+ struct inode *inode)
{
- int retval;
- struct inode *inode;
+ int retval = -ENOENT;
struct buffer_head *bh;
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
+ int skip_remove_dentry = 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
- return -EIO;
-
- trace_ext4_unlink_enter(dir, dentry);
- /* Initialize quotas before so that eventual writes go
- * in separate transaction */
- retval = dquot_initialize(dir);
- if (retval)
- goto out_trace;
- retval = dquot_initialize(d_inode(dentry));
- if (retval)
- goto out_trace;
-
- bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
- if (IS_ERR(bh)) {
- retval = PTR_ERR(bh);
- goto out_trace;
- }
- if (!bh) {
- retval = -ENOENT;
- goto out_trace;
- }
+ bh = ext4_find_entry(dir, d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
- inode = d_inode(dentry);
+ if (!bh)
+ return -ENOENT;
if (le32_to_cpu(de->inode) != inode->i_ino) {
- retval = -EFSCORRUPTED;
- goto out_bh;
+ /*
+ * It's okay if we don't find a dentry which matches
+ * the inode. That's because it might have gotten
+ * renamed to a different inode number.
+ */
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ skip_remove_dentry = 1;
+ else
+ goto out_bh;
}
handle = ext4_journal_start(dir, EXT4_HT_DIR,
@@ -3232,17 +3234,21 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- retval = ext4_delete_entry(handle, dir, de, bh);
- if (retval)
- goto out_handle;
- dir->i_ctime = dir->i_mtime = current_time(dir);
- ext4_update_dx_flag(dir);
- retval = ext4_mark_inode_dirty(handle, dir);
- if (retval)
- goto out_handle;
+ if (!skip_remove_dentry) {
+ retval = ext4_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto out_handle;
+ dir->i_ctime = dir->i_mtime = current_time(dir);
+ ext4_update_dx_flag(dir);
+ retval = ext4_mark_inode_dirty(handle, dir);
+ if (retval)
+ goto out_handle;
+ } else {
+ retval = 0;
+ }
if (inode->i_nlink == 0)
ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
- dentry->d_name.len, dentry->d_name.name);
+ d_name->len, d_name->name);
else
drop_nlink(inode);
if (!inode->i_nlink)
@@ -3250,6 +3256,35 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
inode->i_ctime = current_time(inode);
retval = ext4_mark_inode_dirty(handle, inode);
+out_handle:
+ ext4_journal_stop(handle);
+out_bh:
+ brelse(bh);
+ return retval;
+}
+
+static int ext4_unlink(struct inode *dir, struct dentry *dentry)
+{
+ int retval;
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
+ trace_ext4_unlink_enter(dir, dentry);
+ /*
+ * Initialize quotas before so that eventual writes go
+ * in separate transaction
+ */
+ retval = dquot_initialize(dir);
+ if (retval)
+ goto out_trace;
+ retval = dquot_initialize(d_inode(dentry));
+ if (retval)
+ goto out_trace;
+
+ retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry));
+ if (!retval)
+ ext4_fc_track_unlink(d_inode(dentry), dentry);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
@@ -3261,10 +3296,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
d_invalidate(dentry);
#endif
-out_handle:
- ext4_journal_stop(handle);
-out_bh:
- brelse(bh);
out_trace:
trace_ext4_unlink_exit(dentry, retval);
return retval;
@@ -3345,7 +3376,8 @@ static int ext4_symlink(struct inode *dir,
*/
drop_nlink(inode);
err = ext4_orphan_add(handle, inode);
- ext4_journal_stop(handle);
+ if (handle)
+ ext4_journal_stop(handle);
handle = NULL;
if (err)
goto err_drop_inode;
@@ -3399,29 +3431,10 @@ out_free_encrypted_link:
return err;
}
-static int ext4_link(struct dentry *old_dentry,
- struct inode *dir, struct dentry *dentry)
+int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
{
handle_t *handle;
- struct inode *inode = d_inode(old_dentry);
int err, retries = 0;
-
- if (inode->i_nlink >= EXT4_LINK_MAX)
- return -EMLINK;
-
- err = fscrypt_prepare_link(old_dentry, dir, dentry);
- if (err)
- return err;
-
- if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
- (!projid_eq(EXT4_I(dir)->i_projid,
- EXT4_I(old_dentry->d_inode)->i_projid)))
- return -EXDEV;
-
- err = dquot_initialize(dir);
- if (err)
- return err;
-
retry:
handle = ext4_journal_start(dir, EXT4_HT_DIR,
(EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -3433,11 +3446,12 @@ retry:
ext4_handle_sync(handle);
inode->i_ctime = current_time(inode);
- ext4_inc_count(handle, inode);
+ ext4_inc_count(inode);
ihold(inode);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
+ ext4_fc_track_link(inode, dentry);
err = ext4_mark_inode_dirty(handle, inode);
/* this can happen only for tmpfile being
* linked the first time
@@ -3455,6 +3469,29 @@ retry:
return err;
}
+static int ext4_link(struct dentry *old_dentry,
+ struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = d_inode(old_dentry);
+ int err;
+
+ if (inode->i_nlink >= EXT4_LINK_MAX)
+ return -EMLINK;
+
+ err = fscrypt_prepare_link(old_dentry, dir, dentry);
+ if (err)
+ return err;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
+ return __ext4_link(dir, inode, dentry);
+}
/*
* Try to find buffer head where contains the parent block.
@@ -3630,9 +3667,9 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
{
if (ent->dir_nlink_delta) {
if (ent->dir_nlink_delta == -1)
- ext4_dec_count(handle, ent->dir);
+ ext4_dec_count(ent->dir);
else
- ext4_inc_count(handle, ent->dir);
+ ext4_inc_count(ent->dir);
ext4_mark_inode_dirty(handle, ent->dir);
}
}
@@ -3844,7 +3881,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (new.inode) {
- ext4_dec_count(handle, new.inode);
+ ext4_dec_count(new.inode);
new.inode->i_ctime = current_time(new.inode);
}
old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
@@ -3854,14 +3891,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval)
goto end_rename;
- ext4_dec_count(handle, old.dir);
+ ext4_dec_count(old.dir);
if (new.inode) {
/* checked ext4_empty_dir above, can't have another
* parent, ext4_dec_count() won't work for many-linked
* dirs */
clear_nlink(new.inode);
} else {
- ext4_inc_count(handle, new.dir);
+ ext4_inc_count(new.dir);
ext4_update_dx_flag(new.dir);
retval = ext4_mark_inode_dirty(handle, new.dir);
if (unlikely(retval))
@@ -3871,6 +3908,22 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = ext4_mark_inode_dirty(handle, old.dir);
if (unlikely(retval))
goto end_rename;
+
+ if (S_ISDIR(old.inode->i_mode)) {
+ /*
+ * We disable fast commits here because the
+ * replay code is not yet capable of changing dot dot
+ * dirents in directories.
+ */
+ ext4_fc_mark_ineligible(old.inode->i_sb,
+ EXT4_FC_REASON_RENAME_DIR);
+ } else {
+ if (new.inode)
+ ext4_fc_track_unlink(new.inode, new.dentry);
+ ext4_fc_track_link(old.inode, new.dentry);
+ ext4_fc_track_unlink(old.inode, old.dentry);
+ }
+
if (new.inode) {
retval = ext4_mark_inode_dirty(handle, new.inode);
if (unlikely(retval))
@@ -4014,7 +4067,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = ext4_mark_inode_dirty(handle, new.inode);
if (unlikely(retval))
goto end_rename;
-
+ ext4_fc_mark_ineligible(new.inode->i_sb,
+ EXT4_FC_REASON_CROSS_RENAME);
if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
if (retval)