From 3eda41df05d6ad5c825cbc7fef03d563597b1afa Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Tue, 12 Oct 2021 13:19:01 -0400 Subject: Revert "ext4: enforce buffer head state assertion in ext4_da_map_blocks" This reverts commit 948ca5f30e1df0c11eb5b0f410b9ceb97fa77ad9. Two crash reports from users running variations on 5.15-rc4 kernels suggest that it is premature to enforce the state assertion in the original commit. Both crashes were triggered by BUG calls in that code, indicating that under some rare circumstance the buffer head state did not match a delayed allocated block at the time the block was written out. No reproducer is available. Resolving this problem will require more time than remains in the current release cycle, so reverting the original patch for the time being is necessary to avoid any instability it may cause. Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20211012171901.5352-1-enwlinux@gmail.com Fixes: 948ca5f30e1d ("ext4: enforce buffer head state assertion in ext4_da_map_blocks") Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0f06305167d5..9097fccdc688 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1711,16 +1711,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, } /* - * the buffer head associated with a delayed and not unwritten - * block found in the extent status cache must contain an - * invalid block number and have its BH_New and BH_Delay bits - * set, reflecting the state assigned when the block was - * initially delayed allocated + * Delayed extent could be allocated by fallocate. + * So we need to check it. */ - if (ext4_es_is_delonly(&es)) { - BUG_ON(bh->b_blocknr != invalid_block); - BUG_ON(!buffer_new(bh)); - BUG_ON(!buffer_delay(bh)); + if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); return 0; } -- cgit v1.2.3-59-g8ed1b From 39fec6889d15a658c3a3ebb06fd69d3584ddffd3 Mon Sep 17 00:00:00 2001 From: Shaoying Xu Date: Thu, 2 Sep 2021 16:44:12 +0000 Subject: ext4: fix lazy initialization next schedule time computation in more granular unit Ext4 file system has default lazy inode table initialization setup once it is mounted. However, it has issue on computing the next schedule time that makes the timeout same amount in jiffies but different real time in secs if with various HZ values. Therefore, fix by measuring the current time in a more granular unit nanoseconds and make the next schedule time independent of the HZ value. Fixes: bfff68738f1c ("ext4: add support for lazy inode table initialization") Signed-off-by: Shaoying Xu Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210902164412.9994-2-shaoyi@amazon.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 88d5d274a868..8a67e5f3f576 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3263,9 +3263,9 @@ static int ext4_run_li_request(struct ext4_li_request *elr) struct super_block *sb = elr->lr_super; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t group = elr->lr_next_group; - unsigned long timeout = 0; unsigned int prefetch_ios = 0; int ret = 0; + u64 start_time; if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { elr->lr_next_group = ext4_mb_prefetch(sb, group, @@ -3302,14 +3302,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = 1; if (!ret) { - timeout = jiffies; + start_time = ktime_get_real_ns(); ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); trace_ext4_lazy_itable_init(sb, group); if (elr->lr_timeout == 0) { - timeout = (jiffies - timeout) * - EXT4_SB(elr->lr_super)->s_li_wait_mult; - elr->lr_timeout = timeout; + elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * + EXT4_SB(elr->lr_super)->s_li_wait_mult); } elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; -- cgit v1.2.3-59-g8ed1b From 83c5688b8977b1cd495a05ca0455e4353c8f6655 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:46 +0800 Subject: ext4: correct the left/middle/right debug message for binsearch The debuginfo for binsearch want to show the left/middle/right extent while the process search for the goal block. However we show this info after we change right or left. Link: https://lore.kernel.org/r/20210903062748.4118886-2-yangerkun@huawei.com Signed-off-by: yangerkun Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0e02571f2f82..c59426f1e1d2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -714,13 +714,14 @@ ext4_ext_binsearch_idx(struct inode *inode, r = EXT_LAST_INDEX(eh); while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), + r, le32_to_cpu(r->ei_block)); + if (block < le32_to_cpu(m->ei_block)) r = m - 1; else l = m + 1; - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, - le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), - r, le32_to_cpu(r->ei_block)); } path->p_idx = l - 1; @@ -782,13 +783,14 @@ ext4_ext_binsearch(struct inode *inode, while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), + r, le32_to_cpu(r->ee_block)); + if (block < le32_to_cpu(m->ee_block)) r = m - 1; else l = m + 1; - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, - le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), - r, le32_to_cpu(r->ee_block)); } path->p_ext = l - 1; -- cgit v1.2.3-59-g8ed1b From 4268496e48dc681cfa53b92357314b5d7221e625 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:47 +0800 Subject: ext4: ensure enough credits in ext4_ext_shift_path_extents Like ext4_ext_rm_leaf, we can ensure that there are enough credits before every call that will consume credits. As part of this fix we fold the functionality of ext4_access_path() into ext4_ext_shift_path_extents(). This change is needed as a preparation for the next bugfix patch. Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210903062748.4118886-3-yangerkun@huawei.com Signed-off-by: yangerkun Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 49 +++++++++++++++---------------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c59426f1e1d2..6b080f61342a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4979,36 +4979,6 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo); } -/* - * ext4_access_path: - * Function to access the path buffer for marking it dirty. - * It also checks if there are sufficient credits left in the journal handle - * to update path. - */ -static int -ext4_access_path(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - int credits, err; - - if (!ext4_handle_valid(handle)) - return 0; - - /* - * Check if need to extend journal credits - * 3 for leaf, sb, and inode plus 2 (bmap and group - * descriptor) for each block group; assume two block - * groups - */ - credits = ext4_writepage_trans_blocks(inode); - err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0); - if (err < 0) - return err; - - err = ext4_ext_get_access(handle, inode, path); - return err; -} - /* * ext4_ext_shift_path_extents: * Shift the extents of a path structure lying between path[depth].p_ext @@ -5023,6 +4993,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, int depth, err = 0; struct ext4_extent *ex_start, *ex_last; bool update = false; + int credits, restart_credits; depth = path->p_depth; while (depth >= 0) { @@ -5032,13 +5003,23 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); + /* leaf + sb + inode */ + credits = 3; + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) { + update = true; + /* extent tree + sb + inode */ + credits = depth + 2; + } - err = ext4_access_path(handle, inode, path + depth); + restart_credits = ext4_writepage_trans_blocks(inode); + err = ext4_datasem_ensure_credits(handle, inode, credits, + restart_credits, 0); if (err) goto out; - if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) - update = true; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; while (ex_start <= ex_last) { if (SHIFT == SHIFT_LEFT) { @@ -5069,7 +5050,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, } /* Update index too */ - err = ext4_access_path(handle, inode, path + depth); + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; -- cgit v1.2.3-59-g8ed1b From 1811bc401aa58c7bdb0df3205aa6613b49d32127 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:48 +0800 Subject: ext4: refresh the ext4_ext_path struct after dropping i_data_sem. After we drop i_data sem, we need to reload the ext4_ext_path structure since the extent tree can change once i_data_sem is released. This addresses the BUG: [52117.465187] ------------[ cut here ]------------ [52117.465686] kernel BUG at fs/ext4/extents.c:1756! ... [52117.478306] Call Trace: [52117.478565] ext4_ext_shift_extents+0x3ee/0x710 [52117.479020] ext4_fallocate+0x139c/0x1b40 [52117.479405] ? __do_sys_newfstat+0x6b/0x80 [52117.479805] vfs_fallocate+0x151/0x4b0 [52117.480177] ksys_fallocate+0x4a/0xa0 [52117.480533] __x64_sys_fallocate+0x22/0x30 [52117.480930] do_syscall_64+0x35/0x80 [52117.481277] entry_SYSCALL_64_after_hwframe+0x44/0xae [52117.481769] RIP: 0033:0x7fa062f855ca Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210903062748.4118886-4-yangerkun@huawei.com Signed-off-by: yangerkun Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6b080f61342a..15c68bc80d21 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5014,8 +5014,11 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, restart_credits = ext4_writepage_trans_blocks(inode); err = ext4_datasem_ensure_credits(handle, inode, credits, restart_credits, 0); - if (err) + if (err) { + if (err > 0) + err = -EAGAIN; goto out; + } err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -5089,6 +5092,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, int ret = 0, depth; struct ext4_extent *extent; ext4_lblk_t stop, *iterator, ex_start, ex_end; + ext4_lblk_t tmp = EXT_MAX_BLOCKS; /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, @@ -5142,11 +5146,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * till we reach stop. In case of right shift, iterator points to stop * and it is decreased till we reach start. */ +again: if (SHIFT == SHIFT_LEFT) iterator = &start; else iterator = &stop; + if (tmp != EXT_MAX_BLOCKS) + *iterator = tmp; + /* * Its safe to start updating extents. Start and stop are unsigned, so * in case of right shift if extent with 0 block is reached, iterator @@ -5175,6 +5183,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } } + tmp = *iterator; if (SHIFT == SHIFT_LEFT) { extent = EXT_LAST_EXTENT(path[depth].p_hdr); *iterator = le32_to_cpu(extent->ee_block) + @@ -5193,6 +5202,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, SHIFT); + /* iterator can be NULL which means we should break */ + if (ret == -EAGAIN) + goto again; if (ret) break; } -- cgit v1.2.3-59-g8ed1b From 31d21d219b51dcfb16e18427eddae5394d402820 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Mon, 19 Jul 2021 13:59:14 +0800 Subject: ext4: convert from atomic_t to refcount_t on ext4_io_end->count refcount_t type and corresponding API can protect refcounters from accidental underflow and overflow and further use-after-free situations. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1626674355-55795-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 ++- fs/ext4/page-io.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3825195539d7..404dd50856e5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -17,6 +17,7 @@ #ifndef _EXT4_H #define _EXT4_H +#include #include #include #include @@ -241,7 +242,7 @@ typedef struct ext4_io_end { struct bio *bio; /* Linked list of completed * bios covering the extent */ unsigned int flag; /* unwritten or not */ - atomic_t count; /* reference counter */ + refcount_t count; /* reference counter */ struct list_head list_vec; /* list of ext4_io_end_vec */ } ext4_io_end_t; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index f038d578d8d8..9cb261714991 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -279,14 +279,14 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) io_end->inode = inode; INIT_LIST_HEAD(&io_end->list); INIT_LIST_HEAD(&io_end->list_vec); - atomic_set(&io_end->count, 1); + refcount_set(&io_end->count, 1); } return io_end; } void ext4_put_io_end_defer(ext4_io_end_t *io_end) { - if (atomic_dec_and_test(&io_end->count)) { + if (refcount_dec_and_test(&io_end->count)) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || list_empty(&io_end->list_vec)) { ext4_release_io_end(io_end); @@ -300,7 +300,7 @@ int ext4_put_io_end(ext4_io_end_t *io_end) { int err = 0; - if (atomic_dec_and_test(&io_end->count)) { + if (refcount_dec_and_test(&io_end->count)) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) { err = ext4_convert_unwritten_io_end_vec(io_end->handle, io_end); @@ -314,7 +314,7 @@ int ext4_put_io_end(ext4_io_end_t *io_end) ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) { - atomic_inc(&io_end->count); + refcount_inc(&io_end->count); return io_end; } -- cgit v1.2.3-59-g8ed1b From 8dd27fecede55e8a4e67eef2878040ecad0f0d33 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:48 +0800 Subject: ext4: check for out-of-order index extents in ext4_valid_extent_entries() After commit 5946d089379a ("ext4: check for overlapping extents in ext4_valid_extent_entries()"), we can check out the overlapping extent entry in leaf extent blocks. But the out-of-order extent entry in index extent blocks could also trigger bad things if the filesystem is inconsistent. So this patch add a check to figure out the out-of-order index extents and return error. Signed-off-by: Zhang Yi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210908120850.4012324-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 15c68bc80d21..c0ad07adf9a6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -357,6 +357,9 @@ static int ext4_valid_extent_entries(struct inode *inode, ext4_fsblk_t *pblk, int depth) { unsigned short entries; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + if (eh->eh_entries == 0) return 1; @@ -365,31 +368,35 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); - ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; - int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - len = ext4_ext_get_actual_len(ext); if ((lblock <= prev) && prev) { *pblk = ext4_ext_pblock(ext); return 0; } + prev = lblock + ext4_ext_get_actual_len(ext) - 1; ext++; entries--; - prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); + if ((lblock <= prev) && prev) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } ext_idx++; entries--; + prev = lblock; } } return 1; -- cgit v1.2.3-59-g8ed1b From 9c6e071913792d80894cd0be98cc3c4b770e26d3 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:49 +0800 Subject: ext4: check for inconsistent extents between index and leaf block Now that we can check out overlapping extents in leaf block and out-of-order index extents in index block. But the .ee_block in the first extent of one leaf block should equal to the .ei_block in it's parent index extent entry. This patch add a check to verify such inconsistent between the index and leaf block. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20210908120850.4012324-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 59 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 23 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c0ad07adf9a6..f6a902de7f41 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -354,7 +354,8 @@ static int ext4_valid_extent_idx(struct inode *inode, static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, - ext4_fsblk_t *pblk, int depth) + ext4_lblk_t lblk, ext4_fsblk_t *pblk, + int depth) { unsigned short entries; ext4_lblk_t lblock = 0; @@ -368,6 +369,14 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); + + /* + * The logical block in the first entry should equal to + * the number in the index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext->ee_block)) + return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; @@ -384,6 +393,14 @@ static int ext4_valid_extent_entries(struct inode *inode, } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); + + /* + * The logical block in the first entry should equal to + * the number in the parent index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext_idx->ei_block)) + return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; @@ -404,7 +421,7 @@ static int ext4_valid_extent_entries(struct inode *inode, static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, - int depth, ext4_fsblk_t pblk) + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; @@ -430,7 +447,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -460,7 +477,7 @@ corrupted: } #define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) int ext4_ext_check_inode(struct inode *inode) { @@ -493,16 +510,18 @@ static void ext4_cache_extents(struct inode *inode, static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) + struct inode *inode, struct ext4_extent_idx *idx, + int depth, int flags) { struct buffer_head *bh; int err; gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; + ext4_fsblk_t pblk; if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; + pblk = ext4_idx_pblock(idx); bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -515,8 +534,8 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); + err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), + depth, pblk, le32_to_cpu(idx->ei_block)); if (err) goto errout; set_buffer_verified(bh); @@ -534,8 +553,8 @@ errout: } -#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ +#define read_extent_tree_block(inode, idx, depth, flags) \ + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags)) /* @@ -585,8 +604,7 @@ int ext4_ext_precache(struct inode *inode) i--; continue; } - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx++), + bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { @@ -893,8 +911,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL; - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; @@ -1503,7 +1520,6 @@ static int ext4_ext_search_right(struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; - ext4_fsblk_t block; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; @@ -1570,20 +1586,17 @@ got_index: * follow it and find the closest allocated * block to the right */ ix++; - block = ext4_idx_pblock(ix); while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); put_bh(bh); } - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -2962,9 +2975,9 @@ again: ext_debug(inode, "move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx), depth - i - 1, - EXT4_EX_NOCACHE); + bh = read_extent_tree_block(inode, path[i].p_idx, + depth - i - 1, + EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); -- cgit v1.2.3-59-g8ed1b From 0f2f87d51aebcf71a709b52f661d681594c7dffa Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:50 +0800 Subject: ext4: prevent partial update of the extent blocks In the most error path of current extents updating operations are not roll back partial updates properly when some bad things happens(.e.g in ext4_ext_insert_extent()). So we may get an inconsistent extents tree if journal has been aborted due to IO error, which may probability lead to BUGON later when we accessing these extent entries in errors=continue mode. This patch drop extent buffer's verify flag before updatng the contents in ext4_ext_get_access(), and reset it after updating in __ext4_ext_dirty(). After this patch we could force to check the extent buffer if extents tree updating was break off, make sure the extents are consistent. Signed-off-by: Zhang Yi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210908120850.4012324-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f6a902de7f41..09f56e04f4b2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -136,15 +136,25 @@ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode, static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { + int err = 0; + if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, inode->i_sb, - path->p_bh, EXT4_JTR_NONE); + err = ext4_journal_get_write_access(handle, inode->i_sb, + path->p_bh, EXT4_JTR_NONE); + /* + * The extent buffer's verified bit will be set again in + * __ext4_ext_dirty(). We could leave an inconsistent + * buffer if the extents updating procudure break off du + * to some error happens, force to check it again. + */ + if (!err) + clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ - return 0; + return err; } /* @@ -165,6 +175,9 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); + /* Extents updating done, re-set verified flag */ + if (!err) + set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); -- cgit v1.2.3-59-g8ed1b From 664bd38b9cbed11689a9b7ce8b7db2e57b7b9e23 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 1 Sep 2021 10:09:53 +0800 Subject: ext4: factor out ext4_fill_raw_inode() Factor out ext4_fill_raw_inode() from ext4_do_update_inode(), which is use to fill the in-mem inode contents into the inode table buffer, in preparation for initializing the exclusive inode buffer without reading the block in __ext4_get_inode_loc(). Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210901020955.1657340-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 85 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 38 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9097fccdc688..791958088e57 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4852,9 +4852,8 @@ bad_inode: return ERR_PTR(ret); } -static int ext4_inode_blocks_set(handle_t *handle, - struct ext4_inode *raw_inode, - struct ext4_inode_info *ei) +static int ext4_inode_blocks_set(struct ext4_inode *raw_inode, + struct ext4_inode_info *ei) { struct inode *inode = &(ei->vfs_inode); u64 i_blocks = READ_ONCE(inode->i_blocks); @@ -4957,37 +4956,16 @@ static void ext4_update_other_inodes_time(struct super_block *sb, rcu_read_unlock(); } -/* - * Post the struct inode info into an on-disk inode location in the - * buffer-cache. This gobbles the caller's reference to the - * buffer_head in the inode location struct. - * - * The caller must have write access to iloc->bh. - */ -static int ext4_do_update_inode(handle_t *handle, - struct inode *inode, - struct ext4_iloc *iloc) +static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) { - struct ext4_inode *raw_inode = ext4_raw_inode(iloc); struct ext4_inode_info *ei = EXT4_I(inode); - struct buffer_head *bh = iloc->bh; - struct super_block *sb = inode->i_sb; - int err = 0, block; - int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; projid_t i_projid; + int block; + int err; - spin_lock(&ei->i_raw_lock); - - /* - * For fields not tracked in the in-memory inode, initialise them - * to zero for new inodes. - */ - if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) - memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - - err = ext4_inode_blocks_set(handle, raw_inode, ei); + err = ext4_inode_blocks_set(raw_inode, ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); @@ -5029,16 +5007,8 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) { - ext4_isize_set(raw_inode, ei->i_disksize); - need_datasync = 1; - } - if (ei->i_disksize > 0x7fffffffULL) { - if (!ext4_has_feature_large_file(sb) || - EXT4_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT4_GOOD_OLD_REV)) - set_large_file = 1; - } + ext4_isize_set(raw_inode, ei->i_disksize); + raw_inode->i_generation = cpu_to_le32(inode->i_generation); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { @@ -5078,6 +5048,45 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_projid = cpu_to_le32(i_projid); ext4_inode_csum_set(inode, raw_inode, ei); + return err; +} + +/* + * Post the struct inode info into an on-disk inode location in the + * buffer-cache. This gobbles the caller's reference to the + * buffer_head in the inode location struct. + * + * The caller must have write access to iloc->bh. + */ +static int ext4_do_update_inode(handle_t *handle, + struct inode *inode, + struct ext4_iloc *iloc) +{ + struct ext4_inode *raw_inode = ext4_raw_inode(iloc); + struct ext4_inode_info *ei = EXT4_I(inode); + struct buffer_head *bh = iloc->bh; + struct super_block *sb = inode->i_sb; + int err; + int need_datasync = 0, set_large_file = 0; + + spin_lock(&ei->i_raw_lock); + + /* + * For fields not tracked in the in-memory inode, initialise them + * to zero for new inodes. + */ + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) + memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + + if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) + need_datasync = 1; + if (ei->i_disksize > 0x7fffffffULL) { + if (!ext4_has_feature_large_file(sb) || + EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV)) + set_large_file = 1; + } + + err = ext4_fill_raw_inode(inode, raw_inode); spin_unlock(&ei->i_raw_lock); if (err) { EXT4_ERROR_INODE(inode, "corrupted inode contents"); -- cgit v1.2.3-59-g8ed1b From 9a1bf32c8e12b7768325e83e9b9eeb69c46435b3 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 1 Sep 2021 10:09:54 +0800 Subject: ext4: move ext4_fill_raw_inode() related functions In preparation for calling ext4_fill_raw_inode() in __ext4_get_inode_loc(), move three related functions before __ext4_get_inode_loc(), no logical change. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210901020955.1657340-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 293 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 147 insertions(+), 146 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 791958088e57..a46d5e022175 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4231,6 +4231,153 @@ out_trace: return err; } +static inline u64 ext4_inode_peek_iversion(const struct inode *inode) +{ + if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) + return inode_peek_iversion_raw(inode); + else + return inode_peek_iversion(inode); +} + +static int ext4_inode_blocks_set(struct ext4_inode *raw_inode, + struct ext4_inode_info *ei) +{ + struct inode *inode = &(ei->vfs_inode); + u64 i_blocks = READ_ONCE(inode->i_blocks); + struct super_block *sb = inode->i_sb; + + if (i_blocks <= ~0U) { + /* + * i_blocks can be represented in a 32 bit variable + * as multiple of 512 bytes + */ + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = 0; + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); + return 0; + } + + /* + * This should never happen since sb->s_maxbytes should not have + * allowed this, sb->s_maxbytes was set according to the huge_file + * feature in ext4_fill_super(). + */ + if (!ext4_has_feature_huge_file(sb)) + return -EFSCORRUPTED; + + if (i_blocks <= 0xffffffffffffULL) { + /* + * i_blocks can be represented in a 48 bit variable + * as multiple of 512 bytes + */ + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); + ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); + } else { + ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); + /* i_block is stored in file system block size */ + i_blocks = i_blocks >> (inode->i_blkbits - 9); + raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); + raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); + } + return 0; +} + +static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + uid_t i_uid; + gid_t i_gid; + projid_t i_projid; + int block; + int err; + + err = ext4_inode_blocks_set(raw_inode, ei); + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + i_uid = i_uid_read(inode); + i_gid = i_gid_read(inode); + i_projid = from_kprojid(&init_user_ns, ei->i_projid); + if (!(test_opt(inode->i_sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); + /* + * Fix up interoperability with old kernels. Otherwise, + * old inodes get re-used with the upper 16 bits of the + * uid/gid intact. + */ + if (ei->i_dtime && list_empty(&ei->i_orphan)) { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } else { + raw_inode->i_uid_high = + cpu_to_le16(high_16_bits(i_uid)); + raw_inode->i_gid_high = + cpu_to_le16(high_16_bits(i_gid)); + } + } else { + raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); + raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); + + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); + raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) + raw_inode->i_file_acl_high = + cpu_to_le16(ei->i_file_acl >> 32); + raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); + ext4_isize_set(raw_inode, ei->i_disksize); + + raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + raw_inode->i_block[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + raw_inode->i_block[1] = 0; + } else { + raw_inode->i_block[0] = 0; + raw_inode->i_block[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + raw_inode->i_block[2] = 0; + } + } else if (!ext4_has_inline_data(inode)) { + for (block = 0; block < EXT4_N_BLOCKS; block++) + raw_inode->i_block[block] = ei->i_data[block]; + } + + if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { + u64 ivers = ext4_inode_peek_iversion(inode); + + raw_inode->i_disk_version = cpu_to_le32(ivers); + if (ei->i_extra_isize) { + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) + raw_inode->i_version_hi = + cpu_to_le32(ivers >> 32); + raw_inode->i_extra_isize = + cpu_to_le16(ei->i_extra_isize); + } + } + + if (i_projid != EXT4_DEF_PROJID && + !ext4_has_feature_project(inode->i_sb)) + err = err ?: -EFSCORRUPTED; + + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) + raw_inode->i_projid = cpu_to_le32(i_projid); + + ext4_inode_csum_set(inode, raw_inode, ei); + return err; +} + /* * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. If 'in_mem' is true, we have all @@ -4525,13 +4672,6 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) else inode_set_iversion_queried(inode, val); } -static inline u64 ext4_inode_peek_iversion(const struct inode *inode) -{ - if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return inode_peek_iversion_raw(inode); - else - return inode_peek_iversion(inode); -} struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ext4_iget_flags flags, const char *function, @@ -4852,50 +4992,6 @@ bad_inode: return ERR_PTR(ret); } -static int ext4_inode_blocks_set(struct ext4_inode *raw_inode, - struct ext4_inode_info *ei) -{ - struct inode *inode = &(ei->vfs_inode); - u64 i_blocks = READ_ONCE(inode->i_blocks); - struct super_block *sb = inode->i_sb; - - if (i_blocks <= ~0U) { - /* - * i_blocks can be represented in a 32 bit variable - * as multiple of 512 bytes - */ - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = 0; - ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); - return 0; - } - - /* - * This should never happen since sb->s_maxbytes should not have - * allowed this, sb->s_maxbytes was set according to the huge_file - * feature in ext4_fill_super(). - */ - if (!ext4_has_feature_huge_file(sb)) - return -EFSCORRUPTED; - - if (i_blocks <= 0xffffffffffffULL) { - /* - * i_blocks can be represented in a 48 bit variable - * as multiple of 512 bytes - */ - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); - ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); - } else { - ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); - /* i_block is stored in file system block size */ - i_blocks = i_blocks >> (inode->i_blkbits - 9); - raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); - raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); - } - return 0; -} - static void __ext4_update_other_inode_time(struct super_block *sb, unsigned long orig_ino, unsigned long ino, @@ -4956,101 +5052,6 @@ static void ext4_update_other_inodes_time(struct super_block *sb, rcu_read_unlock(); } -static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - uid_t i_uid; - gid_t i_gid; - projid_t i_projid; - int block; - int err; - - err = ext4_inode_blocks_set(raw_inode, ei); - - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - i_uid = i_uid_read(inode); - i_gid = i_gid_read(inode); - i_projid = from_kprojid(&init_user_ns, ei->i_projid); - if (!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); - /* - * Fix up interoperability with old kernels. Otherwise, - * old inodes get re-used with the upper 16 bits of the - * uid/gid intact. - */ - if (ei->i_dtime && list_empty(&ei->i_orphan)) { - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } else { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(i_uid)); - raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(i_gid)); - } - } else { - raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); - raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - - EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); - EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); - EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); - EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); - if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) - raw_inode->i_file_acl_high = - cpu_to_le16(ei->i_file_acl >> 32); - raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - ext4_isize_set(raw_inode, ei->i_disksize); - - raw_inode->i_generation = cpu_to_le32(inode->i_generation); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - if (old_valid_dev(inode->i_rdev)) { - raw_inode->i_block[0] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - raw_inode->i_block[1] = 0; - } else { - raw_inode->i_block[0] = 0; - raw_inode->i_block[1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - raw_inode->i_block[2] = 0; - } - } else if (!ext4_has_inline_data(inode)) { - for (block = 0; block < EXT4_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - } - - if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - u64 ivers = ext4_inode_peek_iversion(inode); - - raw_inode->i_disk_version = cpu_to_le32(ivers); - if (ei->i_extra_isize) { - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) - raw_inode->i_version_hi = - cpu_to_le32(ivers >> 32); - raw_inode->i_extra_isize = - cpu_to_le16(ei->i_extra_isize); - } - } - - if (i_projid != EXT4_DEF_PROJID && - !ext4_has_feature_project(inode->i_sb)) - err = err ?: -EFSCORRUPTED; - - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) - raw_inode->i_projid = cpu_to_le32(i_projid); - - ext4_inode_csum_set(inode, raw_inode, ei); - return err; -} - /* * Post the struct inode info into an on-disk inode location in the * buffer-cache. This gobbles the caller's reference to the -- cgit v1.2.3-59-g8ed1b From de01f484576d29b02fb2856387f29cfdf5ad4f19 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 1 Sep 2021 10:09:55 +0800 Subject: ext4: prevent getting empty inode buffer In ext4_get_inode_loc(), we may skip IO and get an zero && uptodate inode buffer when the inode monopolize an inode block for performance reason. For most cases, ext4_mark_iloc_dirty() will fill the inode buffer to make it fine, but we could miss this call if something bad happened. Finally, __ext4_get_inode_loc_noinmem() may probably get an empty inode buffer and trigger ext4 error. For example, if we remove a nonexistent xattr on inode A, ext4_xattr_set_handle() will return ENODATA before invoking ext4_mark_iloc_dirty(), it will left an uptodate but zero buffer. We will get checksum error message in ext4_iget() when getting inode again. EXT4-fs error (device sda): ext4_lookup:1784: inode #131074: comm cat: iget: checksum invalid Even worse, if we allocate another inode B at the same inode block, it will corrupt the inode A on disk when write back inode B. So this patch initialize the inode buffer by filling the in-mem inode contents if we skip read I/O, ensure that the buffer is really uptodate. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210901020955.1657340-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a46d5e022175..bfd3545f1e5d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4380,12 +4380,12 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode /* * ext4_get_inode_loc returns with an extra refcount against the inode's - * underlying buffer_head on success. If 'in_mem' is true, we have all - * data in memory that is needed to recreate the on-disk version of this - * inode. + * underlying buffer_head on success. If we pass 'inode' and it does not + * have in-inode xattr, we have all inode data in memory that is needed + * to recreate the on-disk version of this inode. */ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, - struct ext4_iloc *iloc, int in_mem, + struct inode *inode, struct ext4_iloc *iloc, ext4_fsblk_t *ret_block) { struct ext4_group_desc *gdp; @@ -4431,7 +4431,7 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, * is the only valid inode in the block, we need not read the * block. */ - if (in_mem) { + if (inode && !ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { struct buffer_head *bitmap_bh; int i, start; @@ -4459,8 +4459,13 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, } brelse(bitmap_bh); if (i == start + inodes_per_block) { + struct ext4_inode *raw_inode = + (struct ext4_inode *) (bh->b_data + iloc->offset); + /* all other inodes are free, so skip I/O */ memset(bh->b_data, 0, bh->b_size); + if (!ext4_test_inode_state(inode, EXT4_STATE_NEW)) + ext4_fill_raw_inode(inode, raw_inode); set_buffer_uptodate(bh); unlock_buffer(bh); goto has_buffer; @@ -4521,7 +4526,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, ext4_fsblk_t err_blk; int ret; - ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0, + ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc, &err_blk); if (ret == -EIO) @@ -4536,9 +4541,8 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) ext4_fsblk_t err_blk; int ret; - /* We have all inode data except xattrs in memory here. */ - ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, - !ext4_test_inode_state(inode, EXT4_STATE_XATTR), &err_blk); + ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc, + &err_blk); if (ret == -EIO) ext4_error_inode_block(inode, err_blk, EIO, @@ -4551,7 +4555,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino, struct ext4_iloc *iloc) { - return __ext4_get_inode_loc(sb, ino, iloc, 0, NULL); + return __ext4_get_inode_loc(sb, ino, NULL, iloc, NULL); } static bool ext4_should_enable_dax(struct inode *inode) -- cgit v1.2.3-59-g8ed1b From d4ffeeb7315d82e10803e067cbf079f246b09f00 Mon Sep 17 00:00:00 2001 From: Jing Yangyang Date: Mon, 23 Aug 2021 22:55:43 -0700 Subject: ext4: fix boolreturn.cocci warnings in fs/ext4/name.c Return statements in functions returning bool should use true/false instead of 1/0. ./fs/ext4/namei.c:1441:12-13:WARNING:return of 0/1 in function 'ext4_match' with return type bool Reported-by: Zeal Robot Signed-off-by: Jing Yangyang Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210824055543.58718-1-deng.changcheng@zte.com.cn Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index da7698341d7d..52c9bd154122 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1439,7 +1439,7 @@ static bool ext4_match(struct inode *parent, fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de)) { - return 0; + return false; } } return !ext4_ci_compare(parent, &cf, de->name, -- cgit v1.2.3-59-g8ed1b From 3bbef91bdd2180c67407285ba160b023eb4d5306 Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Tue, 24 Aug 2021 04:49:29 +0100 Subject: ext4: remove an unused variable warning with CONFIG_QUOTA=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'enable_quota' variable is only used in an CONFIG_QUOTA. With CONFIG_QUOTA=n, compiler causes a harmless warning: fs/ext4/super.c: In function ‘ext4_remount’: fs/ext4/super.c:5840:6: warning: variable ‘enable_quota’ set but not used [-Wunused-but-set-variable] int enable_quota = 0; ^~~~~ Move 'enable_quota' into the same #ifdef CONFIG_QUOTA block to remove an unused variable warning. Signed-off-by: Austin Kim Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210824034929.GA13415@raspberrypi Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8a67e5f3f576..160e58249482 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5726,10 +5726,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long old_sb_flags, vfs_flags; struct ext4_mount_options old_opts; - int enable_quota = 0; ext4_group_t g; int err = 0; #ifdef CONFIG_QUOTA + int enable_quota = 0; int i, j; char *to_free[EXT4_MAXQUOTAS]; #endif @@ -5934,7 +5934,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) err = -EROFS; goto restore_opts; } +#ifdef CONFIG_QUOTA enable_quota = 1; +#endif } } -- cgit v1.2.3-59-g8ed1b From afcc4e32f606dbfb47aa7309172c89174b86e74c Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 20 Aug 2021 14:08:53 +0200 Subject: ext4: scope ret locally in ext4_try_to_trim_range() As commit 6920b3913235 ("ext4: add new helper interface ext4_try_to_trim_range()") moves some code into the separate function ext4_try_to_trim_range(), the use of the variable ret within that function is more limited and can be adjusted as well. Scope the use of the variable ret locally and drop dead assignments. No functional change. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210820120853.23134-1-lukas.bulwahn@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 72bfac2d6dce..215b7068f548 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6299,7 +6299,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { ext4_grpblk_t next, count, free_count; void *bitmap; - int ret = 0; bitmap = e4b->bd_bitmap; start = (e4b->bd_info->bb_first_free > start) ? @@ -6314,10 +6313,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) next = mb_find_next_bit(bitmap, max + 1, start); if ((next - start) >= minblocks) { - ret = ext4_trim_extent(sb, start, next - start, e4b); + int ret = ext4_trim_extent(sb, start, next - start, e4b); + if (ret && ret != -EOPNOTSUPP) break; - ret = 0; count += next - start; } free_count += next - start; -- cgit v1.2.3-59-g8ed1b From 6c31a689b2e9e1dee5cbe16b773648a2d84dfb02 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 15 Oct 2021 11:25:12 -0700 Subject: ext4: commit inline data during fast commit During the commit phase in fast commits if an inode with inline data is being committed, also commit the inline data along with inode. Since recovery code just blindly copies entire content found in inode TLV, there is no change needed on the recovery path. Thus, this change is backward compatiable. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211015182513.395917-1-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 8ea5a81e6554..744b000d9756 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -819,7 +819,9 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc) if (ret) return ret; - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) + inode_len = EXT4_INODE_SIZE(inode->i_sb); + else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) inode_len += ei->i_extra_isize; fc_inode.fc_ino = cpu_to_le32(inode->i_ino); -- cgit v1.2.3-59-g8ed1b From 1ebf21784b19d5bc269f39a5d1eedb7f29a7d152 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 15 Oct 2021 11:25:13 -0700 Subject: ext4: inline data inode fast commit replay fixes Since there are no blocks in an inline data inode, there's no point in fixing iblocks field in fast commit replay path for this inode. Similarly, there's no point in fixing any block bitmaps / global block counters with respect to such an inode. Just bail out from these functions if an inline data inode is encountered. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211015182513.395917-2-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 3 +++ fs/ext4/fast_commit.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 09f56e04f4b2..0ecf819bf189 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -6071,6 +6071,9 @@ int ext4_ext_clear_bb(struct inode *inode) int j, ret = 0; struct ext4_map_blocks map; + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) + return 0; + /* Determin the size of the file first */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, EXT4_EX_NOCACHE); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 744b000d9756..0f32b445582a 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1526,7 +1526,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, * crashing. This should be fixed but until then, we calculate * the number of blocks the inode. */ - ext4_ext_replay_set_iblocks(inode); + if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) + ext4_ext_replay_set_iblocks(inode); inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); ext4_reset_inode_seed(inode); @@ -1844,6 +1845,10 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) } cur = 0; end = EXT_MAX_BLOCKS; + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { + iput(inode); + continue; + } while (cur < end) { map.m_lblk = cur; map.m_len = end - cur; -- cgit v1.2.3-59-g8ed1b From 124e7c61deb27d758df5ec0521c36cf08d417f7a Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 26 Oct 2021 14:33:02 -0300 Subject: ext4: fix error code saved on super block during file system abort ext4_abort will eventually call ext4_errno_to_code, which translates the errno to an EXT4_ERR specific error. This means that ext4_abort expects an errno. By using EXT4_ERR_ here, it gets misinterpreted (as an errno), and ends up saving EXT4_ERR_EBUSY on the superblock during an abort, which makes no sense. ESHUTDOWN will get properly translated to EXT4_ERR_SHUTDOWN, so use that instead. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20211026173302.84000-1-krisman@collabora.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 160e58249482..0e8406f5bf0a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5820,7 +5820,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) - ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); + ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); -- cgit v1.2.3-59-g8ed1b