diff options
Diffstat (limited to '')
-rw-r--r-- | fs/ext4/extents.c | 1647 |
1 files changed, 873 insertions, 774 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 954013d6076b..f1956288307f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -27,7 +27,8 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/fiemap.h> -#include <linux/backing-dev.h> +#include <linux/iomap.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "ext4_extents.h" #include "xattr.h" @@ -83,13 +84,6 @@ static void ext4_extent_block_csum_set(struct inode *inode, et->et_checksum = ext4_extent_block_csum(inode, eh); } -static int ext4_split_extent(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - struct ext4_map_blocks *map, - int split_flag, - int flags); - static int ext4_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path **ppath, @@ -97,24 +91,40 @@ static int ext4_split_extent_at(handle_t *handle, int split_flag, int flags); -static int ext4_find_delayed_extent(struct inode *inode, - struct extent_status *newes); - static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) { /* * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by - * i_mutex. So we can safely drop the i_data_sem here. + * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; } +static void ext4_ext_drop_refs(struct ext4_ext_path *path) +{ + int depth, i; + + if (!path) + return; + depth = path->p_depth; + for (i = 0; i <= depth; i++, path++) { + brelse(path->p_bh); + path->p_bh = NULL; + } +} + +void ext4_free_ext_path(struct ext4_ext_path *path) +{ + ext4_ext_drop_refs(path); + kfree(path); +} + /* * Make sure 'handle' has at least 'check_cred' credits. If not, restart * transaction with 'restart_cred' credits. The function drops i_data_sem @@ -145,14 +155,25 @@ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode, static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { + int err = 0; + if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, path->p_bh); + err = ext4_journal_get_write_access(handle, inode->i_sb, + path->p_bh, EXT4_JTR_NONE); + /* + * The extent buffer's verified bit will be set again in + * __ext4_ext_dirty(). We could leave an inconsistent + * buffer if the extents updating procudure break off du + * to some error happens, force to check it again. + */ + if (!err) + clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ - return 0; + return err; } /* @@ -173,6 +194,9 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); + /* Extents updating done, re-set verified flag */ + if (!err) + set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); @@ -306,11 +330,14 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode, { struct ext4_ext_path *path = *ppath; int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); + int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO; + + if (nofail) + flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL; return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, - EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | - (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); + flags); } static int @@ -346,7 +373,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) */ if (lblock + len <= lblock) return 0; - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); + return ext4_inode_block_valid(inode, block, len); } static int ext4_valid_extent_idx(struct inode *inode, @@ -354,14 +381,18 @@ static int ext4_valid_extent_idx(struct inode *inode, { ext4_fsblk_t block = ext4_idx_pblock(ext_idx); - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); + return ext4_inode_block_valid(inode, block, 1); } static int ext4_valid_extent_entries(struct inode *inode, - struct ext4_extent_header *eh, - int depth) + struct ext4_extent_header *eh, + ext4_lblk_t lblk, ext4_fsblk_t *pblk, + int depth) { unsigned short entries; + ext4_lblk_t lblock = 0; + ext4_lblk_t cur = 0; + if (eh->eh_entries == 0) return 1; @@ -370,34 +401,51 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; - ext4_fsblk_t pblock = 0; - ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; - int len = 0; + + /* + * The logical block in the first entry should equal to + * the number in the index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext->ee_block)) + return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - len = ext4_ext_get_actual_len(ext); - if ((lblock <= prev) && prev) { - pblock = ext4_ext_pblock(ext); - es->s_last_error_block = cpu_to_le64(pblock); + if (lblock < cur) { + *pblk = ext4_ext_pblock(ext); return 0; } + cur = lblock + ext4_ext_get_actual_len(ext); ext++; entries--; - prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); + + /* + * The logical block in the first entry should equal to + * the number in the parent index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext_idx->ei_block)) + return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); + if (lblock < cur) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } ext_idx++; entries--; + cur = lblock + 1; } } return 1; @@ -405,7 +453,7 @@ static int ext4_valid_extent_entries(struct inode *inode, static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, - int depth, ext4_fsblk_t pblk) + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; @@ -431,7 +479,11 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, depth)) { + if (unlikely((eh->eh_entries == 0) && (depth > 0))) { + error_msg = "eh_entries is 0 but eh_depth is > 0"; + goto corrupted; + } + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -449,19 +501,19 @@ static int __ext4_ext_check(const char *function, unsigned int line, return 0; corrupted: - ext4_set_errno(inode->i_sb, -err); - ext4_error_inode(inode, function, line, 0, - "pblk %llu bad header/extent: %s - magic %x, " - "entries %u, max %u(%u), depth %u(%u)", - (unsigned long long) pblk, error_msg, - le16_to_cpu(eh->eh_magic), - le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), - max, le16_to_cpu(eh->eh_depth), depth); + ext4_error_inode_err(inode, function, line, 0, -err, + "pblk %llu bad header/extent: %s - magic %x, " + "entries %u, max %u(%u), depth %u(%u)", + (unsigned long long) pblk, error_msg, + le16_to_cpu(eh->eh_magic), + le16_to_cpu(eh->eh_entries), + le16_to_cpu(eh->eh_max), + max, le16_to_cpu(eh->eh_depth), depth); return err; } #define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) int ext4_ext_check_inode(struct inode *inode) { @@ -494,32 +546,34 @@ static void ext4_cache_extents(struct inode *inode, static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) + struct inode *inode, struct ext4_extent_idx *idx, + int depth, int flags) { struct buffer_head *bh; int err; + gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; + ext4_fsblk_t pblk; - bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); + if (flags & EXT4_EX_NOFAIL) + gfp_flags |= __GFP_NOFAIL; + + pblk = ext4_idx_pblock(idx); + bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); if (!bh_uptodate_or_lock(bh)) { trace_ext4_ext_load_extent(inode, pblk, _RET_IP_); - err = bh_submit_read(bh); + err = ext4_read_bh(bh, 0, NULL); if (err < 0) goto errout; } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - if (!ext4_has_feature_journal(inode->i_sb) || - (inode->i_ino != - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; - } + err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), + depth, pblk, le32_to_cpu(idx->ei_block)); + if (err) + goto errout; set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries @@ -535,8 +589,8 @@ errout: } -#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ +#define read_extent_tree_block(inode, idx, depth, flags) \ + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags)) /* @@ -556,6 +610,12 @@ int ext4_ext_precache(struct inode *inode) down_read(&ei->i_data_sem); depth = ext_depth(inode); + /* Don't cache anything if there are no external extent blocks */ + if (!depth) { + up_read(&ei->i_data_sem); + return ret; + } + path = kcalloc(depth + 1, sizeof(struct ext4_ext_path), GFP_NOFS); if (path == NULL) { @@ -563,9 +623,6 @@ int ext4_ext_precache(struct inode *inode) return -ENOMEM; } - /* Don't cache anything if there are no external extent blocks */ - if (depth == 0) - goto out; path[0].p_hdr = ext_inode_hdr(inode); ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0); if (ret) @@ -583,8 +640,7 @@ int ext4_ext_precache(struct inode *inode) i--; continue; } - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx++), + bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { @@ -599,8 +655,7 @@ int ext4_ext_precache(struct inode *inode) ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED); out: up_read(&ei->i_data_sem); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return ret; } @@ -609,22 +664,22 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) { int k, l = path->p_depth; - ext_debug("path:"); + ext_debug(inode, "path:"); for (k = 0; k <= l; k++, path++) { if (path->p_idx) { - ext_debug(" %d->%llu", + ext_debug(inode, " %d->%llu", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); } else if (path->p_ext) { - ext_debug(" %d:[%d]%d:%llu ", + ext_debug(inode, " %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext), ext4_ext_pblock(path->p_ext)); } else - ext_debug(" []"); + ext_debug(inode, " []"); } - ext_debug("\n"); + ext_debug(inode, "\n"); } static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) @@ -640,14 +695,14 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) eh = path[depth].p_hdr; ex = EXT_FIRST_EXTENT(eh); - ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); + ext_debug(inode, "Displaying leaf extents\n"); for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { - ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), + ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); } - ext_debug("\n"); + ext_debug(inode, "\n"); } static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, @@ -660,10 +715,9 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent_idx *idx; idx = path[level].p_idx; while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { - ext_debug("%d: move %d:%llu in new index %llu\n", level, - le32_to_cpu(idx->ei_block), - ext4_idx_pblock(idx), - newblock); + ext_debug(inode, "%d: move %d:%llu in new index %llu\n", + level, le32_to_cpu(idx->ei_block), + ext4_idx_pblock(idx), newblock); idx++; } @@ -672,7 +726,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, ex = path[depth].p_ext; while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", + ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n", le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), ext4_ext_is_unwritten(ex), @@ -688,21 +742,6 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, #define ext4_ext_show_move(inode, path, newblock, level) #endif -void ext4_ext_drop_refs(struct ext4_ext_path *path) -{ - int depth, i; - - if (!path) - return; - depth = path->p_depth; - for (i = 0; i <= depth; i++, path++) { - if (path->p_bh) { - brelse(path->p_bh); - path->p_bh = NULL; - } - } -} - /* * ext4_ext_binsearch_idx: * binary search for the closest index of the given block @@ -716,23 +755,24 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_extent_idx *r, *l, *m; - ext_debug("binsearch for %u(idx): ", block); + ext_debug(inode, "binsearch for %u(idx): ", block); l = EXT_FIRST_INDEX(eh) + 1; r = EXT_LAST_INDEX(eh); while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), + r, le32_to_cpu(r->ei_block)); + if (block < le32_to_cpu(m->ei_block)) r = m - 1; else l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block), - m, le32_to_cpu(m->ei_block), - r, le32_to_cpu(r->ei_block)); } path->p_idx = l - 1; - ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), + ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); #ifdef CHECK_BINSEARCH @@ -783,24 +823,25 @@ ext4_ext_binsearch(struct inode *inode, return; } - ext_debug("binsearch for %u: ", block); + ext_debug(inode, "binsearch for %u: ", block); l = EXT_FIRST_EXTENT(eh) + 1; r = EXT_LAST_EXTENT(eh); while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), + r, le32_to_cpu(r->ee_block)); + if (block < le32_to_cpu(m->ee_block)) r = m - 1; else l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block), - m, le32_to_cpu(m->ee_block), - r, le32_to_cpu(r->ee_block)); } path->p_ext = l - 1; - ext_debug(" -> %d:%llu:[%d]%d ", + ext_debug(inode, " -> %d:%llu:[%d]%d ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_pblock(path->p_ext), ext4_ext_is_unwritten(path->p_ext), @@ -825,7 +866,7 @@ ext4_ext_binsearch(struct inode *inode, } -int ext4_ext_tree_init(handle_t *handle, struct inode *inode) +void ext4_ext_tree_init(handle_t *handle, struct inode *inode) { struct ext4_extent_header *eh; @@ -834,8 +875,8 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) eh->eh_entries = 0; eh->eh_magic = EXT4_EXT_MAGIC; eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); + eh->eh_generation = 0; ext4_mark_inode_dirty(handle, inode); - return 0; } struct ext4_ext_path * @@ -847,6 +888,10 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, struct ext4_ext_path *path = orig_path ? *orig_path : NULL; short int depth, i, ppos = 0; int ret; + gfp_t gfp_flags = GFP_NOFS; + + if (flags & EXT4_EX_NOFAIL) + gfp_flags |= __GFP_NOFAIL; eh = ext_inode_hdr(inode); depth = ext_depth(inode); @@ -867,7 +912,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, if (!path) { /* account possible depth increase */ path = kcalloc(depth + 2, sizeof(struct ext4_ext_path), - GFP_NOFS); + gfp_flags); if (unlikely(!path)) return ERR_PTR(-ENOMEM); path[0].p_maxdepth = depth + 1; @@ -880,7 +925,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) { - ext_debug("depth %d: num %d, max %d\n", + ext_debug(inode, "depth %d: num %d, max %d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); ext4_ext_binsearch_idx(inode, path + ppos, block); @@ -888,8 +933,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL; - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; @@ -916,8 +960,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, return path; err: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); if (orig_path) *orig_path = NULL; return ERR_PTR(ret); @@ -957,18 +1000,20 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ - ext_debug("insert new index %d after: %llu\n", logical, ptr); + ext_debug(inode, "insert new index %d after: %llu\n", + logical, ptr); ix = curp->p_idx + 1; } else { /* insert before */ - ext_debug("insert new index %d before: %llu\n", logical, ptr); + ext_debug(inode, "insert new index %d before: %llu\n", + logical, ptr); ix = curp->p_idx; } len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1; BUG_ON(len < 0); if (len > 0) { - ext_debug("insert new index %d: " + ext_debug(inode, "insert new index %d: " "move %d indices from 0x%p to 0x%p\n", logical, len, ix, ix + 1); memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx)); @@ -1017,9 +1062,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock, oldblock; __le32 border; ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ + gfp_t gfp_flags = GFP_NOFS; int err = 0; size_t ext_size = 0; + if (flags & EXT4_EX_NOFAIL) + gfp_flags |= __GFP_NOFAIL; + /* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */ @@ -1031,12 +1080,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; - ext_debug("leaf will be split." + ext_debug(inode, "leaf will be split." " next leaf starts at %d\n", le32_to_cpu(border)); } else { border = newext->ee_block; - ext_debug("leaf will be added." + ext_debug(inode, "leaf will be added." " next leaf starts at %d\n", le32_to_cpu(border)); } @@ -1053,12 +1102,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, * We need this to handle errors and free blocks * upon them. */ - ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), GFP_NOFS); + ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags); if (!ablocks) return -ENOMEM; /* allocate all needed blocks */ - ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); + ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) { newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err, flags); @@ -1081,7 +1130,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } lock_buffer(bh); - err = ext4_journal_get_create_access(handle, bh); + err = ext4_journal_get_create_access(handle, inode->i_sb, bh, + EXT4_JTR_NONE); if (err) goto cleanup; @@ -1090,6 +1140,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; + neh->eh_generation = 0; /* move remainder of path[depth] to the new leaf */ if (unlikely(path[depth].p_hdr->eh_entries != @@ -1144,7 +1195,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, goto cleanup; } if (k) - ext_debug("create %d intermediate indices\n", k); + ext_debug(inode, "create %d intermediate indices\n", k); /* insert new index into current index block */ /* current depth stored in i var */ i = depth - 1; @@ -1158,7 +1209,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } lock_buffer(bh); - err = ext4_journal_get_create_access(handle, bh); + err = ext4_journal_get_create_access(handle, inode->i_sb, bh, + EXT4_JTR_NONE); if (err) goto cleanup; @@ -1167,11 +1219,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); neh->eh_depth = cpu_to_le16(depth - i); + neh->eh_generation = 0; fidx = EXT_FIRST_INDEX(neh); fidx->ei_block = border; ext4_idx_store_pblock(fidx, oldblock); - ext_debug("int.index at %d (block %llu): %u -> %llu\n", + ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n", i, newblock, le32_to_cpu(border), oldblock); /* move remainder of path[i] to the new index block */ @@ -1185,7 +1238,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } /* start copy indexes */ m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; - ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx, EXT_MAX_INDEX(path[i].p_hdr)); ext4_ext_show_move(inode, path, newblock, i); if (m) { @@ -1283,7 +1336,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, return -ENOMEM; lock_buffer(bh); - err = ext4_journal_get_create_access(handle, bh); + err = ext4_journal_get_create_access(handle, inode->i_sb, bh, + EXT4_JTR_NONE); if (err) { unlock_buffer(bh); goto out; @@ -1306,6 +1360,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, neh->eh_magic = EXT4_EXT_MAGIC; ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); + set_buffer_verified(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, inode, bh); @@ -1322,13 +1377,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, EXT_FIRST_INDEX(neh)->ei_block = EXT_FIRST_EXTENT(neh)->ee_block; } - ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", + ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n", le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), ext4_idx_pblock(EXT_FIRST_INDEX(neh))); le16_add_cpu(&neh->eh_depth, 1); - ext4_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); out: brelse(bh); @@ -1449,8 +1504,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", ix != NULL ? le32_to_cpu(ix->ei_block) : 0, - EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? - le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, + le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block), depth); return -EFSCORRUPTED; } @@ -1471,22 +1525,21 @@ static int ext4_ext_search_left(struct inode *inode, } /* - * search the closest allocated block to the right for *logical - * and returns it at @logical + it's physical address at @phys - * if *logical is the largest allocated block, the function - * returns 0 at @phys - * return value contains 0 (success) or error code + * Search the closest allocated block to the right for *logical + * and returns it at @logical + it's physical address at @phys. + * If not exists, return 0 and @phys is set to 0. We will return + * 1 which means we found an allocated block and ret_ex is valid. + * Or return a (< 0) error code. */ static int ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t *logical, ext4_fsblk_t *phys, - struct ext4_extent **ret_ex) + struct ext4_extent *ret_ex) { struct buffer_head *bh = NULL; struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; - ext4_fsblk_t block; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; @@ -1553,20 +1606,17 @@ got_index: * follow it and find the closest allocated * block to the right */ ix++; - block = ext4_idx_pblock(ix); while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); put_bh(bh); } - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -1574,10 +1624,11 @@ got_index: found_extent: *logical = le32_to_cpu(ex->ee_block); *phys = ext4_ext_pblock(ex); - *ret_ex = ex; + if (ret_ex) + *ret_ex = *ex; if (bh) put_bh(bh); - return 0; + return 1; } /* @@ -1909,7 +1960,7 @@ out: /* * ext4_ext_insert_extent: - * tries to merge requsted extent into the existing extent or + * tries to merge requested extent into the existing extent or * inserts requested extent as new one into the tree, * creating new leaf in the no-space case. */ @@ -1964,7 +2015,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* Try to append newex to the ex */ if (ext4_can_extents_be_merged(inode, ex, newext)) { - ext_debug("append [%d]%d block to %u:[%d]%d" + ext_debug(inode, "append [%d]%d block to %u:[%d]%d" "(from %llu)\n", ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), @@ -1981,7 +2032,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; nearex = ex; goto merge; } @@ -1989,7 +2039,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, prepend: /* Try to prepend newex to the ex */ if (ext4_can_extents_be_merged(inode, newext, ex)) { - ext_debug("prepend %u[%d]%d block to %u:[%d]%d" + ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d" "(from %llu)\n", le32_to_cpu(newext->ee_block), ext4_ext_is_unwritten(newext), @@ -2010,7 +2060,6 @@ prepend: + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; nearex = ex; goto merge; } @@ -2027,20 +2076,20 @@ prepend: if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { - ext_debug("next leaf block - %u\n", next); + ext_debug(inode, "next leaf block - %u\n", next); BUG_ON(npath != NULL); - npath = ext4_find_extent(inode, next, NULL, 0); + npath = ext4_find_extent(inode, next, NULL, gb_flags); if (IS_ERR(npath)) return PTR_ERR(npath); BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { - ext_debug("next leaf isn't full(%d)\n", + ext_debug(inode, "next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; goto has_space; } - ext_debug("next leaf has no free space(%d,%d)\n", + ext_debug(inode, "next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); } @@ -2066,7 +2115,7 @@ has_space: if (!nearex) { /* there is no extent in this leaf, create first one */ - ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", + ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), ext4_ext_is_unwritten(newext), @@ -2076,7 +2125,7 @@ has_space: if (le32_to_cpu(newext->ee_block) > le32_to_cpu(nearex->ee_block)) { /* Insert after */ - ext_debug("insert %u:%llu:[%d]%d before: " + ext_debug(inode, "insert %u:%llu:[%d]%d before: " "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), @@ -2087,7 +2136,7 @@ has_space: } else { /* Insert before */ BUG_ON(newext->ee_block == nearex->ee_block); - ext_debug("insert %u:%llu:[%d]%d after: " + ext_debug(inode, "insert %u:%llu:[%d]%d after: " "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), @@ -2097,7 +2146,7 @@ has_space: } len = EXT_LAST_EXTENT(eh) - nearex + 1; if (len > 0) { - ext_debug("insert %u:%llu:[%d]%d: " + ext_debug(inode, "insert %u:%llu:[%d]%d: " "move %d extents from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), @@ -2129,157 +2178,7 @@ merge: err = ext4_ext_dirty(handle, inode, path + path->p_depth); cleanup: - ext4_ext_drop_refs(npath); - kfree(npath); - return err; -} - -static int ext4_fill_fiemap_extents(struct inode *inode, - ext4_lblk_t block, ext4_lblk_t num, - struct fiemap_extent_info *fieinfo) -{ - struct ext4_ext_path *path = NULL; - struct ext4_extent *ex; - struct extent_status es; - ext4_lblk_t next, next_del, start = 0, end = 0; - ext4_lblk_t last = block + num; - int exists, depth = 0, err = 0; - unsigned int flags = 0; - unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; - - while (block < last && block != EXT_MAX_BLOCKS) { - num = last - block; - /* find extent for this block */ - down_read(&EXT4_I(inode)->i_data_sem); - - path = ext4_find_extent(inode, block, &path, 0); - if (IS_ERR(path)) { - up_read(&EXT4_I(inode)->i_data_sem); - err = PTR_ERR(path); - path = NULL; - break; - } - - depth = ext_depth(inode); - if (unlikely(path[depth].p_hdr == NULL)) { - up_read(&EXT4_I(inode)->i_data_sem); - EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EFSCORRUPTED; - break; - } - ex = path[depth].p_ext; - next = ext4_ext_next_allocated_block(path); - - flags = 0; - exists = 0; - if (!ex) { - /* there is no extent yet, so try to allocate - * all requested space */ - start = block; - end = block + num; - } else if (le32_to_cpu(ex->ee_block) > block) { - /* need to allocate space before found extent */ - start = block; - end = le32_to_cpu(ex->ee_block); - if (block + num < end) - end = block + num; - } else if (block >= le32_to_cpu(ex->ee_block) - + ext4_ext_get_actual_len(ex)) { - /* need to allocate space after found extent */ - start = block; - end = block + num; - if (end >= next) - end = next; - } else if (block >= le32_to_cpu(ex->ee_block)) { - /* - * some part of requested space is covered - * by found extent - */ - start = block; - end = le32_to_cpu(ex->ee_block) - + ext4_ext_get_actual_len(ex); - if (block + num < end) - end = block + num; - exists = 1; - } else { - BUG(); - } - BUG_ON(end <= start); - - if (!exists) { - es.es_lblk = start; - es.es_len = end - start; - es.es_pblk = 0; - } else { - es.es_lblk = le32_to_cpu(ex->ee_block); - es.es_len = ext4_ext_get_actual_len(ex); - es.es_pblk = ext4_ext_pblock(ex); - if (ext4_ext_is_unwritten(ex)) - flags |= FIEMAP_EXTENT_UNWRITTEN; - } - - /* - * Find delayed extent and update es accordingly. We call - * it even in !exists case to find out whether es is the - * last existing extent or not. - */ - next_del = ext4_find_delayed_extent(inode, &es); - if (!exists && next_del) { - exists = 1; - flags |= (FIEMAP_EXTENT_DELALLOC | - FIEMAP_EXTENT_UNKNOWN); - } - up_read(&EXT4_I(inode)->i_data_sem); - - if (unlikely(es.es_len == 0)) { - EXT4_ERROR_INODE(inode, "es.es_len == 0"); - err = -EFSCORRUPTED; - break; - } - - /* - * This is possible iff next == next_del == EXT_MAX_BLOCKS. - * we need to check next == EXT_MAX_BLOCKS because it is - * possible that an extent is with unwritten and delayed - * status due to when an extent is delayed allocated and - * is allocated by fallocate status tree will track both of - * them in a extent. - * - * So we could return a unwritten and delayed extent, and - * its block is equal to 'next'. - */ - if (next == next_del && next == EXT_MAX_BLOCKS) { - flags |= FIEMAP_EXTENT_LAST; - if (unlikely(next_del != EXT_MAX_BLOCKS || - next != EXT_MAX_BLOCKS)) { - EXT4_ERROR_INODE(inode, - "next extent == %u, next " - "delalloc extent = %u", - next, next_del); - err = -EFSCORRUPTED; - break; - } - } - - if (exists) { - err = fiemap_fill_next_extent(fieinfo, - (__u64)es.es_lblk << blksize_bits, - (__u64)es.es_pblk << blksize_bits, - (__u64)es.es_len << blksize_bits, - flags); - if (err < 0) - break; - if (err == 1) { - err = 0; - break; - } - } - - block = es.es_lblk + es.es_len; - } - - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(npath); return err; } @@ -2390,7 +2289,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, return; hole_len = min(es.es_lblk - hole_start, hole_len); } - ext_debug(" -> %u:%u\n", hole_start, hole_len); + ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); ext4_es_insert_extent(inode, hole_start, hole_len, ~0, EXTENT_STATUS_HOLE); } @@ -2427,7 +2326,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, err = ext4_ext_dirty(handle, inode, path); if (err) return err; - ext_debug("index is empty, remove it, free block %llu\n", leaf); + ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf); trace_ext4_ext_rm_idx(inode, leaf); ext4_free_blocks(handle, inode, NULL, leaf, 1, @@ -2706,7 +2605,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ext4_fsblk_t pblk; /* the header must be checked already in ext4_ext_remove_space() */ - ext_debug("truncate since %u in leaf to %u\n", start, end); + ext_debug(inode, "truncate since %u in leaf to %u\n", start, end); if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; @@ -2732,7 +2631,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, else unwritten = 0; - ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, + ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block, unwritten, ex_ee_len); path[depth].p_ext = ex; @@ -2740,7 +2639,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, b = ex_ee_block+ex_ee_len - 1 < end ? ex_ee_block+ex_ee_len - 1 : end; - ext_debug(" border %u:%u\n", a, b); + ext_debug(inode, " border %u:%u\n", a, b); /* If this extent is beyond the end of the hole, skip it */ if (end < ex_ee_block) { @@ -2849,7 +2748,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (err) goto out; - ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num, + ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num, ext4_ext_pblock(ex)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); @@ -2926,7 +2825,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, partial.lblk = 0; partial.state = initial; - ext_debug("truncate since %u to %u\n", start, end); + ext_debug(inode, "truncate since %u to %u\n", start, end); /* probably first extent we're gonna free will be last in block */ handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE, @@ -2951,7 +2850,8 @@ again: ext4_fsblk_t pblk; /* find extent for or closest extent to this block */ - path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); + path = ext4_find_extent(inode, end, NULL, + EXT4_EX_NOCACHE | EXT4_EX_NOFAIL); if (IS_ERR(path)) { ext4_journal_stop(handle); return PTR_ERR(path); @@ -2986,7 +2886,7 @@ again: * in use to avoid freeing it when removing blocks. */ if (sbi->s_cluster_ratio > 1) { - pblk = ext4_ext_pblock(ex) + end - ee_block + 2; + pblk = ext4_ext_pblock(ex) + end - ee_block + 1; partial.pclu = EXT4_B2C(sbi, pblk); partial.state = nofree; } @@ -3016,8 +2916,8 @@ again: */ lblk = ex_end + 1; err = ext4_ext_search_right(inode, path, &lblk, &pblk, - &ex); - if (err) + NULL); + if (err < 0) goto out; if (pblk) { partial.pclu = EXT4_B2C(sbi, pblk); @@ -3037,7 +2937,7 @@ again: le16_to_cpu(path[k].p_hdr->eh_entries)+1; } else { path = kcalloc(depth + 1, sizeof(struct ext4_ext_path), - GFP_NOFS); + GFP_NOFS | __GFP_NOFAIL); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; @@ -3067,7 +2967,7 @@ again: /* this is index block */ if (!path[i].p_hdr) { - ext_debug("initialize header\n"); + ext_debug(inode, "initialize header\n"); path[i].p_hdr = ext_block_hdr(path[i].p_bh); } @@ -3075,7 +2975,7 @@ again: /* this level hasn't been touched yet */ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; - ext_debug("init index ptr: hdr 0x%p, num %d\n", + ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n", path[i].p_hdr, le16_to_cpu(path[i].p_hdr->eh_entries)); } else { @@ -3083,18 +2983,18 @@ again: path[i].p_idx--; } - ext_debug("level %d - index, first 0x%p, cur 0x%p\n", + ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n", i, EXT_FIRST_INDEX(path[i].p_hdr), path[i].p_idx); if (ext4_ext_more_to_rm(path + i)) { struct buffer_head *bh; /* go to the next level */ - ext_debug("move to level %d (block %llu)\n", + ext_debug(inode, "move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx), depth - i - 1, - EXT4_EX_NOCACHE); + bh = read_extent_tree_block(inode, path[i].p_idx, + depth - i - 1, + EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); @@ -3125,7 +3025,7 @@ again: brelse(path[i].p_bh); path[i].p_bh = NULL; i--; - ext_debug("return to level %d\n", i); + ext_debug(inode, "return to level %d\n", i); } } @@ -3164,8 +3064,7 @@ again: } } out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); path = NULL; if (err == -EAGAIN) goto again; @@ -3267,7 +3166,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * * * Splits extent [a, b] into two extents [a, @split) and [@split, b], states - * of which are deterimined by split_flag. + * of which are determined by split_flag. * * There are two cases: * a> the extent are splitted into two extent. @@ -3293,8 +3192,7 @@ static int ext4_split_extent_at(handle_t *handle, BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); - ext_debug("ext4_split_extents_at: inode %lu, logical" - "block %llu\n", inode->i_ino, (unsigned long long)split); + ext_debug(inode, "logical block %llu\n", (unsigned long long)split); ext4_ext_show_leaf(inode, path); @@ -3354,7 +3252,10 @@ static int ext4_split_extent_at(handle_t *handle, ext4_ext_mark_unwritten(ex2); err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); - if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + if (err != -ENOSPC && err != -EDQUOT) + goto out; + + if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { if (split_flag & EXT4_EXT_DATA_VALID1) { err = ext4_ext_zeroout(inode, ex2); @@ -3380,30 +3281,34 @@ static int ext4_split_extent_at(handle_t *handle, ext4_ext_pblock(&orig_ex)); } - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_len = cpu_to_le16(ee_len); - ext4_ext_try_to_merge(handle, inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + path->p_depth); - if (err) - goto fix_extent_len; - - /* update extent status tree */ - err = ext4_zeroout_es(inode, &zero_ex); - - goto out; - } else if (err) - goto fix_extent_len; - -out: - ext4_ext_show_leaf(inode, path); - return err; + if (!err) { + /* update the extent length and mark as initialized */ + ex->ee_len = cpu_to_le16(ee_len); + ext4_ext_try_to_merge(handle, inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + path->p_depth); + if (!err) + /* update extent status tree */ + err = ext4_zeroout_es(inode, &zero_ex); + /* If we failed at this point, we don't know in which + * state the extent tree exactly is so don't try to fix + * length of the original extent as it may do even more + * damage. + */ + goto out; + } + } fix_extent_len: ex->ee_len = orig_ex.ee_len; + /* + * Ignore ext4_ext_dirty return value since we are already in error path + * and err is a non-zero error code. + */ ext4_ext_dirty(handle, inode, path + path->p_depth); return err; +out: + ext4_ext_show_leaf(inode, path); + return err; } /* @@ -3458,7 +3363,7 @@ static int ext4_split_extent(handle_t *handle, * Update path is required because previous ext4_split_extent_at() may * result in split of original leaf or extent zeroout. */ - path = ext4_find_extent(inode, map->m_lblk, ppath, 0); + path = ext4_find_extent(inode, map->m_lblk, ppath, flags); if (IS_ERR(path)) return PTR_ERR(path); depth = ext_depth(inode); @@ -3469,7 +3374,6 @@ static int ext4_split_extent(handle_t *handle, return -EFSCORRUPTED; } unwritten = ext4_ext_is_unwritten(ex); - split_flag1 = 0; if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; @@ -3527,13 +3431,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, int err = 0; int split_flag = EXT4_EXT_DATA_VALID2; - ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" - "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)map->m_lblk, map_len); + ext_debug(inode, "logical block %llu, max_blocks %u\n", + (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map_len) eof_block = map->m_lblk + map_len; @@ -3661,7 +3564,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, } if (allocated) { /* Mark the block containing both extents as dirty */ - ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + depth); /* Update path to point to the right extent */ path[depth].p_ext = abut_ex; @@ -3707,7 +3610,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, split_map.m_len - ee_block); err = ext4_ext_zeroout(inode, &zero_ex1); if (err) - goto out; + goto fallback; split_map.m_len = allocated; } if (split_map.m_lblk - ee_block + split_map.m_len < @@ -3721,7 +3624,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_pblock(ex)); err = ext4_ext_zeroout(inode, &zero_ex2); if (err) - goto out; + goto fallback; } split_map.m_len += split_map.m_lblk - ee_block; @@ -3730,6 +3633,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, } } +fallback: err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, flags); if (err > 0) @@ -3781,17 +3685,16 @@ static int ext4_split_convert_extents(handle_t *handle, unsigned int ee_len; int split_flag = 0, depth; - ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n", - __func__, inode->i_ino, + ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)map->m_lblk, map->m_len); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; /* * It is safe to convert extent to initialized via explicit - * zeroout only if extent is fully insde i_size or new_size. + * zeroout only if extent is fully inside i_size or new_size. */ depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3828,8 +3731,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" - "block %llu, max_blocks %u\n", inode->i_ino, + ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); /* If extent is larger than requested it is a clear sign that we still @@ -3874,64 +3776,11 @@ out: return err; } -/* - * Handle EOFBLOCKS_FL flag, clearing it if necessary - */ -static int check_eofblocks_fl(handle_t *handle, struct inode *inode, - ext4_lblk_t lblk, - struct ext4_ext_path *path, - unsigned int len) -{ - int i, depth; - struct ext4_extent_header *eh; - struct ext4_extent *last_ex; - - if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) - return 0; - - depth = ext_depth(inode); - eh = path[depth].p_hdr; - - /* - * We're going to remove EOFBLOCKS_FL entirely in future so we - * do not care for this case anymore. Simply remove the flag - * if there are no extents. - */ - if (unlikely(!eh->eh_entries)) - goto out; - last_ex = EXT_LAST_EXTENT(eh); - /* - * We should clear the EOFBLOCKS_FL flag if we are writing the - * last block in the last extent in the file. We test this by - * first checking to see if the caller to - * ext4_ext_get_blocks() was interested in the last block (or - * a block beyond the last block) in the current extent. If - * this turns out to be false, we can bail out from this - * function immediately. - */ - if (lblk + len < le32_to_cpu(last_ex->ee_block) + - ext4_ext_get_actual_len(last_ex)) - return 0; - /* - * If the caller does appear to be planning to write at or - * beyond the end of the current extent, we then test to see - * if the current extent is the last extent in the file, by - * checking to make sure it was reached via the rightmost node - * at each level of the tree. - */ - for (i = depth-1; i >= 0; i--) - if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) - return 0; -out: - ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); - return ext4_mark_inode_dirty(handle, inode); -} - static int convert_initialized_extent(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path **ppath, - unsigned int allocated) + unsigned int *allocated) { struct ext4_ext_path *path = *ppath; struct ext4_extent *ex; @@ -3952,8 +3801,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - ext_debug("%s: inode %lu, logical" - "block %llu, max_blocks %u\n", __func__, inode->i_ino, + ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); if (ee_block != map->m_lblk || ee_len > map->m_len) { @@ -3991,14 +3839,12 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, ext4_ext_show_leaf(inode, path); ext4_update_inode_fsync_trans(handle, inode, 1); - err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len); - if (err) - return err; + map->m_flags |= EXT4_MAP_UNWRITTEN; - if (allocated > map->m_len) - allocated = map->m_len; - map->m_len = allocated; - return allocated; + if (*allocated > map->m_len) + *allocated = map->m_len; + map->m_len = *allocated; + return 0; } static int @@ -4007,14 +3853,13 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_ext_path **ppath, int flags, unsigned int allocated, ext4_fsblk_t newblock) { - struct ext4_ext_path *path = *ppath; + struct ext4_ext_path __maybe_unused *path = *ppath; int ret = 0; int err = 0; - ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical " - "block %llu, max_blocks %u, flags %x, allocated %u\n", - inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, - flags, allocated); + ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n", + (unsigned long long)map->m_lblk, map->m_len, flags, + allocated); ext4_ext_show_leaf(inode, path); /* @@ -4026,41 +3871,38 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, trace_ext4_ext_handle_unwritten_extents(inode, map, flags, allocated, newblock); - /* get_block() before submit the IO, split the extent */ + /* get_block() before submitting IO, split the extent */ if (flags & EXT4_GET_BLOCKS_PRE_IO) { ret = ext4_split_convert_extents(handle, inode, map, ppath, flags | EXT4_GET_BLOCKS_CONVERT); - if (ret <= 0) - goto out; + if (ret < 0) { + err = ret; + goto out2; + } + /* + * shouldn't get a 0 return when splitting an extent unless + * m_len is 0 (bug) or extent has been corrupted + */ + if (unlikely(ret == 0)) { + EXT4_ERROR_INODE(inode, + "unexpected ret == 0, m_len = %u", + map->m_len); + err = -EFSCORRUPTED; + goto out2; + } map->m_flags |= EXT4_MAP_UNWRITTEN; goto out; } /* IO end_io complete, convert the filled extent to written */ if (flags & EXT4_GET_BLOCKS_CONVERT) { - if (flags & EXT4_GET_BLOCKS_ZERO) { - if (allocated > map->m_len) - allocated = map->m_len; - err = ext4_issue_zeroout(inode, map->m_lblk, newblock, - allocated); - if (err < 0) - goto out2; - } - ret = ext4_convert_unwritten_extents_endio(handle, inode, map, + err = ext4_convert_unwritten_extents_endio(handle, inode, map, ppath); - if (ret >= 0) { - ext4_update_inode_fsync_trans(handle, inode, 1); - err = check_eofblocks_fl(handle, inode, map->m_lblk, - path, map->m_len); - } else - err = ret; - map->m_flags |= EXT4_MAP_MAPPED; - map->m_pblk = newblock; - if (allocated > map->m_len) - allocated = map->m_len; - map->m_len = allocated; - goto out2; + if (err < 0) + goto out2; + ext4_update_inode_fsync_trans(handle, inode, 1); + goto map_out; } - /* buffered IO case */ + /* buffered IO cases */ /* * repeat fallocate creation request * we already have an unwritten extent @@ -4083,35 +3925,39 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, goto out1; } - /* buffered write, writepage time, convert*/ + /* + * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1. + * For buffered writes, at writepage time, etc. Convert a + * discovered unwritten extent to written. + */ ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags); - if (ret >= 0) - ext4_update_inode_fsync_trans(handle, inode, 1); -out: - if (ret <= 0) { + if (ret < 0) { err = ret; goto out2; - } else - allocated = ret; - map->m_flags |= EXT4_MAP_NEW; - if (allocated > map->m_len) - allocated = map->m_len; - map->m_len = allocated; + } + ext4_update_inode_fsync_trans(handle, inode, 1); + /* + * shouldn't get a 0 return when converting an unwritten extent + * unless m_len is 0 (bug) or extent has been corrupted + */ + if (unlikely(ret == 0)) { + EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u", + map->m_len); + err = -EFSCORRUPTED; + goto out2; + } +out: + allocated = ret; + map->m_flags |= EXT4_MAP_NEW; map_out: map->m_flags |= EXT4_MAP_MAPPED; - if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) { - err = check_eofblocks_fl(handle, inode, map->m_lblk, path, - map->m_len); - if (err < 0) - goto out2; - } out1: + map->m_pblk = newblock; if (allocated > map->m_len) allocated = map->m_len; - ext4_ext_show_leaf(inode, path); - map->m_pblk = newblock; map->m_len = allocated; + ext4_ext_show_leaf(inode, path); out2: return err ? err : allocated; } @@ -4227,7 +4073,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) * - * return > 0, number of of blocks already mapped/allocated + * return > 0, number of blocks already mapped/allocated * if create == 0 and these are pre-allocated blocks * buffer head is unmapped * otherwise blocks are mapped @@ -4241,18 +4087,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags) { struct ext4_ext_path *path = NULL; - struct ext4_extent newex, *ex, *ex2; + struct ext4_extent newex, *ex, ex2; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - ext4_fsblk_t newblock = 0; - int free_on_err = 0, err = 0, depth, ret; + ext4_fsblk_t newblock = 0, pblk; + int err = 0, depth, ret; unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; struct ext4_allocation_request ar; ext4_lblk_t cluster_offset; - bool map_from_cluster = false; - ext_debug("blocks %u/%u requested for inode %lu\n", - map->m_lblk, map->m_len, inode->i_ino); + ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len); trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* find extent for this block */ @@ -4260,7 +4104,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; - goto out2; + goto out; } depth = ext_depth(inode); @@ -4276,7 +4120,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, (unsigned long) map->m_lblk, depth, path[depth].p_block); err = -EFSCORRUPTED; - goto out2; + goto out; } ex = path[depth].p_ext; @@ -4299,8 +4143,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, newblock = map->m_lblk - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (map->m_lblk - ee_block); - ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, - ee_block, ee_len, newblock); + ext_debug(inode, "%u fit into %u:%d -> %llu\n", + map->m_lblk, ee_block, ee_len, newblock); /* * If the extent is initialized check whether the @@ -4308,12 +4152,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { - allocated = convert_initialized_extent( - handle, inode, map, &path, - allocated); - goto out2; - } else if (!ext4_ext_is_unwritten(ex)) + err = convert_initialized_extent(handle, + inode, map, &path, &allocated); goto out; + } else if (!ext4_ext_is_unwritten(ex)) { + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = newblock; + if (allocated > map->m_len) + allocated = map->m_len; + map->m_len = allocated; + ext4_ext_show_leaf(inode, path); + goto out; + } ret = ext4_ext_handle_unwritten_extents( handle, inode, map, &path, flags, @@ -4322,7 +4172,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, err = ret; else allocated = ret; - goto out2; + goto out; } } @@ -4347,7 +4197,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, map->m_pblk = 0; map->m_len = min_t(unsigned int, map->m_len, hole_len); - goto out2; + goto out; } /* @@ -4364,7 +4214,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; - map_from_cluster = true; goto got_allocated_blocks; } @@ -4372,20 +4221,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ar.lleft = map->m_lblk; err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); if (err) - goto out2; + goto out; ar.lright = map->m_lblk; - ex2 = NULL; err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2); - if (err) - goto out2; + if (err < 0) + goto out; /* Check if the extent after searching to the right implies a * cluster we can use. */ - if ((sbi->s_cluster_ratio > 1) && ex2 && - get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { + if ((sbi->s_cluster_ratio > 1) && err && + get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; - map_from_cluster = true; goto got_allocated_blocks; } @@ -4439,51 +4286,44 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ar.flags |= EXT4_MB_USE_RESERVED; newblock = ext4_mb_new_blocks(handle, &ar, &err); if (!newblock) - goto out2; - ext_debug("allocate new block: goal %llu, found %llu/%u\n", - ar.goal, newblock, allocated); - free_on_err = 1; + goto out; allocated_clusters = ar.len; ar.len = EXT4_C2B(sbi, ar.len) - offset; + ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n", + ar.goal, newblock, ar.len, allocated); if (ar.len > allocated) ar.len = allocated; got_allocated_blocks: /* try to insert new extent into found leaf and return */ - ext4_ext_store_pblock(&newex, newblock + offset); + pblk = newblock + offset; + ext4_ext_store_pblock(&newex, pblk); newex.ee_len = cpu_to_le16(ar.len); /* Mark unwritten */ - if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){ + if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { ext4_ext_mark_unwritten(&newex); map->m_flags |= EXT4_MAP_UNWRITTEN; } - err = 0; - if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) - err = check_eofblocks_fl(handle, inode, map->m_lblk, - path, ar.len); - if (!err) - err = ext4_ext_insert_extent(handle, inode, &path, - &newex, flags); - - if (err && free_on_err) { - int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? - EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; - /* free data blocks we just allocated */ - /* not a good idea to call discard here directly, - * but otherwise we'd need to call it every free() */ - ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, NULL, newblock, - EXT4_C2B(sbi, allocated_clusters), fb_flags); - goto out2; - } + err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags); + if (err) { + if (allocated_clusters) { + int fb_flags = 0; - /* previous routine could use block we allocated */ - newblock = ext4_ext_pblock(&newex); - allocated = ext4_ext_get_actual_len(&newex); - if (allocated > map->m_len) - allocated = map->m_len; - map->m_flags |= EXT4_MAP_NEW; + /* + * free data blocks we just allocated. + * not a good idea to call discard here directly, + * but otherwise we'd need to call it every free(). + */ + ext4_discard_preallocations(inode, 0); + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; + ext4_free_blocks(handle, inode, NULL, newblock, + EXT4_C2B(sbi, allocated_clusters), + fb_flags); + } + goto out; + } /* * Reduce the reserved cluster count to reflect successful deferred @@ -4491,7 +4331,7 @@ got_allocated_blocks: * clusters discovered to be delayed allocated. Once allocated, a * cluster is not included in the reserved count. */ - if (test_opt(inode->i_sb, DELALLOC) && !map_from_cluster) { + if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) { if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { /* * When allocating delayed allocated clusters, simply @@ -4530,16 +4370,14 @@ got_allocated_blocks: ext4_update_inode_fsync_trans(handle, inode, 1); else ext4_update_inode_fsync_trans(handle, inode, 0); -out: - if (allocated > map->m_len) - allocated = map->m_len; + + map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED); + map->m_pblk = pblk; + map->m_len = ar.len; + allocated = map->m_len; ext4_ext_show_leaf(inode, path); - map->m_flags |= EXT4_MAP_MAPPED; - map->m_pblk = newblock; - map->m_len = allocated; -out2: - ext4_ext_drop_refs(path); - kfree(path); +out: + ext4_free_ext_path(path); trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); @@ -4570,13 +4408,18 @@ retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); if (err == -ENOMEM) { - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_ATOMIC); goto retry; } if (err) return err; - return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); +retry_remove_space: + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + if (err == -ENOMEM) { + memalloc_retry_wait(GFP_ATOMIC); + goto retry_remove_space; + } + return err; } static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, @@ -4585,8 +4428,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, { struct inode *inode = file_inode(file); handle_t *handle; - int ret = 0; - int ret2 = 0; + int ret = 0, ret2 = 0, ret3 = 0; int retries = 0; int depth = 0; struct ext4_map_blocks map; @@ -4611,7 +4453,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, depth = ext_depth(inode); retry: - while (ret >= 0 && len) { + while (len) { /* * Recalculate credits when extent tree depth changes. */ @@ -4633,9 +4475,13 @@ retry: inode->i_ino, map.m_lblk, map.m_len, ret); ext4_mark_inode_dirty(handle, inode); - ret2 = ext4_journal_stop(handle); + ext4_journal_stop(handle); break; } + /* + * allow a full retry cycle for any remaining allocations + */ + retries = 0; map.m_lblk += ret; map.m_len = len = len - ret; epos = (loff_t)map.m_lblk << inode->i_blkbits; @@ -4645,34 +4491,29 @@ retry: epos = new_size; if (ext4_update_inode_size(inode, epos) & 0x1) inode->i_mtime = inode->i_ctime; - } else { - if (epos > inode->i_size) - ext4_set_inode_flag(inode, - EXT4_INODE_EOFBLOCKS); } - ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); - ret2 = ext4_journal_stop(handle); - if (ret2) + ret3 = ext4_journal_stop(handle); + ret2 = ret3 ? ret3 : ret2; + if (unlikely(ret2)) break; } - if (ret == -ENOSPC && - ext4_should_retry_alloc(inode->i_sb, &retries)) { - ret = 0; + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - } return ret > 0 ? ret2 : ret; } -static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); +static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len); -static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); +static int ext4_insert_range(struct file *file, loff_t offset, loff_t len); static long ext4_zero_range(struct file *file, loff_t offset, loff_t len, int mode) { struct inode *inode = file_inode(file); + struct address_space *mapping = file->f_mapping; handle_t *handle = NULL; unsigned int max_blocks; loff_t new_size = 0; @@ -4694,7 +4535,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, } /* - * Round up offset. This is not fallocate, we neet to zero out + * Round up offset. This is not fallocate, we need to zero out * blocks, so convert interior block aligned part of the range to * unwritten and possibly manually zero out unaligned parts of the * range. @@ -4717,7 +4558,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, inode_lock(inode); /* - * Indirect files do not support unwritten extnets + * Indirect files do not support unwritten extents */ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { ret = -EOPNOTSUPP; @@ -4734,12 +4575,14 @@ static long ext4_zero_range(struct file *file, loff_t offset, } flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; - if (mode & FALLOC_FL_KEEP_SIZE) - flags |= EXT4_GET_BLOCKS_KEEP_SIZE; - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out_mutex; + /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file, @@ -4761,17 +4604,17 @@ static long ext4_zero_range(struct file *file, loff_t offset, * Prevent page faults from reinstantiating pages we have * released from page cache. */ - down_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_lock(mapping); ret = ext4_break_layouts(inode); if (ret) { - up_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_unlock(mapping); goto out_mutex; } ret = ext4_update_disksize_before_punch(inode, offset, len); if (ret) { - up_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_unlock(mapping); goto out_mutex; } /* Now release the pages and zero block aligned part of pages */ @@ -4780,7 +4623,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); - up_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_unlock(mapping); if (ret) goto out_mutex; } @@ -4802,18 +4645,11 @@ static long ext4_zero_range(struct file *file, loff_t offset, } inode->i_mtime = inode->i_ctime = current_time(inode); - if (new_size) { + if (new_size) ext4_update_inode_size(inode, new_size); - } else { - /* - * Mark that we allocate beyond EOF so the subsequent truncate - * can proceed even if the new size is the same as i_size. - */ - if (offset + len > inode->i_size) - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); - } - ext4_mark_inode_dirty(handle, inode); - + ret = ext4_mark_inode_dirty(handle, inode); + if (unlikely(ret)) + goto out_handle; /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); if (ret >= 0) @@ -4822,6 +4658,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (file->f_flags & O_SYNC) ext4_handle_sync(handle); +out_handle: ext4_journal_stop(handle); out_mutex: inode_unlock(inode); @@ -4861,29 +4698,36 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; - if (mode & FALLOC_FL_PUNCH_HOLE) - return ext4_punch_hole(inode, offset, len); - + inode_lock(inode); ret = ext4_convert_inline_data(inode); + inode_unlock(inode); if (ret) - return ret; + goto exit; - if (mode & FALLOC_FL_COLLAPSE_RANGE) - return ext4_collapse_range(inode, offset, len); + if (mode & FALLOC_FL_PUNCH_HOLE) { + ret = ext4_punch_hole(file, offset, len); + goto exit; + } - if (mode & FALLOC_FL_INSERT_RANGE) - return ext4_insert_range(inode, offset, len); + if (mode & FALLOC_FL_COLLAPSE_RANGE) { + ret = ext4_collapse_range(file, offset, len); + goto exit; + } - if (mode & FALLOC_FL_ZERO_RANGE) - return ext4_zero_range(file, offset, len, mode); + if (mode & FALLOC_FL_INSERT_RANGE) { + ret = ext4_insert_range(file, offset, len); + goto exit; + } + if (mode & FALLOC_FL_ZERO_RANGE) { + ret = ext4_zero_range(file, offset, len, mode); + goto exit; + } trace_ext4_fallocate_enter(inode, offset, len, mode); lblk = offset >> blkbits; max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits); flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; - if (mode & FALLOC_FL_KEEP_SIZE) - flags |= EXT4_GET_BLOCKS_KEEP_SIZE; inode_lock(inode); @@ -4904,20 +4748,25 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out; + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); if (ret) goto out; if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { - ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal, - EXT4_I(inode)->i_sync_tid); + ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, + EXT4_I(inode)->i_sync_tid); } out: inode_unlock(inode); trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); +exit: return ret; } @@ -4935,8 +4784,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, loff_t offset, ssize_t len) { unsigned int max_blocks; - int ret = 0; - int ret2 = 0; + int ret = 0, ret2 = 0, ret3 = 0; struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; unsigned int credits = 0; @@ -4969,9 +4817,13 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, "ext4_ext_map_blocks returned %d", inode->i_ino, map.m_lblk, map.m_len, ret); - ext4_mark_inode_dirty(handle, inode); - if (credits) - ret2 = ext4_journal_stop(handle); + ret2 = ext4_mark_inode_dirty(handle, inode); + if (credits) { + ret3 = ext4_journal_stop(handle); + if (unlikely(ret3)) + ret2 = ret3; + } + if (ret <= 0 || ret2) break; } @@ -4980,7 +4832,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end) { - int ret, err = 0; + int ret = 0, err = 0; struct ext4_io_end_vec *io_end_vec; /* @@ -5009,64 +4861,13 @@ int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end) return ret < 0 ? ret : err; } -/* - * If newes is not existing extent (newes->ec_pblk equals zero) find - * delayed extent at start of newes and update newes accordingly and - * return start of the next delayed extent. - * - * If newes is existing extent (newes->ec_pblk is not equal zero) - * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed - * extent found. Leave newes unmodified. - */ -static int ext4_find_delayed_extent(struct inode *inode, - struct extent_status *newes) -{ - struct extent_status es; - ext4_lblk_t block, next_del; - - if (newes->es_pblk == 0) { - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, - newes->es_lblk, - newes->es_lblk + newes->es_len - 1, - &es); - - /* - * No extent in extent-tree contains block @newes->es_pblk, - * then the block may stay in 1)a hole or 2)delayed-extent. - */ - if (es.es_len == 0) - /* A hole found. */ - return 0; - - if (es.es_lblk > newes->es_lblk) { - /* A hole found. */ - newes->es_len = min(es.es_lblk - newes->es_lblk, - newes->es_len); - return 0; - } - - newes->es_len = es.es_lblk + es.es_len - newes->es_lblk; - } - - block = newes->es_lblk + newes->es_len; - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, block, - EXT_MAX_BLOCKS, &es); - if (es.es_len == 0) - next_del = EXT_MAX_BLOCKS; - else - next_del = es.es_lblk; - - return next_del; -} - -static int ext4_xattr_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo) +static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap) { __u64 physical = 0; - __u64 length; - __u32 flags = FIEMAP_EXTENT_LAST; + __u64 length = 0; int blockbits = inode->i_sb->s_blocksize_bits; int error = 0; + u16 iomap_type; /* in-inode? */ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { @@ -5081,40 +4882,69 @@ static int ext4_xattr_fiemap(struct inode *inode, EXT4_I(inode)->i_extra_isize; physical += offset; length = EXT4_SB(inode->i_sb)->s_inode_size - offset; - flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); - } else { /* external block */ + iomap_type = IOMAP_INLINE; + } else if (EXT4_I(inode)->i_file_acl) { /* external block */ physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; + iomap_type = IOMAP_MAPPED; + } else { + /* no in-inode or external block for xattr, so return -ENOENT */ + error = -ENOENT; + goto out; } - if (physical) - error = fiemap_fill_next_extent(fieinfo, 0, physical, - length, flags); - return (error < 0 ? error : 0); + iomap->addr = physical; + iomap->offset = 0; + iomap->length = length; + iomap->type = iomap_type; + iomap->flags = 0; +out: + return error; } -static int _ext4_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, - int (*fill)(struct inode *, ext4_lblk_t, - ext4_lblk_t, - struct fiemap_extent_info *)) +static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset, + loff_t length, unsigned flags, + struct iomap *iomap, struct iomap *srcmap) { - ext4_lblk_t start_blk; - u32 ext4_fiemap_flags = FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR; + int error; - int error = 0; + error = ext4_iomap_xattr_fiemap(inode, iomap); + if (error == 0 && (offset >= iomap->length)) + error = -ENOENT; + return error; +} - if (ext4_has_inline_data(inode)) { - int has_inline = 1; +static const struct iomap_ops ext4_iomap_xattr_ops = { + .iomap_begin = ext4_iomap_xattr_begin, +}; - error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline, - start, len); +static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len) +{ + u64 maxbytes; - if (has_inline) - return error; - } + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + maxbytes = inode->i_sb->s_maxbytes; + else + maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; + + if (*len == 0) + return -EINVAL; + if (start > maxbytes) + return -EFBIG; + + /* + * Shrink request scope to what the fs can actually handle. + */ + if (*len > maxbytes || (maxbytes - *len) < start) + *len = maxbytes - start; + return 0; +} + +int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + int error = 0; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { error = ext4_ext_precache(inode); @@ -5123,48 +4953,31 @@ static int _ext4_fiemap(struct inode *inode, fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; } - /* fallback to generic here if not in extents fmt */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && - fill == ext4_fill_fiemap_extents) - return generic_block_fiemap(inode, fieinfo, start, len, - ext4_get_block); - - if (fill == ext4_fill_es_cache_info) - ext4_fiemap_flags &= FIEMAP_FLAG_XATTR; - if (fiemap_check_flags(fieinfo, ext4_fiemap_flags)) - return -EBADR; + /* + * For bitmap files the maximum size limit could be smaller than + * s_maxbytes, so check len here manually instead of just relying on the + * generic check. + */ + error = ext4_fiemap_check_ranges(inode, start, &len); + if (error) + return error; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { - error = ext4_xattr_fiemap(inode, fieinfo); - } else { - ext4_lblk_t len_blks; - __u64 last_blk; - - start_blk = start >> inode->i_sb->s_blocksize_bits; - last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; - if (last_blk >= EXT_MAX_BLOCKS) - last_blk = EXT_MAX_BLOCKS-1; - len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; - - /* - * Walk the extent tree gathering extent information - * and pushing extents back to the user. - */ - error = fill(inode, start_blk, len_blks, fieinfo); + fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; + return iomap_fiemap(inode, fieinfo, start, len, + &ext4_iomap_xattr_ops); } - return error; -} -int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len) -{ - return _ext4_fiemap(inode, fieinfo, start, len, - ext4_fill_fiemap_extents); + return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops); } int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { + ext4_lblk_t start_blk, len_blks; + __u64 last_blk; + int error = 0; + if (ext4_has_inline_data(inode)) { int has_inline; @@ -5175,39 +4988,32 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, return 0; } - return _ext4_fiemap(inode, fieinfo, start, len, - ext4_fill_es_cache_info); -} + if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { + error = ext4_ext_precache(inode); + if (error) + return error; + fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; + } + error = fiemap_prep(inode, fieinfo, start, &len, 0); + if (error) + return error; -/* - * ext4_access_path: - * Function to access the path buffer for marking it dirty. - * It also checks if there are sufficient credits left in the journal handle - * to update path. - */ -static int -ext4_access_path(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - int credits, err; + error = ext4_fiemap_check_ranges(inode, start, &len); + if (error) + return error; - if (!ext4_handle_valid(handle)) - return 0; + start_blk = start >> inode->i_sb->s_blocksize_bits; + last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; + if (last_blk >= EXT_MAX_BLOCKS) + last_blk = EXT_MAX_BLOCKS-1; + len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; /* - * Check if need to extend journal credits - * 3 for leaf, sb, and inode plus 2 (bmap and group - * descriptor) for each block group; assume two block - * groups + * Walk the extent tree gathering extent information + * and pushing extents back to the user. */ - credits = ext4_writepage_trans_blocks(inode); - err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0); - if (err < 0) - return err; - - err = ext4_ext_get_access(handle, inode, path); - return err; + return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo); } /* @@ -5224,6 +5030,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, int depth, err = 0; struct ext4_extent *ex_start, *ex_last; bool update = false; + int credits, restart_credits; depth = path->p_depth; while (depth >= 0) { @@ -5233,13 +5040,26 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); + /* leaf + sb + inode */ + credits = 3; + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) { + update = true; + /* extent tree + sb + inode */ + credits = depth + 2; + } - err = ext4_access_path(handle, inode, path + depth); - if (err) + restart_credits = ext4_writepage_trans_blocks(inode); + err = ext4_datasem_ensure_credits(handle, inode, credits, + restart_credits, 0); + if (err) { + if (err > 0) + err = -EAGAIN; goto out; + } - if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) - update = true; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; while (ex_start <= ex_last) { if (SHIFT == SHIFT_LEFT) { @@ -5270,7 +5090,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, } /* Update index too */ - err = ext4_access_path(handle, inode, path + depth); + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; @@ -5309,6 +5129,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, int ret = 0, depth; struct ext4_extent *extent; ext4_lblk_t stop, *iterator, ex_start, ex_end; + ext4_lblk_t tmp = EXT_MAX_BLOCKS; /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, @@ -5362,11 +5183,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * till we reach stop. In case of right shift, iterator points to stop * and it is decreased till we reach start. */ +again: if (SHIFT == SHIFT_LEFT) iterator = &start; else iterator = &stop; + if (tmp != EXT_MAX_BLOCKS) + *iterator = tmp; + /* * Its safe to start updating extents. Start and stop are unsigned, so * in case of right shift if extent with 0 block is reached, iterator @@ -5395,6 +5220,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } } + tmp = *iterator; if (SHIFT == SHIFT_LEFT) { extent = EXT_LAST_EXTENT(path[depth].p_hdr); *iterator = le32_to_cpu(extent->ee_block) + @@ -5413,12 +5239,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, SHIFT); + /* iterator can be NULL which means we should break */ + if (ret == -EAGAIN) + goto again; if (ret) break; } out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return ret; } @@ -5427,9 +5255,11 @@ out: * This implements the fallocate's collapse range functionality for ext4 * Returns: 0 and non-zero on error. */ -static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) +static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; + struct address_space *mapping = inode->i_mapping; ext4_lblk_t punch_start, punch_stop; handle_t *handle; unsigned int credits; @@ -5479,11 +5309,15 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) /* Wait for existing dio to complete */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out_mutex; + /* * Prevent page faults from reinstantiating pages we have released from * page cache. */ - down_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_lock(mapping); ret = ext4_break_layouts(inode); if (ret) @@ -5498,15 +5332,15 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) * Write tail of the last page before removed range since it will get * removed from the page cache below. */ - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); + ret = filemap_write_and_wait_range(mapping, ioffset, offset); if (ret) goto out_mmap; /* * Write data that will be shifted to preserve them when discarding * page cache below. We are also protected from pages becoming dirty - * by i_mmap_sem. + * by i_rwsem and invalidate_lock. */ - ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, + ret = filemap_write_and_wait_range(mapping, offset + len, LLONG_MAX); if (ret) goto out_mmap; @@ -5518,9 +5352,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); ret = ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); @@ -5534,7 +5369,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start, SHIFT_LEFT); @@ -5551,13 +5386,13 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (IS_SYNC(inode)) ext4_handle_sync(handle); inode->i_mtime = inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + ret = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); out_mmap: - up_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_unlock(mapping); out_mutex: inode_unlock(inode); return ret; @@ -5571,9 +5406,11 @@ out_mutex: * by len bytes. * Returns 0 on success, error otherwise. */ -static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) +static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; + struct address_space *mapping = inode->i_mapping; handle_t *handle; struct ext4_ext_path *path; struct ext4_extent *extent; @@ -5628,11 +5465,15 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) /* Wait for existing dio to complete */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out_mutex; + /* * Prevent page faults from reinstantiating pages we have released from * page cache. */ - down_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_lock(mapping); ret = ext4_break_layouts(inode); if (ret) @@ -5656,6 +5497,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; @@ -5666,7 +5508,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); path = ext4_find_extent(inode, offset_lblk, NULL, 0); if (IS_ERR(path)) { @@ -5696,15 +5538,13 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) EXT4_GET_BLOCKS_METADATA_NOFAIL); } - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); if (ret < 0) { up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } } else { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); } ret = ext4_es_remove_extent(inode, offset_lblk, @@ -5731,7 +5571,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); out_mmap: - up_write(&EXT4_I(inode)->i_mmap_sem); + filemap_invalidate_unlock(mapping); out_mutex: inode_unlock(inode); return ret; @@ -5752,7 +5592,7 @@ out_mutex: * stuff such as page-cache locking consistency, bh mapping consistency or * extent's data copying must be performed by caller. * Locking: - * i_mutex is held for both inodes + * i_rwsem is held for both inodes * i_data_sem is locked for write for both inodes * Assumptions: * All pages from requested range are locked for both inodes @@ -5800,7 +5640,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, } ex1 = path1[path1->p_depth].p_ext; ex2 = path2[path2->p_depth].p_ext; - /* Do we have somthing to swap ? */ + /* Do we have something to swap ? */ if (unlikely(!ex2 || !ex1)) goto finish; @@ -5924,10 +5764,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, count -= len; repeat: - ext4_ext_drop_refs(path1); - kfree(path1); - ext4_ext_drop_refs(path2); - kfree(path2); + ext4_free_ext_path(path1); + ext4_free_ext_path(path2); path1 = path2 = NULL; } return replaced_count; @@ -6006,8 +5844,269 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) } out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return err ? err : mapped; } + +/* + * Updates physical block address and unwritten status of extent + * starting at lblk start and of len. If such an extent doesn't exist, + * this function splits the extent tree appropriately to create an + * extent like this. This function is called in the fast commit + * replay path. Returns 0 on success and error on failure. + */ +int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, + int len, int unwritten, ext4_fsblk_t pblk) +{ + struct ext4_ext_path *path = NULL, *ppath; + struct ext4_extent *ex; + int ret; + + path = ext4_find_extent(inode, start, NULL, 0); + if (IS_ERR(path)) + return PTR_ERR(path); + ex = path[path->p_depth].p_ext; + if (!ex) { + ret = -EFSCORRUPTED; + goto out; + } + + if (le32_to_cpu(ex->ee_block) != start || + ext4_ext_get_actual_len(ex) != len) { + /* We need to split this extent to match our extent first */ + ppath = path; + down_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1); + up_write(&EXT4_I(inode)->i_data_sem); + if (ret) + goto out; + kfree(path); + path = ext4_find_extent(inode, start, NULL, 0); + if (IS_ERR(path)) + return -1; + ppath = path; + ex = path[path->p_depth].p_ext; + WARN_ON(le32_to_cpu(ex->ee_block) != start); + if (ext4_ext_get_actual_len(ex) != len) { + down_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_force_split_extent_at(NULL, inode, &ppath, + start + len, 1); + up_write(&EXT4_I(inode)->i_data_sem); + if (ret) + goto out; + kfree(path); + path = ext4_find_extent(inode, start, NULL, 0); + if (IS_ERR(path)) + return -EINVAL; + ex = path[path->p_depth].p_ext; + } + } + if (unwritten) + ext4_ext_mark_unwritten(ex); + else + ext4_ext_mark_initialized(ex); + ext4_ext_store_pblock(ex, pblk); + down_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]); + up_write(&EXT4_I(inode)->i_data_sem); +out: + ext4_free_ext_path(path); + ext4_mark_inode_dirty(NULL, inode); + return ret; +} + +/* Try to shrink the extent tree */ +void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent *ex; + ext4_lblk_t old_cur, cur = 0; + + while (cur < end) { + path = ext4_find_extent(inode, cur, NULL, 0); + if (IS_ERR(path)) + return; + ex = path[path->p_depth].p_ext; + if (!ex) { + ext4_free_ext_path(path); + ext4_mark_inode_dirty(NULL, inode); + return; + } + old_cur = cur; + cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); + if (cur <= old_cur) + cur = old_cur + 1; + ext4_ext_try_to_merge(NULL, inode, path, ex); + down_write(&EXT4_I(inode)->i_data_sem); + ext4_ext_dirty(NULL, inode, &path[path->p_depth]); + up_write(&EXT4_I(inode)->i_data_sem); + ext4_mark_inode_dirty(NULL, inode); + ext4_free_ext_path(path); + } +} + +/* Check if *cur is a hole and if it is, skip it */ +static int skip_hole(struct inode *inode, ext4_lblk_t *cur) +{ + int ret; + struct ext4_map_blocks map; + + map.m_lblk = *cur; + map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur; + + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + return ret; + if (ret != 0) + return 0; + *cur = *cur + map.m_len; + return 0; +} + +/* Count number of blocks used by this inode and update i_blocks */ +int ext4_ext_replay_set_iblocks(struct inode *inode) +{ + struct ext4_ext_path *path = NULL, *path2 = NULL; + struct ext4_extent *ex; + ext4_lblk_t cur = 0, end; + int numblks = 0, i, ret = 0; + ext4_fsblk_t cmp1, cmp2; + struct ext4_map_blocks map; + + /* Determin the size of the file first */ + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, + EXT4_EX_NOCACHE); + if (IS_ERR(path)) + return PTR_ERR(path); + ex = path[path->p_depth].p_ext; + if (!ex) { + ext4_free_ext_path(path); + goto out; + } + end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); + ext4_free_ext_path(path); + + /* Count the number of data blocks */ + cur = 0; + while (cur < end) { + map.m_lblk = cur; + map.m_len = end - cur; + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + break; + if (ret > 0) + numblks += ret; + cur = cur + map.m_len; + } + + /* + * Count the number of extent tree blocks. We do it by looking up + * two successive extents and determining the difference between + * their paths. When path is different for 2 successive extents + * we compare the blocks in the path at each level and increment + * iblocks by total number of differences found. + */ + cur = 0; + ret = skip_hole(inode, &cur); + if (ret < 0) + goto out; + path = ext4_find_extent(inode, cur, NULL, 0); + if (IS_ERR(path)) + goto out; + numblks += path->p_depth; + ext4_free_ext_path(path); + while (cur < end) { + path = ext4_find_extent(inode, cur, NULL, 0); + if (IS_ERR(path)) + break; + ex = path[path->p_depth].p_ext; + if (!ex) { + ext4_free_ext_path(path); + return 0; + } + cur = max(cur + 1, le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex)); + ret = skip_hole(inode, &cur); + if (ret < 0) { + ext4_free_ext_path(path); + break; + } + path2 = ext4_find_extent(inode, cur, NULL, 0); + if (IS_ERR(path2)) { + ext4_free_ext_path(path); + break; + } + for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { + cmp1 = cmp2 = 0; + if (i <= path->p_depth) + cmp1 = path[i].p_bh ? + path[i].p_bh->b_blocknr : 0; + if (i <= path2->p_depth) + cmp2 = path2[i].p_bh ? + path2[i].p_bh->b_blocknr : 0; + if (cmp1 != cmp2 && cmp2 != 0) + numblks++; + } + ext4_free_ext_path(path); + ext4_free_ext_path(path2); + } + +out: + inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9); + ext4_mark_inode_dirty(NULL, inode); + return 0; +} + +int ext4_ext_clear_bb(struct inode *inode) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent *ex; + ext4_lblk_t cur = 0, end; + int j, ret = 0; + struct ext4_map_blocks map; + + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) + return 0; + + /* Determin the size of the file first */ + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, + EXT4_EX_NOCACHE); + if (IS_ERR(path)) + return PTR_ERR(path); + ex = path[path->p_depth].p_ext; + if (!ex) { + ext4_free_ext_path(path); + return 0; + } + end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); + ext4_free_ext_path(path); + + cur = 0; + while (cur < end) { + map.m_lblk = cur; + map.m_len = end - cur; + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + break; + if (ret > 0) { + path = ext4_find_extent(inode, map.m_lblk, NULL, 0); + if (!IS_ERR_OR_NULL(path)) { + for (j = 0; j < path->p_depth; j++) { + + ext4_mb_mark_bb(inode->i_sb, + path[j].p_block, 1, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + 0, path[j].p_block, 1, 1); + } + ext4_free_ext_path(path); + } + ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + map.m_lblk, map.m_pblk, map.m_len, 1); + } + cur = cur + map.m_len; + } + + return 0; +} |