aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c1109
1 files changed, 882 insertions, 227 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 35f65cf4f318..74292a71b384 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -37,7 +37,6 @@
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include <linux/falloc.h>
#include <asm/uaccess.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
@@ -51,8 +50,8 @@
*/
#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
due to ENOSPC */
-#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
-#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
+#define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */
+#define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */
#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
@@ -144,6 +143,7 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
{
if (path->p_bh) {
/* path points to block */
+ BUFFER_TRACE(path->p_bh, "get_write_access");
return ext4_journal_get_write_access(handle, path->p_bh);
}
/* path points to leaf/index in inode body */
@@ -161,6 +161,8 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
struct inode *inode, struct ext4_ext_path *path)
{
int err;
+
+ WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
if (path->p_bh) {
ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
/* path points to block */
@@ -360,8 +362,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+ ext4_lblk_t last = lblock + len - 1;
- if (len == 0)
+ if (lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -387,11 +391,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
if (depth == 0) {
/* leaf entries */
struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ ext4_fsblk_t pblock = 0;
+ ext4_lblk_t lblock = 0;
+ ext4_lblk_t prev = 0;
+ int len = 0;
while (entries) {
if (!ext4_valid_extent(inode, ext))
return 0;
+
+ /* Check for overlapping extents */
+ lblock = le32_to_cpu(ext->ee_block);
+ len = ext4_ext_get_actual_len(ext);
+ if ((lblock <= prev) && prev) {
+ pblock = ext4_ext_pblock(ext);
+ es->s_last_error_block = cpu_to_le64(pblock);
+ return 0;
+ }
ext++;
entries--;
+ prev = lblock + len - 1;
}
} else {
struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -508,7 +527,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
lblk - prev, ~0,
EXTENT_STATUS_HOLE);
- if (ext4_ext_is_uninitialized(ex))
+ if (ext4_ext_is_unwritten(ex))
status = EXTENT_STATUS_UNWRITTEN;
ext4_es_cache_extent(inode, lblk, len,
ext4_ext_pblock(ex), status);
@@ -604,7 +623,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
} else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
- ext4_ext_is_uninitialized(path->p_ext),
+ ext4_ext_is_unwritten(path->p_ext),
ext4_ext_get_actual_len(path->p_ext),
ext4_ext_pblock(path->p_ext));
} else
@@ -630,7 +649,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
+ ext4_ext_is_unwritten(ex),
ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
}
ext_debug("\n");
@@ -661,7 +680,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
le32_to_cpu(ex->ee_block),
ext4_ext_pblock(ex),
- ext4_ext_is_uninitialized(ex),
+ ext4_ext_is_unwritten(ex),
ext4_ext_get_actual_len(ex),
newblock);
ex++;
@@ -786,7 +805,7 @@ ext4_ext_binsearch(struct inode *inode,
ext_debug(" -> %d:%llu:[%d]%d ",
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_pblock(path->p_ext),
- ext4_ext_is_uninitialized(path->p_ext),
+ ext4_ext_is_unwritten(path->p_ext),
ext4_ext_get_actual_len(path->p_ext));
#ifdef CHECK_BINSEARCH
@@ -1670,11 +1689,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
/*
* Make sure that both extents are initialized. We don't merge
- * uninitialized extents so that we can be sure that end_io code has
+ * unwritten extents so that we can be sure that end_io code has
* the extent that was written properly split out and conversion to
* initialized is trivial.
*/
- if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
+ if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
return 0;
ext1_ee_len = ext4_ext_get_actual_len(ex1);
@@ -1691,6 +1710,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
*/
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0;
+ if (ext4_ext_is_unwritten(ex1) &&
+ (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
+ atomic_read(&EXT4_I(inode)->i_unwritten) ||
+ (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))
+ return 0;
#ifdef AGGRESSIVE_TEST
if (ext1_ee_len >= 4)
return 0;
@@ -1714,7 +1738,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
{
struct ext4_extent_header *eh;
unsigned int depth, len;
- int merge_done = 0;
+ int merge_done = 0, unwritten;
depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL);
@@ -1724,8 +1748,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
break;
/* merge with next extent! */
+ unwritten = ext4_ext_is_unwritten(ex);
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(ex + 1));
+ if (unwritten)
+ ext4_ext_mark_unwritten(ex);
if (ex + 1 < EXT_LAST_EXTENT(eh)) {
len = (EXT_LAST_EXTENT(eh) - ex - 1)
@@ -1783,8 +1810,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
brelse(path[1].p_bh);
ext4_free_blocks(handle, inode, NULL, blk, 1,
- EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET |
- EXT4_FREE_BLOCKS_RESERVE);
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}
/*
@@ -1834,8 +1860,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
/*
* get the next allocated block if the extent in the path
@@ -1845,7 +1870,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
b2 = ext4_ext_next_allocated_block(path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/
@@ -1880,7 +1905,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *npath = NULL;
int depth, len, err;
ext4_lblk_t next;
- int mb_flags = 0;
+ int mb_flags = 0, unwritten;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1920,19 +1945,21 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
if (ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %u:[%d]%d"
"(from %llu)\n",
- ext4_ext_is_uninitialized(newext),
+ ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
+ ext4_ext_is_unwritten(ex),
ext4_ext_get_actual_len(ex),
ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode,
path + depth);
if (err)
return err;
-
+ unwritten = ext4_ext_is_unwritten(ex);
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
+ if (unwritten)
+ ext4_ext_mark_unwritten(ex);
eh = path[depth].p_hdr;
nearex = ex;
goto merge;
@@ -1944,10 +1971,10 @@ prepend:
ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
"(from %llu)\n",
le32_to_cpu(newext->ee_block),
- ext4_ext_is_uninitialized(newext),
+ ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
+ ext4_ext_is_unwritten(ex),
ext4_ext_get_actual_len(ex),
ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode,
@@ -1955,10 +1982,13 @@ prepend:
if (err)
return err;
+ unwritten = ext4_ext_is_unwritten(ex);
ex->ee_block = newext->ee_block;
ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
+ if (unwritten)
+ ext4_ext_mark_unwritten(ex);
eh = path[depth].p_hdr;
nearex = ex;
goto merge;
@@ -2018,7 +2048,7 @@ has_space:
ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
+ ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext));
nearex = EXT_FIRST_EXTENT(eh);
} else {
@@ -2029,7 +2059,7 @@ has_space:
"nearest %p\n",
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
+ ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext),
nearex);
nearex++;
@@ -2040,7 +2070,7 @@ has_space:
"nearest %p\n",
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
+ ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext),
nearex);
}
@@ -2050,7 +2080,7 @@ has_space:
"move %d extents from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
- ext4_ext_is_uninitialized(newext),
+ ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext),
len, nearex, nearex + 1);
memmove(nearex + 1, nearex,
@@ -2172,7 +2202,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
es.es_lblk = le32_to_cpu(ex->ee_block);
es.es_len = ext4_ext_get_actual_len(ex);
es.es_pblk = ext4_ext_pblock(ex);
- if (ext4_ext_is_uninitialized(ex))
+ if (ext4_ext_is_unwritten(ex))
flags |= FIEMAP_EXTENT_UNWRITTEN;
}
@@ -2504,7 +2534,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* extent, we have to mark the cluster as used (store negative
* cluster number in partial_cluster).
*/
- unaligned = pblk & (sbi->s_cluster_ratio - 1);
+ unaligned = EXT4_PBLK_COFF(sbi, pblk);
if (unaligned && (ee_len == num) &&
(*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
*partial_cluster = EXT4_B2C(sbi, pblk);
@@ -2548,7 +2578,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
unsigned num;
ext4_lblk_t ex_ee_block;
unsigned short ex_ee_len;
- unsigned uninitialized = 0;
+ unsigned unwritten = 0;
struct ext4_extent *ex;
ext4_fsblk_t pblk;
@@ -2569,18 +2599,39 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
+ /*
+ * If we're starting with an extent other than the last one in the
+ * node, we need to see if it shares a cluster with the extent to
+ * the right (towards the end of the file). If its leftmost cluster
+ * is this extent's rightmost cluster and it is not cluster aligned,
+ * we'll mark it as a partial that is not to be deallocated.
+ */
+
+ if (ex != EXT_LAST_EXTENT(eh)) {
+ ext4_fsblk_t current_pblk, right_pblk;
+ long long current_cluster, right_cluster;
+
+ current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
+ current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
+ right_pblk = ext4_ext_pblock(ex + 1);
+ right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
+ if (current_cluster == right_cluster &&
+ EXT4_PBLK_COFF(sbi, right_pblk))
+ *partial_cluster = -right_cluster;
+ }
+
trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
while (ex >= EXT_FIRST_EXTENT(eh) &&
ex_ee_block + ex_ee_len > start) {
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
+ if (ext4_ext_is_unwritten(ex))
+ unwritten = 1;
else
- uninitialized = 0;
+ unwritten = 0;
ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
- uninitialized, ex_ee_len);
+ unwritten, ex_ee_len);
path[depth].p_ext = ex;
a = ex_ee_block > start ? ex_ee_block : start;
@@ -2598,7 +2649,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* accidentally freeing it later on
*/
pblk = ext4_ext_pblock(ex);
- if (pblk & (sbi->s_cluster_ratio - 1))
+ if (EXT4_PBLK_COFF(sbi, pblk))
*partial_cluster =
-((long long)EXT4_B2C(sbi, pblk));
ex--;
@@ -2652,11 +2703,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex->ee_len = cpu_to_le16(num);
/*
- * Do not mark uninitialized if all the blocks in the
+ * Do not mark unwritten if all the blocks in the
* extent have been removed.
*/
- if (uninitialized && num)
- ext4_ext_mark_uninitialized(ex);
+ if (unwritten && num)
+ ext4_ext_mark_unwritten(ex);
/*
* If the extent was completely released,
* we need to remove it from the leaf
@@ -2694,10 +2745,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
err = ext4_ext_correct_indexes(handle, inode, path);
/*
- * Free the partial cluster only if the current extent does not
- * reference it. Otherwise we might free used cluster.
+ * If there's a partial cluster and at least one extent remains in
+ * the leaf, free the partial cluster if it isn't shared with the
+ * current extent. If there's a partial cluster and no extents
+ * remain in the leaf, it can't be freed here. It can only be
+ * freed when it's possible to determine if it's not shared with
+ * any other extent - when the next leaf is processed or when space
+ * removal is complete.
*/
- if (*partial_cluster > 0 &&
+ if (*partial_cluster > 0 && eh->eh_entries &&
(EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
*partial_cluster)) {
int flags = get_default_free_blocks_flags(inode);
@@ -2800,9 +2856,9 @@ again:
end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
int split_flag = 0;
- if (ext4_ext_is_uninitialized(ex))
- split_flag = EXT4_EXT_MARK_UNINIT1 |
- EXT4_EXT_MARK_UNINIT2;
+ if (ext4_ext_is_unwritten(ex))
+ split_flag = EXT4_EXT_MARK_UNWRIT1 |
+ EXT4_EXT_MARK_UNWRIT2;
/*
* Split the extent in two so that 'end' is the last
@@ -3059,7 +3115,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
* @path: the path to the extent
* @split: the logical block where the extent is splitted.
* @split_flags: indicates if the extent could be zeroout if split fails, and
- * the states(init or uninit) of new extents.
+ * the states(init or unwritten) of new extents.
* @flags: flags used to insert new extent to extent tree.
*
*
@@ -3101,10 +3157,10 @@ static int ext4_split_extent_at(handle_t *handle,
newblock = split - ee_block + ext4_ext_pblock(ex);
BUG_ON(split < ee_block || split >= (ee_block + ee_len));
- BUG_ON(!ext4_ext_is_uninitialized(ex) &&
+ BUG_ON(!ext4_ext_is_unwritten(ex) &&
split_flag & (EXT4_EXT_MAY_ZEROOUT |
- EXT4_EXT_MARK_UNINIT1 |
- EXT4_EXT_MARK_UNINIT2));
+ EXT4_EXT_MARK_UNWRIT1 |
+ EXT4_EXT_MARK_UNWRIT2));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3116,8 +3172,8 @@ static int ext4_split_extent_at(handle_t *handle,
* then we just change the state of the extent, and splitting
* is not needed.
*/
- if (split_flag & EXT4_EXT_MARK_UNINIT2)
- ext4_ext_mark_uninitialized(ex);
+ if (split_flag & EXT4_EXT_MARK_UNWRIT2)
+ ext4_ext_mark_unwritten(ex);
else
ext4_ext_mark_initialized(ex);
@@ -3131,8 +3187,8 @@ static int ext4_split_extent_at(handle_t *handle,
/* case a */
memcpy(&orig_ex, ex, sizeof(orig_ex));
ex->ee_len = cpu_to_le16(split - ee_block);
- if (split_flag & EXT4_EXT_MARK_UNINIT1)
- ext4_ext_mark_uninitialized(ex);
+ if (split_flag & EXT4_EXT_MARK_UNWRIT1)
+ ext4_ext_mark_unwritten(ex);
/*
* path may lead to new leaf, not to original leaf any more
@@ -3146,8 +3202,8 @@ static int ext4_split_extent_at(handle_t *handle,
ex2->ee_block = cpu_to_le32(split);
ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
ext4_ext_store_pblock(ex2, newblock);
- if (split_flag & EXT4_EXT_MARK_UNINIT2)
- ext4_ext_mark_uninitialized(ex2);
+ if (split_flag & EXT4_EXT_MARK_UNWRIT2)
+ ext4_ext_mark_unwritten(ex2);
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
@@ -3198,7 +3254,7 @@ out:
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
- ext4_ext_dirty(handle, inode, path + depth);
+ ext4_ext_dirty(handle, inode, path + path->p_depth);
return err;
}
@@ -3224,7 +3280,7 @@ static int ext4_split_extent(handle_t *handle,
struct ext4_extent *ex;
unsigned int ee_len, depth;
int err = 0;
- int uninitialized;
+ int unwritten;
int split_flag1, flags1;
int allocated = map->m_len;
@@ -3232,14 +3288,14 @@ static int ext4_split_extent(handle_t *handle,
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
- uninitialized = ext4_ext_is_uninitialized(ex);
+ unwritten = ext4_ext_is_unwritten(ex);
if (map->m_lblk + map->m_len < ee_block + ee_len) {
split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
- if (uninitialized)
- split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
- EXT4_EXT_MARK_UNINIT2;
+ if (unwritten)
+ split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
+ EXT4_EXT_MARK_UNWRIT2;
if (split_flag & EXT4_EXT_DATA_VALID2)
split_flag1 |= EXT4_EXT_DATA_VALID1;
err = ext4_split_extent_at(handle, inode, path,
@@ -3259,15 +3315,20 @@ static int ext4_split_extent(handle_t *handle,
return PTR_ERR(path);
depth = ext_depth(inode);
ex = path[depth].p_ext;
- uninitialized = ext4_ext_is_uninitialized(ex);
+ if (!ex) {
+ EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+ (unsigned long) map->m_lblk);
+ return -EIO;
+ }
+ unwritten = ext4_ext_is_unwritten(ex);
split_flag1 = 0;
if (map->m_lblk >= ee_block) {
split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
- if (uninitialized) {
- split_flag1 |= EXT4_EXT_MARK_UNINIT1;
+ if (unwritten) {
+ split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
- EXT4_EXT_MARK_UNINIT2);
+ EXT4_EXT_MARK_UNWRIT2);
}
err = ext4_split_extent_at(handle, inode, path,
map->m_lblk, split_flag1, flags);
@@ -3282,16 +3343,16 @@ out:
/*
* This function is called by ext4_ext_map_blocks() if someone tries to write
- * to an uninitialized extent. It may result in splitting the uninitialized
+ * to an unwritten extent. It may result in splitting the unwritten
* extent into multiple extents (up to three - one initialized and two
- * uninitialized).
+ * unwritten).
* There are three possibilities:
* a> There is no split required: Entire extent should be initialized
* b> Splits in two extents: Write is happening at either end of the extent
* c> Splits in three extents: Somone is writing in middle of the extent
*
* Pre-conditions:
- * - The extent pointed to by 'path' is uninitialized.
+ * - The extent pointed to by 'path' is unwritten.
* - The extent pointed to by 'path' contains a superset
* of the logical span [map->m_lblk, map->m_lblk + map->m_len).
*
@@ -3337,12 +3398,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
/* Pre-conditions */
- BUG_ON(!ext4_ext_is_uninitialized(ex));
+ BUG_ON(!ext4_ext_is_unwritten(ex));
BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
/*
* Attempt to transfer newly initialized blocks from the currently
- * uninitialized extent to its neighbor. This is much cheaper
+ * unwritten extent to its neighbor. This is much cheaper
* than an insertion followed by a merge as those involve costly
* memmove() calls. Transferring to the left is the common case in
* steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
@@ -3378,7 +3439,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* - C4: abut_ex can receive the additional blocks without
* overflowing the (initialized) length limit.
*/
- if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/
+ if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
((prev_lblk + prev_len) == ee_block) && /*C2*/
((prev_pblk + prev_len) == ee_pblk) && /*C3*/
(prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
@@ -3393,7 +3454,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_block = cpu_to_le32(ee_block + map_len);
ext4_ext_store_pblock(ex, ee_pblk + map_len);
ex->ee_len = cpu_to_le16(ee_len - map_len);
- ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+ ext4_ext_mark_unwritten(ex); /* Restore the flag */
/* Extend abut_ex by 'map_len' blocks */
abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
@@ -3424,7 +3485,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* - C4: abut_ex can receive the additional blocks without
* overflowing the (initialized) length limit.
*/
- if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/
+ if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
((map->m_lblk + map_len) == next_lblk) && /*C2*/
((ee_pblk + ee_len) == next_pblk) && /*C3*/
(next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
@@ -3439,7 +3500,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
ex->ee_len = cpu_to_le16(ee_len - map_len);
- ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+ ext4_ext_mark_unwritten(ex); /* Restore the flag */
/* Extend abut_ex by 'map_len' blocks */
abut_ex->ee_len = cpu_to_le16(next_len + map_len);
@@ -3461,7 +3522,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
WARN_ON(map->m_lblk < ee_block);
/*
* It is safe to convert extent to initialized via explicit
- * zeroout only if extent is fully insde i_size or new_size.
+ * zeroout only if extent is fully inside i_size or new_size.
*/
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
@@ -3544,26 +3605,28 @@ out:
/*
* This function is called by ext4_ext_map_blocks() from
* ext4_get_blocks_dio_write() when DIO to write
- * to an uninitialized extent.
+ * to an unwritten extent.
*
- * Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple initialized/uninitialized extents (up to three)
+ * Writing to an unwritten extent may result in splitting the unwritten
+ * extent into multiple initialized/unwritten extents (up to three)
* There are three possibilities:
- * a> There is no split required: Entire extent should be uninitialized
+ * a> There is no split required: Entire extent should be unwritten
* b> Splits in two extents: Write is happening at either end of the extent
* c> Splits in three extents: Somone is writing in middle of the extent
*
+ * This works the same way in the case of initialized -> unwritten conversion.
+ *
* One of more index blocks maybe needed if the extent tree grow after
- * the uninitialized extent split. To prevent ENOSPC occur at the IO
- * complete, we need to split the uninitialized extent before DIO submit
- * the IO. The uninitialized extent called at this time will be split
- * into three uninitialized extent(at most). After IO complete, the part
+ * the unwritten extent split. To prevent ENOSPC occur at the IO
+ * complete, we need to split the unwritten extent before DIO submit
+ * the IO. The unwritten extent called at this time will be split
+ * into three unwritten extent(at most). After IO complete, the part
* being filled will be convert to initialized by the end_io callback function
* via ext4_convert_unwritten_extents().
*
- * Returns the size of uninitialized extent to be written on success.
+ * Returns the size of unwritten extent to be written on success.
*/
-static int ext4_split_unwritten_extents(handle_t *handle,
+static int ext4_split_convert_extents(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
struct ext4_ext_path *path,
@@ -3575,9 +3638,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
unsigned int ee_len;
int split_flag = 0, depth;
- ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
- "block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)map->m_lblk, map->m_len);
+ ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n",
+ __func__, inode->i_ino,
+ (unsigned long long)map->m_lblk, map->m_len);
eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
inode->i_sb->s_blocksize_bits;
@@ -3592,14 +3655,79 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
- split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
- split_flag |= EXT4_EXT_MARK_UNINIT2;
- if (flags & EXT4_GET_BLOCKS_CONVERT)
- split_flag |= EXT4_EXT_DATA_VALID2;
+ /* Convert to unwritten */
+ if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
+ split_flag |= EXT4_EXT_DATA_VALID1;
+ /* Convert to initialized */
+ } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
+ split_flag |= ee_block + ee_len <= eof_block ?
+ EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
+ }
flags |= EXT4_GET_BLOCKS_PRE_IO;
return ext4_split_extent(handle, inode, path, map, split_flag, flags);
}
+static int ext4_convert_initialized_extents(handle_t *handle,
+ struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path)
+{
+ struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ unsigned int ee_len;
+ int depth;
+ int err = 0;
+
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+
+ ext_debug("%s: inode %lu, logical"
+ "block %llu, max_blocks %u\n", __func__, inode->i_ino,
+ (unsigned long long)ee_block, ee_len);
+
+ if (ee_block != map->m_lblk || ee_len > map->m_len) {
+ err = ext4_split_convert_extents(handle, inode, map, path,
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
+ if (err < 0)
+ goto out;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
+ }
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex) {
+ EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+ (unsigned long) map->m_lblk);
+ err = -EIO;
+ goto out;
+ }
+ }
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+ /* first mark the extent as unwritten */
+ ext4_ext_mark_unwritten(ex);
+
+ /* note: ext4_ext_correct_indexes() isn't needed here because
+ * borders are not changed
+ */
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+
+ /* Mark modified extent as dirty */
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+out:
+ ext4_ext_show_leaf(inode, path);
+ return err;
+}
+
+
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
@@ -3633,8 +3761,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
inode->i_ino, (unsigned long long)ee_block, ee_len,
(unsigned long long)map->m_lblk, map->m_len);
#endif
- err = ext4_split_unwritten_extents(handle, inode, map, path,
- EXT4_GET_BLOCKS_CONVERT);
+ err = ext4_split_convert_extents(handle, inode, map, path,
+ EXT4_GET_BLOCKS_CONVERT);
if (err < 0)
goto out;
ext4_ext_drop_refs(path);
@@ -3753,7 +3881,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
- lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+ lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3812,9 +3940,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
/* Check towards left side */
- c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
if (c_offset) {
- lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+ lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
lblk_to = lblk_from + c_offset - 1;
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3822,7 +3950,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
/* Now check towards right. */
- c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
if (allocated_clusters && c_offset) {
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -3835,7 +3963,39 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
static int
-ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
+ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path, int flags,
+ unsigned int allocated, ext4_fsblk_t newblock)
+{
+ int ret = 0;
+ int err = 0;
+
+ /*
+ * Make sure that the extent is no bigger than we support with
+ * unwritten extent
+ */
+ if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
+ map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
+
+ ret = ext4_convert_initialized_extents(handle, inode, map,
+ path);
+ if (ret >= 0) {
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk,
+ path, map->m_len);
+ } else
+ err = ret;
+ map->m_flags |= EXT4_MAP_UNWRITTEN;
+ if (allocated > map->m_len)
+ allocated = map->m_len;
+ map->m_len = allocated;
+
+ return err ? err : allocated;
+}
+
+static int
+ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
struct ext4_ext_path *path, int flags,
unsigned int allocated, ext4_fsblk_t newblock)
@@ -3844,25 +4004,25 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
int err = 0;
ext4_io_end_t *io = ext4_inode_aio(inode);
- ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
+ ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
"block %llu, max_blocks %u, flags %x, allocated %u\n",
inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
flags, allocated);
ext4_ext_show_leaf(inode, path);
/*
- * When writing into uninitialized space, we should not fail to
+ * When writing into unwritten space, we should not fail to
* allocate metadata blocks for the new extent block if needed.
*/
flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
- trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
+ trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
allocated, newblock);
/* get_block() before submit the IO, split the extent */
- if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
- ret = ext4_split_unwritten_extents(handle, inode, map,
- path, flags);
+ if (flags & EXT4_GET_BLOCKS_PRE_IO) {
+ ret = ext4_split_convert_extents(handle, inode, map,
+ path, flags | EXT4_GET_BLOCKS_CONVERT);
if (ret <= 0)
goto out;
/*
@@ -3875,12 +4035,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
map->m_flags |= EXT4_MAP_UNWRITTEN;
- if (ext4_should_dioread_nolock(inode))
- map->m_flags |= EXT4_MAP_UNINIT;
goto out;
}
/* IO end_io complete, convert the filled extent to written */
- if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
+ if (flags & EXT4_GET_BLOCKS_CONVERT) {
ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
path);
if (ret >= 0) {
@@ -3890,6 +4048,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
} else
err = ret;
map->m_flags |= EXT4_MAP_MAPPED;
+ map->m_pblk = newblock;
if (allocated > map->m_len)
allocated = map->m_len;
map->m_len = allocated;
@@ -3900,7 +4059,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* repeat fallocate creation request
* we already have an unwritten extent
*/
- if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) {
+ if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
map->m_flags |= EXT4_MAP_UNWRITTEN;
goto map_out;
}
@@ -3976,10 +4135,6 @@ out1:
map->m_pblk = newblock;
map->m_len = allocated;
out2:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
return err ? err : allocated;
}
@@ -4030,7 +4185,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -4048,8 +4203,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
(rr_cluster_start == ex_cluster_start)) {
if (rr_cluster_start == ex_cluster_end)
ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
+ map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
map->m_len = min(map->m_len,
(unsigned) sbi->s_cluster_ratio - c_offset);
/*
@@ -4112,7 +4266,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent newex, *ex, *ex2;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_fsblk_t newblock = 0;
- int free_on_err = 0, err = 0, depth;
+ int free_on_err = 0, err = 0, depth, ret;
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
@@ -4154,8 +4308,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
unsigned short ee_len;
+
/*
- * Uninitialized extents are treated as holes, except that
+ * unwritten extents are treated as holes, except that
* we split out initialized portions during a write.
*/
ee_len = ext4_ext_get_actual_len(ex);
@@ -4170,13 +4325,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
ee_block, ee_len, newblock);
- if (!ext4_ext_is_uninitialized(ex))
+ /*
+ * If the extent is initialized check whether the
+ * caller wants to convert it to unwritten.
+ */
+ if ((!ext4_ext_is_unwritten(ex)) &&
+ (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
+ allocated = ext4_ext_convert_initialized_extent(
+ handle, inode, map, path, flags,
+ allocated, newblock);
+ goto out2;
+ } else if (!ext4_ext_is_unwritten(ex))
goto out;
- allocated = ext4_ext_handle_uninitialized_extents(
+ ret = ext4_ext_handle_unwritten_extents(
handle, inode, map, path, flags,
allocated, newblock);
- goto out3;
+ if (ret < 0)
+ err = ret;
+ else
+ allocated = ret;
+ goto out2;
}
}
@@ -4203,7 +4372,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
/*
* If we are doing bigalloc, check to see if the extent returned
@@ -4241,15 +4410,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/*
* See if request is beyond maximum number of blocks we can have in
* a single extent. For an initialized extent this limit is
- * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
- * EXT_UNINIT_MAX_LEN.
+ * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
+ * EXT_UNWRITTEN_MAX_LEN.
*/
if (map->m_len > EXT_INIT_MAX_LEN &&
- !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
+ !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
map->m_len = EXT_INIT_MAX_LEN;
- else if (map->m_len > EXT_UNINIT_MAX_LEN &&
- (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
- map->m_len = EXT_UNINIT_MAX_LEN;
+ else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
+ (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
+ map->m_len = EXT_UNWRITTEN_MAX_LEN;
/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
newex.ee_len = cpu_to_le16(map->m_len);
@@ -4271,7 +4440,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* needed so that future calls to get_implied_cluster_alloc()
* work correctly.
*/
- offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
ar.goal -= offset;
ar.logical -= offset;
@@ -4297,21 +4466,19 @@ got_allocated_blocks:
/* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock + offset);
newex.ee_len = cpu_to_le16(ar.len);
- /* Mark uninitialized */
- if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
- ext4_ext_mark_uninitialized(&newex);
+ /* Mark unwritten */
+ if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
+ ext4_ext_mark_unwritten(&newex);
map->m_flags |= EXT4_MAP_UNWRITTEN;
/*
* io_end structure was created for every IO write to an
- * uninitialized extent. To avoid unnecessary conversion,
+ * unwritten extent. To avoid unnecessary conversion,
* here we flag the IO that really needs the conversion.
* For non asycn direct IO case, flag the inode state
* that we need to perform conversion when IO is done.
*/
- if ((flags & EXT4_GET_BLOCKS_PRE_IO))
+ if (flags & EXT4_GET_BLOCKS_PRE_IO)
set_unwritten = 1;
- if (ext4_should_dioread_nolock(inode))
- map->m_flags |= EXT4_MAP_UNINIT;
}
err = 0;
@@ -4438,9 +4605,9 @@ got_allocated_blocks:
/*
* Cache the extent and update transaction to commit on fdatasync only
- * when it is _not_ an uninitialized extent.
+ * when it is _not_ an unwritten extent.
*/
- if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
+ if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
else
ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -4457,7 +4624,6 @@ out2:
kfree(path);
}
-out3:
trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated);
ext4_es_lru_add(inode);
@@ -4498,34 +4664,233 @@ retry:
ext4_std_error(inode->i_sb, err);
}
-static void ext4_falloc_update_inode(struct inode *inode,
- int mode, loff_t new_size, int update_ctime)
+static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
+ ext4_lblk_t len, loff_t new_size,
+ int flags, int mode)
{
- struct timespec now;
+ struct inode *inode = file_inode(file);
+ handle_t *handle;
+ int ret = 0;
+ int ret2 = 0;
+ int retries = 0;
+ struct ext4_map_blocks map;
+ unsigned int credits;
+ loff_t epos;
+
+ map.m_lblk = offset;
+ map.m_len = len;
+ /*
+ * Don't normalize the request if it can fit in one extent so
+ * that it doesn't get unnecessarily split into multiple
+ * extents.
+ */
+ if (len <= EXT_UNWRITTEN_MAX_LEN)
+ flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
- if (update_ctime) {
- now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_ctime, &now))
- inode->i_ctime = now;
+ /*
+ * credits to insert 1 extent into extent tree
+ */
+ credits = ext4_chunk_trans_blocks(inode, len);
+
+retry:
+ while (ret >= 0 && len) {
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+ credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ break;
+ }
+ ret = ext4_map_blocks(handle, inode, &map, flags);
+ if (ret <= 0) {
+ ext4_debug("inode #%lu: block %u: len %u: "
+ "ext4_ext_map_blocks returned %d",
+ inode->i_ino, map.m_lblk,
+ map.m_len, ret);
+ ext4_mark_inode_dirty(handle, inode);
+ ret2 = ext4_journal_stop(handle);
+ break;
+ }
+ map.m_lblk += ret;
+ map.m_len = len = len - ret;
+ epos = (loff_t)map.m_lblk << inode->i_blkbits;
+ inode->i_ctime = ext4_current_time(inode);
+ if (new_size) {
+ if (epos > new_size)
+ epos = new_size;
+ if (ext4_update_inode_size(inode, epos) & 0x1)
+ inode->i_mtime = inode->i_ctime;
+ } else {
+ if (epos > inode->i_size)
+ ext4_set_inode_flag(inode,
+ EXT4_INODE_EOFBLOCKS);
+ }
+ ext4_mark_inode_dirty(handle, inode);
+ ret2 = ext4_journal_stop(handle);
+ if (ret2)
+ break;
}
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ ret = 0;
+ goto retry;
+ }
+
+ return ret > 0 ? ret2 : ret;
+}
+
+static long ext4_zero_range(struct file *file, loff_t offset,
+ loff_t len, int mode)
+{
+ struct inode *inode = file_inode(file);
+ handle_t *handle = NULL;
+ unsigned int max_blocks;
+ loff_t new_size = 0;
+ int ret = 0;
+ int flags;
+ int credits;
+ int partial_begin, partial_end;
+ loff_t start, end;
+ ext4_lblk_t lblk;
+ struct address_space *mapping = inode->i_mapping;
+ unsigned int blkbits = inode->i_blkbits;
+
+ trace_ext4_zero_range(inode, offset, len, mode);
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ /* Call ext4_force_commit to flush all data in case of data=journal. */
+ if (ext4_should_journal_data(inode)) {
+ ret = ext4_force_commit(inode->i_sb);
+ if (ret)
+ return ret;
+ }
+
/*
- * Update only when preallocation was requested beyond
- * the file size.
+ * Write out all dirty pages to avoid race conditions
+ * Then release them.
*/
- if (!(mode & FALLOC_FL_KEEP_SIZE)) {
- if (new_size > i_size_read(inode))
- i_size_write(inode, new_size);
- if (new_size > EXT4_I(inode)->i_disksize)
- ext4_update_i_disksize(inode, new_size);
- } else {
+ if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+ ret = filemap_write_and_wait_range(mapping, offset,
+ offset + len - 1);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Round up offset. This is not fallocate, we neet to zero out
+ * blocks, so convert interior block aligned part of the range to
+ * unwritten and possibly manually zero out unaligned parts of the
+ * range.
+ */
+ start = round_up(offset, 1 << blkbits);
+ end = round_down((offset + len), 1 << blkbits);
+
+ if (start < offset || end > offset + len)
+ return -EINVAL;
+ partial_begin = offset & ((1 << blkbits) - 1);
+ partial_end = (offset + len) & ((1 << blkbits) - 1);
+
+ lblk = start >> blkbits;
+ max_blocks = (end >> blkbits);
+ if (max_blocks < lblk)
+ max_blocks = 0;
+ else
+ max_blocks -= lblk;
+
+ flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+
+ mutex_lock(&inode->i_mutex);
+
+ /*
+ * Indirect files do not support unwritten extnets
+ */
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+ ret = -EOPNOTSUPP;
+ goto out_mutex;
+ }
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ ret = inode_newsize_ok(inode, new_size);
+ if (ret)
+ goto out_mutex;
+ /*
+ * If we have a partial block after EOF we have to allocate
+ * the entire block.
+ */
+ if (partial_end)
+ max_blocks += 1;
+ }
+
+ if (max_blocks > 0) {
+
+ /* Now release the pages and zero block aligned part of pages*/
+ truncate_pagecache_range(inode, start, end - 1);
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
/*
- * Mark that we allocate beyond EOF so the subsequent truncate
- * can proceed even if the new size is the same as i_size.
+ * Remove entire range from the extent status tree.
*/
- if (new_size > i_size_read(inode))
+ ret = ext4_es_remove_extent(inode, lblk, max_blocks);
+ if (ret)
+ goto out_dio;
+
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ flags, mode);
+ if (ret)
+ goto out_dio;
+ }
+ if (!partial_begin && !partial_end)
+ goto out_dio;
+
+ /*
+ * In worst case we have to writeout two nonadjacent unwritten
+ * blocks and update the inode
+ */
+ credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
+ if (ext4_should_journal_data(inode))
+ credits += 2;
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ ext4_std_error(inode->i_sb, ret);
+ goto out_dio;
+ }
+
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ if (new_size) {
+ ext4_update_inode_size(inode, new_size);
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+ * can proceed even if the new size is the same as i_size.
+ */
+ if ((offset + len) > i_size_read(inode))
ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
}
+ ext4_mark_inode_dirty(handle, inode);
+
+ /* Zero out partial block at the edges of the range */
+ ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+
+ if (file->f_flags & O_SYNC)
+ ext4_handle_sync(handle);
+ ext4_journal_stop(handle);
+out_dio:
+ ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
}
/*
@@ -4538,18 +4903,16 @@ static void ext4_falloc_update_inode(struct inode *inode,
long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- handle_t *handle;
- loff_t new_size;
+ loff_t new_size = 0;
unsigned int max_blocks;
int ret = 0;
- int ret2 = 0;
- int retries = 0;
int flags;
- struct ext4_map_blocks map;
- unsigned int credits, blkbits = inode->i_blkbits;
+ ext4_lblk_t lblk;
+ unsigned int blkbits = inode->i_blkbits;
/* Return error if mode is not supported */
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -4566,83 +4929,48 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return -EOPNOTSUPP;
+ if (mode & FALLOC_FL_COLLAPSE_RANGE)
+ return ext4_collapse_range(inode, offset, len);
+
+ if (mode & FALLOC_FL_ZERO_RANGE)
+ return ext4_zero_range(file, offset, len, mode);
+
trace_ext4_fallocate_enter(inode, offset, len, mode);
- map.m_lblk = offset >> blkbits;
+ lblk = offset >> blkbits;
/*
* We can't just convert len to max_blocks because
* If blocksize = 4096 offset = 3072 and len = 2048
*/
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- - map.m_lblk;
- /*
- * credits to insert 1 extent into extent tree
- */
- credits = ext4_chunk_trans_blocks(inode, max_blocks);
- mutex_lock(&inode->i_mutex);
- ret = inode_newsize_ok(inode, (len + offset));
- if (ret) {
- mutex_unlock(&inode->i_mutex);
- trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
- return ret;
- }
- flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
+ - lblk;
+
+ flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
- /*
- * Don't normalize the request if it can fit in one extent so
- * that it doesn't get unnecessarily split into multiple
- * extents.
- */
- if (len <= EXT_UNINIT_MAX_LEN << blkbits)
- flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
-retry:
- while (ret >= 0 && ret < max_blocks) {
- map.m_lblk = map.m_lblk + ret;
- map.m_len = max_blocks = max_blocks - ret;
- handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
- credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- break;
- }
- ret = ext4_map_blocks(handle, inode, &map, flags);
- if (ret <= 0) {
-#ifdef EXT4FS_DEBUG
- ext4_warning(inode->i_sb,
- "inode #%lu: block %u: len %u: "
- "ext4_ext_map_blocks returned %d",
- inode->i_ino, map.m_lblk,
- map.m_len, ret);
-#endif
- ext4_mark_inode_dirty(handle, inode);
- ret2 = ext4_journal_stop(handle);
- break;
- }
- if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
- blkbits) >> blkbits))
- new_size = offset + len;
- else
- new_size = ((loff_t) map.m_lblk + ret) << blkbits;
+ mutex_lock(&inode->i_mutex);
- ext4_falloc_update_inode(inode, mode, new_size,
- (map.m_flags & EXT4_MAP_NEW));
- ext4_mark_inode_dirty(handle, inode);
- if ((file->f_flags & O_SYNC) && ret >= max_blocks)
- ext4_handle_sync(handle);
- ret2 = ext4_journal_stop(handle);
- if (ret2)
- break;
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ ret = inode_newsize_ok(inode, new_size);
+ if (ret)
+ goto out;
}
- if (ret == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries)) {
- ret = 0;
- goto retry;
+
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ flags, mode);
+ if (ret)
+ goto out;
+
+ if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
+ ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+ EXT4_I(inode)->i_sync_tid);
}
+out:
mutex_unlock(&inode->i_mutex);
- trace_ext4_fallocate_exit(inode, offset, max_blocks,
- ret > 0 ? ret2 : ret);
- return ret > 0 ? ret2 : ret;
+ trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
+ return ret;
}
/*
@@ -4853,3 +5181,330 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ext4_es_lru_add(inode);
return error;
}
+
+/*
+ * ext4_access_path:
+ * Function to access the path buffer for marking it dirty.
+ * It also checks if there are sufficient credits left in the journal handle
+ * to update path.
+ */
+static int
+ext4_access_path(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ int credits, err;
+
+ if (!ext4_handle_valid(handle))
+ return 0;
+
+ /*
+ * Check if need to extend journal credits
+ * 3 for leaf, sb, and inode plus 2 (bmap and group
+ * descriptor) for each block group; assume two block
+ * groups
+ */
+ if (handle->h_buffer_credits < 7) {
+ credits = ext4_writepage_trans_blocks(inode);
+ err = ext4_ext_truncate_extend_restart(handle, inode, credits);
+ /* EAGAIN is success */
+ if (err && err != -EAGAIN)
+ return err;
+ }
+
+ err = ext4_ext_get_access(handle, inode, path);
+ return err;
+}
+
+/*
+ * ext4_ext_shift_path_extents:
+ * Shift the extents of a path structure lying between path[depth].p_ext
+ * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
+ * from starting block for each extent.
+ */
+static int
+ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
+ struct inode *inode, handle_t *handle,
+ ext4_lblk_t *start)
+{
+ int depth, err = 0;
+ struct ext4_extent *ex_start, *ex_last;
+ bool update = 0;
+ depth = path->p_depth;
+
+ while (depth >= 0) {
+ if (depth == path->p_depth) {
+ ex_start = path[depth].p_ext;
+ if (!ex_start)
+ return -EIO;
+
+ ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
+ if (!ex_last)
+ return -EIO;
+
+ err = ext4_access_path(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
+ update = 1;
+
+ *start = le32_to_cpu(ex_last->ee_block) +
+ ext4_ext_get_actual_len(ex_last);
+
+ while (ex_start <= ex_last) {
+ le32_add_cpu(&ex_start->ee_block, -shift);
+ /* Try to merge to the left. */
+ if ((ex_start >
+ EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
+ ext4_ext_try_to_merge_right(inode,
+ path, ex_start - 1))
+ ex_last--;
+ else
+ ex_start++;
+ }
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ if (--depth < 0 || !update)
+ break;
+ }
+
+ /* Update index too */
+ err = ext4_access_path(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ /* we are done if current index is not a starting index */
+ if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
+ break;
+
+ depth--;
+ }
+
+out:
+ return err;
+}
+
+/*
+ * ext4_ext_shift_extents:
+ * All the extents which lies in the range from start to the last allocated
+ * block for the file are shifted downwards by shift blocks.
+ * On success, 0 is returned, error otherwise.
+ */
+static int
+ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
+ ext4_lblk_t start, ext4_lblk_t shift)
+{
+ struct ext4_ext_path *path;
+ int ret = 0, depth;
+ struct ext4_extent *extent;
+ ext4_lblk_t stop_block, current_block;
+ ext4_lblk_t ex_start, ex_end;
+
+ /* Let path point to the last extent */
+ path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ depth = path->p_depth;
+ extent = path[depth].p_ext;
+ if (!extent) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ return ret;
+ }
+
+ stop_block = le32_to_cpu(extent->ee_block) +
+ ext4_ext_get_actual_len(extent);
+ ext4_ext_drop_refs(path);
+ kfree(path);
+
+ /* Nothing to shift, if hole is at the end of file */
+ if (start >= stop_block)
+ return ret;
+
+ /*
+ * Don't start shifting extents until we make sure the hole is big
+ * enough to accomodate the shift.
+ */
+ path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+ depth = path->p_depth;
+ extent = path[depth].p_ext;
+ if (extent) {
+ ex_start = le32_to_cpu(extent->ee_block);
+ ex_end = le32_to_cpu(extent->ee_block) +
+ ext4_ext_get_actual_len(extent);
+ } else {
+ ex_start = 0;
+ ex_end = 0;
+ }
+ ext4_ext_drop_refs(path);
+ kfree(path);
+
+ if ((start == ex_start && shift > ex_start) ||
+ (shift > start - ex_end))
+ return -EINVAL;
+
+ /* Its safe to start updating extents */
+ while (start < stop_block) {
+ path = ext4_ext_find_extent(inode, start, NULL, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+ depth = path->p_depth;
+ extent = path[depth].p_ext;
+ if (!extent) {
+ EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+ (unsigned long) start);
+ return -EIO;
+ }
+
+ current_block = le32_to_cpu(extent->ee_block);
+ if (start > current_block) {
+ /* Hole, move to the next extent */
+ ret = mext_next_extent(inode, path, &extent);
+ if (ret != 0) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ if (ret == 1)
+ ret = 0;
+ break;
+ }
+ }
+ ret = ext4_ext_shift_path_extents(path, shift, inode,
+ handle, &start);
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * ext4_collapse_range:
+ * This implements the fallocate's collapse range functionality for ext4
+ * Returns: 0 and non-zero on error.
+ */
+int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+{
+ struct super_block *sb = inode->i_sb;
+ ext4_lblk_t punch_start, punch_stop;
+ handle_t *handle;
+ unsigned int credits;
+ loff_t new_size, ioffset;
+ int ret;
+
+ /* Collapse range works only on fs block size aligned offsets. */
+ if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
+ len & (EXT4_CLUSTER_SIZE(sb) - 1))
+ return -EINVAL;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ trace_ext4_collapse_range(inode, offset, len);
+
+ punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+ punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+ /* Call ext4_force_commit to flush all data in case of data=journal. */
+ if (ext4_should_journal_data(inode)) {
+ ret = ext4_force_commit(inode->i_sb);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Need to round down offset to be aligned with page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+
+ /* Write out all dirty pages */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+ LLONG_MAX);
+ if (ret)
+ return ret;
+
+ /* Take mutex lock */
+ mutex_lock(&inode->i_mutex);
+
+ /*
+ * There is no need to overlap collapse range with EOF, in which case
+ * it is effectively a truncate operation
+ */
+ if (offset + len >= i_size_read(inode)) {
+ ret = -EINVAL;
+ goto out_mutex;
+ }
+
+ /* Currently just for extent based files */
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ ret = -EOPNOTSUPP;
+ goto out_mutex;
+ }
+
+ truncate_pagecache(inode, ioffset);
+
+ /* Wait for existing dio to complete */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
+ credits = ext4_writepage_trans_blocks(inode);
+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out_dio;
+ }
+
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_preallocations(inode);
+
+ ret = ext4_es_remove_extent(inode, punch_start,
+ EXT_MAX_BLOCKS - punch_start);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_data_sem);
+ goto out_stop;
+ }
+
+ ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_data_sem);
+ goto out_stop;
+ }
+ ext4_discard_preallocations(inode);
+
+ ret = ext4_ext_shift_extents(inode, handle, punch_stop,
+ punch_stop - punch_start);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_data_sem);
+ goto out_stop;
+ }
+
+ new_size = i_size_read(inode) - len;
+ i_size_write(inode, new_size);
+ EXT4_I(inode)->i_disksize = new_size;
+
+ up_write(&EXT4_I(inode)->i_data_sem);
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ ext4_mark_inode_dirty(handle, inode);
+
+out_stop:
+ ext4_journal_stop(handle);
+out_dio:
+ ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}