aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c370
1 files changed, 233 insertions, 137 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d8da3edf2ac3..556484cf5d93 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -87,19 +87,9 @@ void btrfs_leak_debug_check(void)
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
struct extent_io_tree *tree, u64 start, u64 end)
{
- struct inode *inode;
- u64 isize;
-
- if (!tree->mapping)
- return;
-
- inode = tree->mapping->host;
- isize = i_size_read(inode);
- if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
- btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
- "%s: ino %llu isize %llu odd range [%llu,%llu]",
- caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
- }
+ if (tree->ops && tree->ops->check_extent_io_range)
+ tree->ops->check_extent_io_range(tree->private_data, caller,
+ start, end);
}
#else
#define btrfs_leak_debug_add(new, head) do {} while (0)
@@ -154,9 +144,9 @@ static noinline void flush_write_bio(void *data);
static inline struct btrfs_fs_info *
tree_fs_info(struct extent_io_tree *tree)
{
- if (!tree->mapping)
- return NULL;
- return btrfs_sb(tree->mapping->host->i_sb);
+ if (tree->ops)
+ return tree->ops->tree_fs_info(tree->private_data);
+ return NULL;
}
int __init extent_io_init(void)
@@ -174,7 +164,8 @@ int __init extent_io_init(void)
goto free_state_cache;
btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio));
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS);
if (!btrfs_bioset)
goto free_buffer_cache;
@@ -213,13 +204,13 @@ void extent_io_exit(void)
}
void extent_io_tree_init(struct extent_io_tree *tree,
- struct address_space *mapping)
+ void *private_data)
{
tree->state = RB_ROOT;
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
- tree->mapping = mapping;
+ tree->private_data = private_data;
}
static struct extent_state *alloc_extent_state(gfp_t mask)
@@ -369,8 +360,7 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other)
{
if (tree->ops && tree->ops->merge_extent_hook)
- tree->ops->merge_extent_hook(tree->mapping->host, new,
- other);
+ tree->ops->merge_extent_hook(tree->private_data, new, other);
}
/*
@@ -421,15 +411,14 @@ static void set_state_cb(struct extent_io_tree *tree,
struct extent_state *state, unsigned *bits)
{
if (tree->ops && tree->ops->set_bit_hook)
- tree->ops->set_bit_hook(tree->mapping->host, state, bits);
+ tree->ops->set_bit_hook(tree->private_data, state, bits);
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state, unsigned *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
- tree->ops->clear_bit_hook(BTRFS_I(tree->mapping->host),
- state, bits);
+ tree->ops->clear_bit_hook(tree->private_data, state, bits);
}
static void set_state_bits(struct extent_io_tree *tree,
@@ -478,7 +467,7 @@ static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
u64 split)
{
if (tree->ops && tree->ops->split_extent_hook)
- tree->ops->split_extent_hook(tree->mapping->host, orig, split);
+ tree->ops->split_extent_hook(tree->private_data, orig, split);
}
/*
@@ -1402,17 +1391,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
*/
static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
- unsigned long index = start >> PAGE_SHIFT;
- unsigned long end_index = end >> PAGE_SHIFT;
- struct page *page;
-
- while (index <= end_index) {
- page = find_get_page(tree->mapping, index);
- BUG_ON(!page); /* Pages should be in the extent_io_tree */
- set_page_writeback(page);
- put_page(page);
- index++;
- }
+ tree->ops->set_range_writeback(tree->private_data, start, end);
}
/* find the first state struct with 'bits' set after 'start', and
@@ -1961,11 +1940,12 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
SetPageUptodate(page);
}
-int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec)
+int free_io_failure(struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree,
+ struct io_failure_record *rec)
{
int ret;
int err = 0;
- struct extent_io_tree *failure_tree = &inode->io_failure_tree;
set_state_failrec(failure_tree, rec->start, NULL);
ret = clear_extent_bits(failure_tree, rec->start,
@@ -1974,7 +1954,7 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec)
if (ret)
err = ret;
- ret = clear_extent_bits(&inode->io_tree, rec->start,
+ ret = clear_extent_bits(io_tree, rec->start,
rec->start + rec->len - 1,
EXTENT_DAMAGED);
if (ret && !err)
@@ -1994,11 +1974,10 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec)
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
-int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
- u64 logical, struct page *page,
- unsigned int pg_offset, int mirror_num)
+int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+ u64 length, u64 logical, struct page *page,
+ unsigned int pg_offset, int mirror_num)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio;
struct btrfs_device *dev;
u64 map_length = 0;
@@ -2009,9 +1988,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
- bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return -EIO;
+ bio = btrfs_io_bio_alloc(1);
bio->bi_iter.bi_size = 0;
map_length = length;
@@ -2070,7 +2047,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- btrfs_ino(inode), start,
+ ino, start,
rcu_str_deref(dev->name), sector);
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
@@ -2090,8 +2067,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
- ret = repair_io_failure(BTRFS_I(fs_info->btree_inode), start,
- PAGE_SIZE, start, p,
+ ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
start - page_offset(p), mirror_num);
if (ret)
break;
@@ -2105,24 +2081,24 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
* each time an IO finishes, we do a fast check in the IO failure tree
* to see if we need to process or clean up an io_failure_record
*/
-int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page,
- unsigned int pg_offset)
+int clean_io_failure(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree, u64 start,
+ struct page *page, u64 ino, unsigned int pg_offset)
{
u64 private;
struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_state *state;
int num_copies;
int ret;
private = 0;
- ret = count_range_bits(&inode->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY, 0);
+ ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
+ EXTENT_DIRTY, 0);
if (!ret)
return 0;
- ret = get_state_failrec(&inode->io_failure_tree, start,
- &failrec);
+ ret = get_state_failrec(failure_tree, start, &failrec);
if (ret)
return 0;
@@ -2138,25 +2114,25 @@ int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page,
if (fs_info->sb->s_flags & MS_RDONLY)
goto out;
- spin_lock(&inode->io_tree.lock);
- state = find_first_extent_bit_state(&inode->io_tree,
+ spin_lock(&io_tree->lock);
+ state = find_first_extent_bit_state(io_tree,
failrec->start,
EXTENT_LOCKED);
- spin_unlock(&inode->io_tree.lock);
+ spin_unlock(&io_tree->lock);
if (state && state->start <= failrec->start &&
state->end >= failrec->start + failrec->len - 1) {
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
- repair_io_failure(inode, start, failrec->len,
- failrec->logical, page,
- pg_offset, failrec->failed_mirror);
+ repair_io_failure(fs_info, ino, start, failrec->len,
+ failrec->logical, page, pg_offset,
+ failrec->failed_mirror);
}
}
out:
- free_io_failure(inode, failrec);
+ free_io_failure(failure_tree, io_tree, failrec);
return 0;
}
@@ -2356,10 +2332,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct btrfs_io_bio *btrfs_failed_bio;
struct btrfs_io_bio *btrfs_bio;
- bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return NULL;
-
+ bio = btrfs_io_bio_alloc(1);
bio->bi_end_io = endio_func;
bio->bi_iter.bi_sector = failrec->logical >> 9;
bio->bi_bdev = fs_info->fs_devices->latest_bdev;
@@ -2397,8 +2370,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct io_failure_record *failrec;
struct inode *inode = page->mapping->host;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+ struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct bio *bio;
int read_mode = 0;
+ blk_status_t status;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2409,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
if (!ret) {
- free_io_failure(BTRFS_I(inode), failrec);
+ free_io_failure(failure_tree, tree, failrec);
return -EIO;
}
@@ -2422,7 +2397,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
(int)phy_offset, failed_bio->bi_end_io,
NULL);
if (!bio) {
- free_io_failure(BTRFS_I(inode), failrec);
+ free_io_failure(failure_tree, tree, failrec);
return -EIO;
}
bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
@@ -2431,11 +2406,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+ status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
failrec->bio_flags, 0);
- if (ret) {
- free_io_failure(BTRFS_I(inode), failrec);
+ if (status) {
+ free_io_failure(failure_tree, tree, failrec);
bio_put(bio);
+ ret = blk_status_to_errno(status);
}
return ret;
@@ -2458,7 +2434,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
if (!uptodate) {
ClearPageUptodate(page);
SetPageError(page);
- ret = ret < 0 ? ret : -EIO;
+ ret = err < 0 ? err : -EIO;
mapping_set_error(page->mapping, ret);
}
}
@@ -2474,6 +2450,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
*/
static void end_bio_extent_writepage(struct bio *bio)
{
+ int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
u64 start;
u64 end;
@@ -2503,7 +2480,7 @@ static void end_bio_extent_writepage(struct bio *bio)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- end_extent_writepage(page, bio->bi_error, start, end);
+ end_extent_writepage(page, error, start, end);
end_page_writeback(page);
}
@@ -2536,9 +2513,9 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
static void end_bio_extent_readpage(struct bio *bio)
{
struct bio_vec *bvec;
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- struct extent_io_tree *tree;
+ struct extent_io_tree *tree, *failure_tree;
u64 offset = 0;
u64 start;
u64 end;
@@ -2556,9 +2533,10 @@ static void end_bio_extent_readpage(struct bio *bio)
btrfs_debug(fs_info,
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
- (u64)bio->bi_iter.bi_sector, bio->bi_error,
+ (u64)bio->bi_iter.bi_sector, bio->bi_status,
io_bio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
+ failure_tree = &BTRFS_I(inode)->io_failure_tree;
/* We always issue full-page reads, but if some block
* in a page fails to read, blk_update_request() will
@@ -2588,8 +2566,10 @@ static void end_bio_extent_readpage(struct bio *bio)
if (ret)
uptodate = 0;
else
- clean_io_failure(BTRFS_I(inode), start,
- page, 0);
+ clean_io_failure(BTRFS_I(inode)->root->fs_info,
+ failure_tree, tree, start,
+ page,
+ btrfs_ino(BTRFS_I(inode)), 0);
}
if (likely(uptodate))
@@ -2615,7 +2595,7 @@ static void end_bio_extent_readpage(struct bio *bio)
ret = bio_readpage_error(bio, offset, page,
start, end, mirror);
if (ret == 0) {
- uptodate = !bio->bi_error;
+ uptodate = !bio->bi_status;
offset += len;
continue;
}
@@ -2673,77 +2653,80 @@ readpage_ok:
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
if (io_bio->end_io)
- io_bio->end_io(io_bio, bio->bi_error);
+ io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
/*
- * this allocates from the btrfs_bioset. We're returning a bio right now
- * but you can call btrfs_io_bio for the appropriate container_of magic
+ * Initialize the members up to but not including 'bio'. Use after allocating a
+ * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
+ * 'bio' because use of __GFP_ZERO is not supported.
*/
-struct bio *
-btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
- gfp_t gfp_flags)
+static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
{
- struct btrfs_io_bio *btrfs_bio;
- struct bio *bio;
-
- bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
+ memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
+}
- if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2)) {
- bio = bio_alloc_bioset(gfp_flags,
- nr_vecs, btrfs_bioset);
- }
- }
+/*
+ * The following helpers allocate a bio. As it's backed by a bioset, it'll
+ * never fail. We're returning a bio right now but you can call btrfs_io_bio
+ * for the appropriate container_of magic
+ */
+struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
+{
+ struct bio *bio;
- if (bio) {
- bio->bi_bdev = bdev;
- bio->bi_iter.bi_sector = first_sector;
- btrfs_bio = btrfs_io_bio(bio);
- btrfs_bio->csum = NULL;
- btrfs_bio->csum_allocated = NULL;
- btrfs_bio->end_io = NULL;
- }
+ bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = first_byte >> 9;
+ btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
-struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+struct bio *btrfs_bio_clone(struct bio *bio)
{
struct btrfs_io_bio *btrfs_bio;
struct bio *new;
- new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
- if (new) {
- btrfs_bio = btrfs_io_bio(new);
- btrfs_bio->csum = NULL;
- btrfs_bio->csum_allocated = NULL;
- btrfs_bio->end_io = NULL;
- }
+ /* Bio allocation backed by a bioset does not fail */
+ new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
+ btrfs_bio = btrfs_io_bio(new);
+ btrfs_io_bio_init(btrfs_bio);
+ btrfs_bio->iter = bio->bi_iter;
return new;
}
-/* this also allocates from the btrfs_bioset */
-struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
{
- struct btrfs_io_bio *btrfs_bio;
struct bio *bio;
- bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
- if (bio) {
- btrfs_bio = btrfs_io_bio(bio);
- btrfs_bio->csum = NULL;
- btrfs_bio->csum_allocated = NULL;
- btrfs_bio->end_io = NULL;
- }
+ /* Bio allocation backed by a bioset does not fail */
+ bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
+ btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
+struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
+{
+ struct bio *bio;
+ struct btrfs_io_bio *btrfs_bio;
+
+ /* this will never fail when it's backed by a bioset */
+ bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
+ ASSERT(bio);
+
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_io_bio_init(btrfs_bio);
+
+ bio_trim(bio, offset >> 9, size >> 9);
+ btrfs_bio->iter = bio->bi_iter;
+ return bio;
+}
static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
unsigned long bio_flags)
{
- int ret = 0;
+ blk_status_t ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
@@ -2755,13 +2738,13 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
bio_get(bio);
if (tree->ops)
- ret = tree->ops->submit_bio_hook(page->mapping->host, bio,
+ ret = tree->ops->submit_bio_hook(tree->private_data, bio,
mirror_num, bio_flags, start);
else
btrfsic_submit_bio(bio);
bio_put(bio);
- return ret;
+ return blk_status_to_errno(ret);
}
static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2818,14 +2801,11 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
}
}
- bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
- GFP_NOFS | __GFP_HIGH);
- if (!bio)
- return -ENOMEM;
-
+ bio = btrfs_bio_alloc(bdev, sector << 9);
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ bio->bi_write_hint = page->mapping->host->i_write_hint;
bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
@@ -3597,9 +3577,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
- __percpu_counter_add(&fs_info->dirty_metadata_bytes,
- -eb->len,
- fs_info->dirty_metadata_batch);
+ percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
+ -eb->len,
+ fs_info->dirty_metadata_batch);
ret = 1;
} else {
spin_unlock(&eb->refs_lock);
@@ -3707,7 +3687,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (bio->bi_error ||
+ if (bio->bi_status ||
test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
ClearPageUptodate(page);
set_btree_ioerr(page);
@@ -3757,7 +3737,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
* header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
*/
start = btrfs_item_nr_offset(nritems);
- end = btrfs_leaf_data(eb) + leaf_data_end(fs_info, eb);
+ end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
memzero_extent_buffer(eb, start, end - start);
}
@@ -4377,6 +4357,119 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
+/*
+ * To cache previous fiemap extent
+ *
+ * Will be used for merging fiemap extent
+ */
+struct fiemap_cache {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+ bool cached;
+};
+
+/*
+ * Helper to submit fiemap extent.
+ *
+ * Will try to merge current fiemap extent specified by @offset, @phys,
+ * @len and @flags with cached one.
+ * And only when we fails to merge, cached one will be submitted as
+ * fiemap extent.
+ *
+ * Return value is the same as fiemap_fill_next_extent().
+ */
+static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache,
+ u64 offset, u64 phys, u64 len, u32 flags)
+{
+ int ret = 0;
+
+ if (!cache->cached)
+ goto assign;
+
+ /*
+ * Sanity check, extent_fiemap() should have ensured that new
+ * fiemap extent won't overlap with cahced one.
+ * Not recoverable.
+ *
+ * NOTE: Physical address can overlap, due to compression
+ */
+ if (cache->offset + cache->len > offset) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ /*
+ * Only merges fiemap extents if
+ * 1) Their logical addresses are continuous
+ *
+ * 2) Their physical addresses are continuous
+ * So truly compressed (physical size smaller than logical size)
+ * extents won't get merged with each other
+ *
+ * 3) Share same flags except FIEMAP_EXTENT_LAST
+ * So regular extent won't get merged with prealloc extent
+ */
+ if (cache->offset + cache->len == offset &&
+ cache->phys + cache->len == phys &&
+ (cache->flags & ~FIEMAP_EXTENT_LAST) ==
+ (flags & ~FIEMAP_EXTENT_LAST)) {
+ cache->len += len;
+ cache->flags |= flags;
+ goto try_submit_last;
+ }
+
+ /* Not mergeable, need to submit cached one */
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+ cache->len, cache->flags);
+ cache->cached = false;
+ if (ret)
+ return ret;
+assign:
+ cache->cached = true;
+ cache->offset = offset;
+ cache->phys = phys;
+ cache->len = len;
+ cache->flags = flags;
+try_submit_last:
+ if (cache->flags & FIEMAP_EXTENT_LAST) {
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset,
+ cache->phys, cache->len, cache->flags);
+ cache->cached = false;
+ }
+ return ret;
+}
+
+/*
+ * Emit last fiemap cache
+ *
+ * The last fiemap cache may still be cached in the following case:
+ * 0 4k 8k
+ * |<- Fiemap range ->|
+ * |<------------ First extent ----------->|
+ *
+ * In this case, the first extent range will be cached but not emitted.
+ * So we must emit it before ending extent_fiemap().
+ */
+static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
+ struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ int ret;
+
+ if (!cache->cached)
+ return 0;
+
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+ cache->len, cache->flags);
+ cache->cached = false;
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -4394,6 +4487,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct fiemap_cache cache = { 0 };
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
@@ -4573,8 +4667,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
+ ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
+ em_len, flags);
if (ret) {
if (ret == 1)
ret = 0;
@@ -4582,6 +4676,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
}
out_free:
+ if (!ret)
+ ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
free_extent_map(em);
out:
btrfs_free_path(path);