aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/compression.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r--fs/btrfs/compression.c685
1 files changed, 407 insertions, 278 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7869ad12bc6e..32da97c3c19d 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -9,6 +9,7 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
+#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
@@ -28,6 +29,7 @@
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
+#include "subpage.h"
#include "zoned.h"
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
@@ -172,16 +174,17 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
/* Hash through the page sector by sector */
for (pg_offset = 0; pg_offset < bytes_left;
pg_offset += sectorsize) {
- kaddr = page_address(page);
+ kaddr = kmap_atomic(page);
crypto_shash_digest(shash, kaddr + pg_offset,
sectorsize, csum);
+ kunmap_atomic(kaddr);
if (memcmp(&csum, cb_sum, csum_size) != 0) {
btrfs_print_data_csum_error(inode, disk_start,
csum, cb_sum, cb->mirror_num);
- if (btrfs_io_bio(bio)->device)
+ if (btrfs_bio(bio)->device)
btrfs_dev_stat_inc_and_print(
- btrfs_io_bio(bio)->device,
+ btrfs_bio(bio)->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
return -EIO;
}
@@ -192,6 +195,87 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
return 0;
}
+/*
+ * Reduce bio and io accounting for a compressed_bio with its corresponding bio.
+ *
+ * Return true if there is no pending bio nor io.
+ * Return false otherwise.
+ */
+static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
+ unsigned int bi_size = 0;
+ bool last_io = false;
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ /*
+ * At endio time, bi_iter.bi_size doesn't represent the real bio size.
+ * Thus here we have to iterate through all segments to grab correct
+ * bio size.
+ */
+ bio_for_each_segment_all(bvec, bio, iter_all)
+ bi_size += bvec->bv_len;
+
+ if (bio->bi_status)
+ cb->errors = 1;
+
+ ASSERT(bi_size && bi_size <= cb->compressed_len);
+ last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
+ &cb->pending_sectors);
+ /*
+ * Here we must wake up the possible error handler after all other
+ * operations on @cb finished, or we can race with
+ * finish_compressed_bio_*() which may free @cb.
+ */
+ wake_up_var(cb);
+
+ return last_io;
+}
+
+static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
+{
+ unsigned int index;
+ struct page *page;
+
+ /* Release the compressed pages */
+ for (index = 0; index < cb->nr_pages; index++) {
+ page = cb->compressed_pages[index];
+ page->mapping = NULL;
+ put_page(page);
+ }
+
+ /* Do io completion on the original bio */
+ if (cb->errors) {
+ bio_io_error(cb->orig_bio);
+ } else {
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ ASSERT(bio);
+ ASSERT(!bio->bi_status);
+ /*
+ * We have verified the checksum already, set page checked so
+ * the end_io handlers know about it
+ */
+ ASSERT(!bio_flagged(bio, BIO_CLONED));
+ bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
+ u64 bvec_start = page_offset(bvec->bv_page) +
+ bvec->bv_offset;
+
+ btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
+ bvec->bv_page, bvec_start,
+ bvec->bv_len);
+ }
+
+ bio_endio(cb->orig_bio);
+ }
+
+ /* Finally free the cb struct */
+ kfree(cb->compressed_pages);
+ kfree(cb);
+}
+
/* when we finish reading compressed pages from the disk, we
* decompress them and then run the bio end_io routines on the
* decompressed pages (in the inode address space).
@@ -206,25 +290,17 @@ static void end_compressed_bio_read(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
- struct page *page;
- unsigned int index;
- unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
+ unsigned int mirror = btrfs_bio(bio)->mirror_num;
int ret = 0;
- if (bio->bi_status)
- cb->errors = 1;
-
- /* if there are more bios still pending for this compressed
- * extent, just exit
- */
- if (!refcount_dec_and_test(&cb->pending_bios))
+ if (!dec_and_test_compressed_bio(cb, bio))
goto out;
/*
* Record the correct mirror_num in cb->orig_bio so that
* read-repair can work properly.
*/
- btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
+ btrfs_bio(cb->orig_bio)->mirror_num = mirror;
cb->mirror_num = mirror;
/*
@@ -248,36 +324,7 @@ static void end_compressed_bio_read(struct bio *bio)
csum_failed:
if (ret)
cb->errors = 1;
-
- /* release the compressed pages */
- index = 0;
- for (index = 0; index < cb->nr_pages; index++) {
- page = cb->compressed_pages[index];
- page->mapping = NULL;
- put_page(page);
- }
-
- /* do io completion on the original bio */
- if (cb->errors) {
- bio_io_error(cb->orig_bio);
- } else {
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- /*
- * we have verified the checksum already, set page
- * checked so the end_io handlers know about it
- */
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
- SetPageChecked(bvec->bv_page);
-
- bio_endio(cb->orig_bio);
- }
-
- /* finally free the cb struct */
- kfree(cb->compressed_pages);
- kfree(cb);
+ finish_compressed_bio_read(cb, bio);
out:
bio_put(bio);
}
@@ -289,6 +336,7 @@ out:
static noinline void end_compressed_writeback(struct inode *inode,
const struct compressed_bio *cb)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct page *pages[16];
@@ -311,7 +359,8 @@ static noinline void end_compressed_writeback(struct inode *inode,
for (i = 0; i < ret; i++) {
if (cb->errors)
SetPageError(pages[i]);
- end_page_writeback(pages[i]);
+ btrfs_page_clamp_clear_writeback(fs_info, pages[i],
+ cb->start, cb->len);
put_page(pages[i]);
}
nr_pages -= ret;
@@ -320,60 +369,127 @@ static noinline void end_compressed_writeback(struct inode *inode,
/* the inode may be gone now */
}
-/*
- * do the cleanup once all the compressed pages hit the disk.
- * This will clear writeback on the file pages and free the compressed
- * pages.
- *
- * This also calls the writeback end hooks for the file pages so that
- * metadata and checksums can be updated in the file.
- */
-static void end_compressed_bio_write(struct bio *bio)
+static void finish_compressed_bio_write(struct compressed_bio *cb)
{
- struct compressed_bio *cb = bio->bi_private;
- struct inode *inode;
- struct page *page;
+ struct inode *inode = cb->inode;
unsigned int index;
- if (bio->bi_status)
- cb->errors = 1;
-
- /* if there are more bios still pending for this compressed
- * extent, just exit
- */
- if (!refcount_dec_and_test(&cb->pending_bios))
- goto out;
-
- /* ok, we're the last bio for this extent, step one is to
- * call back into the FS and do all the end_io operations
+ /*
+ * Ok, we're the last bio for this extent, step one is to call back
+ * into the FS and do all the end_io operations.
*/
- inode = cb->inode;
- btrfs_record_physical_zoned(inode, cb->start, bio);
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
cb->start, cb->start + cb->len - 1,
!cb->errors);
end_compressed_writeback(inode, cb);
- /* note, our inode could be gone now */
+ /* Note, our inode could be gone now */
/*
- * release the compressed pages, these came from alloc_page and
+ * Release the compressed pages, these came from alloc_page and
* are not attached to the inode at all
*/
- index = 0;
for (index = 0; index < cb->nr_pages; index++) {
- page = cb->compressed_pages[index];
+ struct page *page = cb->compressed_pages[index];
+
page->mapping = NULL;
put_page(page);
}
- /* finally free the cb struct */
+ /* Finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
+}
+
+/*
+ * Do the cleanup once all the compressed pages hit the disk. This will clear
+ * writeback on the file pages and free the compressed pages.
+ *
+ * This also calls the writeback end hooks for the file pages so that metadata
+ * and checksums can be updated in the file.
+ */
+static void end_compressed_bio_write(struct bio *bio)
+{
+ struct compressed_bio *cb = bio->bi_private;
+
+ if (!dec_and_test_compressed_bio(cb, bio))
+ goto out;
+
+ btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+
+ finish_compressed_bio_write(cb);
out:
bio_put(bio);
}
+static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
+ struct compressed_bio *cb,
+ struct bio *bio, int mirror_num)
+{
+ blk_status_t ret;
+
+ ASSERT(bio->bi_iter.bi_size);
+ ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
+ if (ret)
+ return ret;
+ ret = btrfs_map_bio(fs_info, bio, mirror_num);
+ return ret;
+}
+
+/*
+ * Allocate a compressed_bio, which will be used to read/write on-disk
+ * (aka, compressed) * data.
+ *
+ * @cb: The compressed_bio structure, which records all the needed
+ * information to bind the compressed data to the uncompressed
+ * page cache.
+ * @disk_byten: The logical bytenr where the compressed data will be read
+ * from or written to.
+ * @endio_func: The endio function to call after the IO for compressed data
+ * is finished.
+ * @next_stripe_start: Return value of logical bytenr of where next stripe starts.
+ * Let the caller know to only fill the bio up to the stripe
+ * boundary.
+ */
+
+
+static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
+ unsigned int opf, bio_end_io_t endio_func,
+ u64 *next_stripe_start)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
+ struct btrfs_io_geometry geom;
+ struct extent_map *em;
+ struct bio *bio;
+ int ret;
+
+ bio = btrfs_bio_alloc(BIO_MAX_VECS);
+
+ bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
+ bio->bi_opf = opf;
+ bio->bi_private = cb;
+ bio->bi_end_io = endio_func;
+
+ em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
+ if (IS_ERR(em)) {
+ bio_put(bio);
+ return ERR_CAST(em);
+ }
+
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+ bio_set_dev(bio, em->map_lookup->stripes[0].dev->bdev);
+
+ ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), disk_bytenr, &geom);
+ free_extent_map(em);
+ if (ret < 0) {
+ bio_put(bio);
+ return ERR_PTR(ret);
+ }
+ *next_stripe_start = disk_bytenr + geom.len;
+
+ return bio;
+}
+
/*
* worker function to build and submit bios for previously compressed pages.
* The corresponding pages in the inode should be marked for writeback
@@ -394,20 +510,19 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = NULL;
struct compressed_bio *cb;
- unsigned long bytes_left;
- int pg_index = 0;
- struct page *page;
- u64 first_byte = disk_start;
+ u64 cur_disk_bytenr = disk_start;
+ u64 next_stripe_start;
blk_status_t ret;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
const bool use_append = btrfs_use_zone_append(inode, disk_start);
const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
- WARN_ON(!PAGE_ALIGNED(start));
+ ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
+ IS_ALIGNED(len, fs_info->sectorsize));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
return BLK_STS_RESOURCE;
- refcount_set(&cb->pending_bios, 0);
+ refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->inode = &inode->vfs_inode;
cb->start = start;
@@ -418,118 +533,100 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
- bio = btrfs_bio_alloc(first_byte);
- bio->bi_opf = bio_op | write_flags;
- bio->bi_private = cb;
- bio->bi_end_io = end_compressed_bio_write;
-
- if (use_append) {
- struct btrfs_device *device;
-
- device = btrfs_zoned_get_device(fs_info, disk_start, PAGE_SIZE);
- if (IS_ERR(device)) {
- kfree(cb);
- bio_put(bio);
- return BLK_STS_NOTSUPP;
+ while (cur_disk_bytenr < disk_start + compressed_len) {
+ u64 offset = cur_disk_bytenr - disk_start;
+ unsigned int index = offset >> PAGE_SHIFT;
+ unsigned int real_size;
+ unsigned int added;
+ struct page *page = compressed_pages[index];
+ bool submit = false;
+
+ /* Allocate new bio if submitted or not yet allocated */
+ if (!bio) {
+ bio = alloc_compressed_bio(cb, cur_disk_bytenr,
+ bio_op | write_flags, end_compressed_bio_write,
+ &next_stripe_start);
+ if (IS_ERR(bio)) {
+ ret = errno_to_blk_status(PTR_ERR(bio));
+ bio = NULL;
+ goto finish_cb;
+ }
}
-
- bio_set_dev(bio, device->bdev);
- }
-
- if (blkcg_css) {
- bio->bi_opf |= REQ_CGROUP_PUNT;
- kthread_associate_blkcg(blkcg_css);
- }
- refcount_set(&cb->pending_bios, 1);
-
- /* create and submit bios for the compressed pages */
- bytes_left = compressed_len;
- for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
- int submit = 0;
- int len = 0;
-
- page = compressed_pages[pg_index];
- page->mapping = inode->vfs_inode.i_mapping;
- if (bio->bi_iter.bi_size)
- submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
- 0);
-
/*
- * Page can only be added to bio if the current bio fits in
- * stripe.
+ * We should never reach next_stripe_start start as we will
+ * submit comp_bio when reach the boundary immediately.
*/
- if (!submit) {
- if (pg_index == 0 && use_append)
- len = bio_add_zone_append_page(bio, page,
- PAGE_SIZE, 0);
- else
- len = bio_add_page(bio, page, PAGE_SIZE, 0);
- }
-
- page->mapping = NULL;
- if (submit || len < PAGE_SIZE) {
- /*
- * inc the count before we submit the bio so
- * we know the end IO handler won't happen before
- * we inc the count. Otherwise, the cb might get
- * freed before we're done setting it up
- */
- refcount_inc(&cb->pending_bios);
- ret = btrfs_bio_wq_end_io(fs_info, bio,
- BTRFS_WQ_ENDIO_DATA);
- BUG_ON(ret); /* -ENOMEM */
+ ASSERT(cur_disk_bytenr != next_stripe_start);
+ /*
+ * We have various limits on the real read size:
+ * - stripe boundary
+ * - page boundary
+ * - compressed length boundary
+ */
+ real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr);
+ real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
+ real_size = min_t(u64, real_size, compressed_len - offset);
+ ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
+
+ if (use_append)
+ added = bio_add_zone_append_page(bio, page, real_size,
+ offset_in_page(offset));
+ else
+ added = bio_add_page(bio, page, real_size,
+ offset_in_page(offset));
+ /* Reached zoned boundary */
+ if (added == 0)
+ submit = true;
+
+ cur_disk_bytenr += added;
+ /* Reached stripe boundary */
+ if (cur_disk_bytenr == next_stripe_start)
+ submit = true;
+
+ /* Finished the range */
+ if (cur_disk_bytenr == disk_start + compressed_len)
+ submit = true;
+
+ if (submit) {
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, 1);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- ret = btrfs_map_bio(fs_info, bio, 0);
- if (ret) {
- bio->bi_status = ret;
- bio_endio(bio);
+ if (ret)
+ goto finish_cb;
}
- bio = btrfs_bio_alloc(first_byte);
- bio->bi_opf = bio_op | write_flags;
- bio->bi_private = cb;
- bio->bi_end_io = end_compressed_bio_write;
- if (blkcg_css)
- bio->bi_opf |= REQ_CGROUP_PUNT;
- /*
- * Use bio_add_page() to ensure the bio has at least one
- * page.
- */
- bio_add_page(bio, page, PAGE_SIZE, 0);
+ ret = submit_compressed_bio(fs_info, cb, bio, 0);
+ if (ret)
+ goto finish_cb;
+ bio = NULL;
}
- if (bytes_left < PAGE_SIZE) {
- btrfs_info(fs_info,
- "bytes left %lu compress len %u nr %u",
- bytes_left, cb->compressed_len, cb->nr_pages);
- }
- bytes_left -= PAGE_SIZE;
- first_byte += PAGE_SIZE;
cond_resched();
}
+ if (blkcg_css)
+ kthread_associate_blkcg(NULL);
- ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
- BUG_ON(ret); /* -ENOMEM */
-
- if (!skip_sum) {
- ret = btrfs_csum_one_bio(inode, bio, start, 1);
- BUG_ON(ret); /* -ENOMEM */
- }
+ return 0;
- ret = btrfs_map_bio(fs_info, bio, 0);
- if (ret) {
+finish_cb:
+ if (bio) {
bio->bi_status = ret;
bio_endio(bio);
}
+ /* Last byte of @cb is submitted, endio will free @cb */
+ if (cur_disk_bytenr == disk_start + compressed_len)
+ return ret;
- if (blkcg_css)
- kthread_associate_blkcg(NULL);
-
- return 0;
+ wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
+ (disk_start + compressed_len - cur_disk_bytenr) >>
+ fs_info->sectorsize_bits);
+ /*
+ * Even with previous bio ended, we should still have io not yet
+ * submitted, thus need to finish manually.
+ */
+ ASSERT(refcount_read(&cb->pending_sectors));
+ /* Now we are the only one referring @cb, can finish it safely. */
+ finish_compressed_bio_write(cb);
+ return ret;
}
static u64 bio_end_offset(struct bio *bio)
@@ -539,25 +636,33 @@ static u64 bio_end_offset(struct bio *bio)
return page_offset(last->bv_page) + last->bv_len + last->bv_offset;
}
+/*
+ * Add extra pages in the same compressed file extent so that we don't need to
+ * re-read the same extent again and again.
+ *
+ * NOTE: this won't work well for subpage, as for subpage read, we lock the
+ * full page then submit bio for each compressed/regular extents.
+ *
+ * This means, if we have several sectors in the same page points to the same
+ * on-disk compressed data, we will re-read the same extent many times and
+ * this function can only help for the next page.
+ */
static noinline int add_ra_bio_pages(struct inode *inode,
u64 compressed_end,
struct compressed_bio *cb)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long end_index;
- unsigned long pg_index;
- u64 last_offset;
+ u64 cur = bio_end_offset(cb->orig_bio);
u64 isize = i_size_read(inode);
int ret;
struct page *page;
- unsigned long nr_pages = 0;
struct extent_map *em;
struct address_space *mapping = inode->i_mapping;
struct extent_map_tree *em_tree;
struct extent_io_tree *tree;
- u64 end;
- int misses = 0;
+ int sectors_missed = 0;
- last_offset = bio_end_offset(cb->orig_bio);
em_tree = &BTRFS_I(inode)->extent_tree;
tree = &BTRFS_I(inode)->io_tree;
@@ -576,18 +681,29 @@ static noinline int add_ra_bio_pages(struct inode *inode,
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
- while (last_offset < compressed_end) {
- pg_index = last_offset >> PAGE_SHIFT;
+ while (cur < compressed_end) {
+ u64 page_end;
+ u64 pg_index = cur >> PAGE_SHIFT;
+ u32 add_size;
if (pg_index > end_index)
break;
page = xa_load(&mapping->i_pages, pg_index);
if (page && !xa_is_value(page)) {
- misses++;
- if (misses > 4)
+ sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >>
+ fs_info->sectorsize_bits;
+
+ /* Beyond threshold, no need to continue */
+ if (sectors_missed > 4)
break;
- goto next;
+
+ /*
+ * Jump to next page start as we already have page for
+ * current offset.
+ */
+ cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
+ continue;
}
page = __page_cache_alloc(mapping_gfp_constraint(mapping,
@@ -597,14 +713,11 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
put_page(page);
- goto next;
+ /* There is already a page, skip to page end */
+ cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
+ continue;
}
- /*
- * at this point, we have a locked page in the page cache
- * for these bytes in the file. But, we have to make
- * sure they map to this compressed extent on disk.
- */
ret = set_page_extent_mapped(page);
if (ret < 0) {
unlock_page(page);
@@ -612,18 +725,22 @@ static noinline int add_ra_bio_pages(struct inode *inode,
break;
}
- end = last_offset + PAGE_SIZE - 1;
- lock_extent(tree, last_offset, end);
+ page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
+ lock_extent(tree, cur, page_end);
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, last_offset,
- PAGE_SIZE);
+ em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
read_unlock(&em_tree->lock);
- if (!em || last_offset < em->start ||
- (last_offset + PAGE_SIZE > extent_map_end(em)) ||
+ /*
+ * At this point, we have a locked page in the page cache for
+ * these bytes in the file. But, we have to make sure they map
+ * to this compressed extent on disk.
+ */
+ if (!em || cur < em->start ||
+ (cur + fs_info->sectorsize > extent_map_end(em)) ||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
- unlock_extent(tree, last_offset, end);
+ unlock_extent(tree, cur, page_end);
unlock_page(page);
put_page(page);
break;
@@ -641,20 +758,23 @@ static noinline int add_ra_bio_pages(struct inode *inode,
}
}
- ret = bio_add_page(cb->orig_bio, page,
- PAGE_SIZE, 0);
-
- if (ret == PAGE_SIZE) {
- nr_pages++;
- put_page(page);
- } else {
- unlock_extent(tree, last_offset, end);
+ add_size = min(em->start + em->len, page_end + 1) - cur;
+ ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur));
+ if (ret != add_size) {
+ unlock_extent(tree, cur, page_end);
unlock_page(page);
put_page(page);
break;
}
-next:
- last_offset += PAGE_SIZE;
+ /*
+ * If it's subpage, we also need to increase its
+ * subpage::readers number, as at endio we will decrease
+ * subpage::readers and to unlock the page.
+ */
+ if (fs_info->sectorsize < PAGE_SIZE)
+ btrfs_subpage_start_reader(fs_info, page, cur, add_size);
+ put_page(page);
+ cur += add_size;
}
return 0;
}
@@ -679,9 +799,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
unsigned int compressed_len;
unsigned int nr_pages;
unsigned int pg_index;
- struct page *page;
- struct bio *comp_bio;
- u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
+ struct bio *comp_bio = NULL;
+ const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ u64 cur_disk_byte = disk_bytenr;
+ u64 next_stripe_start;
u64 file_offset;
u64 em_len;
u64 em_start;
@@ -708,7 +829,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
if (!cb)
goto out;
- refcount_set(&cb->pending_bios, 0);
+ refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->inode = inode;
cb->mirror_num = mirror_num;
@@ -748,86 +869,74 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
- comp_bio = btrfs_bio_alloc(cur_disk_byte);
- comp_bio->bi_opf = REQ_OP_READ;
- comp_bio->bi_private = cb;
- comp_bio->bi_end_io = end_compressed_bio_read;
- refcount_set(&cb->pending_bios, 1);
-
- for (pg_index = 0; pg_index < nr_pages; pg_index++) {
- u32 pg_len = PAGE_SIZE;
- int submit = 0;
+ while (cur_disk_byte < disk_bytenr + compressed_len) {
+ u64 offset = cur_disk_byte - disk_bytenr;
+ unsigned int index = offset >> PAGE_SHIFT;
+ unsigned int real_size;
+ unsigned int added;
+ struct page *page = cb->compressed_pages[index];
+ bool submit = false;
+
+ /* Allocate new bio if submitted or not yet allocated */
+ if (!comp_bio) {
+ comp_bio = alloc_compressed_bio(cb, cur_disk_byte,
+ REQ_OP_READ, end_compressed_bio_read,
+ &next_stripe_start);
+ if (IS_ERR(comp_bio)) {
+ ret = errno_to_blk_status(PTR_ERR(comp_bio));
+ comp_bio = NULL;
+ goto finish_cb;
+ }
+ }
+ /*
+ * We should never reach next_stripe_start start as we will
+ * submit comp_bio when reach the boundary immediately.
+ */
+ ASSERT(cur_disk_byte != next_stripe_start);
+ /*
+ * We have various limit on the real read size:
+ * - stripe boundary
+ * - page boundary
+ * - compressed length boundary
+ */
+ real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_byte);
+ real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
+ real_size = min_t(u64, real_size, compressed_len - offset);
+ ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
+ added = bio_add_page(comp_bio, page, real_size, offset_in_page(offset));
/*
- * To handle subpage case, we need to make sure the bio only
- * covers the range we need.
- *
- * If we're at the last page, truncate the length to only cover
- * the remaining part.
+ * Maximum compressed extent is smaller than bio size limit,
+ * thus bio_add_page() should always success.
*/
- if (pg_index == nr_pages - 1)
- pg_len = min_t(u32, PAGE_SIZE,
- compressed_len - pg_index * PAGE_SIZE);
+ ASSERT(added == real_size);
+ cur_disk_byte += added;
- page = cb->compressed_pages[pg_index];
- page->mapping = inode->i_mapping;
- page->index = em_start >> PAGE_SHIFT;
+ /* Reached stripe boundary, need to submit */
+ if (cur_disk_byte == next_stripe_start)
+ submit = true;
- if (comp_bio->bi_iter.bi_size)
- submit = btrfs_bio_fits_in_stripe(page, pg_len,
- comp_bio, 0);
+ /* Has finished the range, need to submit */
+ if (cur_disk_byte == disk_bytenr + compressed_len)
+ submit = true;
- page->mapping = NULL;
- if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
+ if (submit) {
unsigned int nr_sectors;
- ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
- BTRFS_WQ_ENDIO_DATA);
- BUG_ON(ret); /* -ENOMEM */
-
- /*
- * inc the count before we submit the bio so
- * we know the end IO handler won't happen before
- * we inc the count. Otherwise, the cb might get
- * freed before we're done setting it up
- */
- refcount_inc(&cb->pending_bios);
-
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto finish_cb;
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
fs_info->sectorsize);
sums += fs_info->csum_size * nr_sectors;
- ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
- if (ret) {
- comp_bio->bi_status = ret;
- bio_endio(comp_bio);
- }
-
- comp_bio = btrfs_bio_alloc(cur_disk_byte);
- comp_bio->bi_opf = REQ_OP_READ;
- comp_bio->bi_private = cb;
- comp_bio->bi_end_io = end_compressed_bio_read;
-
- bio_add_page(comp_bio, page, pg_len, 0);
+ ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num);
+ if (ret)
+ goto finish_cb;
+ comp_bio = NULL;
}
- cur_disk_byte += pg_len;
}
-
- ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
- BUG_ON(ret); /* -ENOMEM */
-
- ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
- BUG_ON(ret); /* -ENOMEM */
-
- ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
- if (ret) {
- comp_bio->bi_status = ret;
- bio_endio(comp_bio);
- }
-
return 0;
fail2:
@@ -842,6 +951,26 @@ fail1:
out:
free_extent_map(em);
return ret;
+finish_cb:
+ if (comp_bio) {
+ comp_bio->bi_status = ret;
+ bio_endio(comp_bio);
+ }
+ /* All bytes of @cb is submitted, endio will free @cb */
+ if (cur_disk_byte == disk_bytenr + compressed_len)
+ return ret;
+
+ wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
+ (disk_bytenr + compressed_len - cur_disk_byte) >>
+ fs_info->sectorsize_bits);
+ /*
+ * Even with previous bio ended, we should still have io not yet
+ * submitted, thus need to finish @cb manually.
+ */
+ ASSERT(refcount_read(&cb->pending_sectors));
+ /* Now we are the only one referring @cb, can finish it safely. */
+ finish_compressed_bio_read(cb, NULL);
+ return ret;
}
/*