From d88d46c6e06cb47cd3b951287ccaf414e96560d0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 10 Jun 2013 12:59:04 +0000 Subject: Btrfs: free csums when we're done scrubbing an extent A user reported scrub taking up an unreasonable amount of ram as it ran. This is because we lookup the csums for the extent we're scrubbing but don't free it up until after we're done with the scrub, which means we can take up a whole lot of ram. This patch fixes this by dropping the csums once we're done with the extent we've scrubbed. The user reported this to fix their problem. Thanks, Reported-and-tested-by: Remco Hosman Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 79bd479317cb..cb308a3a9300 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2505,6 +2505,7 @@ again: if (ret) goto out; + scrub_free_csums(sctx); if (extent_logical + extent_len < key.objectid + bytes) { logical += increment; -- cgit v1.2.3-59-g8ed1b From f51a4a1826ff810eb9c00cadff8978b028c40756 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 19 Jun 2013 10:36:09 +0800 Subject: Btrfs: remove btrfs_sector_sum structure Using the structure btrfs_sector_sum to keep the checksum value is unnecessary, because the extents that btrfs_sector_sum points to are continuous, we can find out the expected checksums by btrfs_ordered_sum's bytenr and the offset, so we can remove btrfs_sector_sum's bytenr. After removing bytenr, there is only one member in the structure, so it makes no sense to keep the structure, just remove it, and use a u32 array to store the checksum value. By this change, we don't use the while loop to get the checksums one by one. Now, we can get several checksum value at one time, it improved the performance by ~74% on my SSD (31MB/s -> 54MB/s). test command: # dd if=/dev/zero of=/mnt/btrfs/file0 bs=1M count=1024 oflag=sync Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/file-item.c | 144 ++++++++++++++++++------------------------------ fs/btrfs/ordered-data.c | 19 +++---- fs/btrfs/ordered-data.h | 25 ++------- fs/btrfs/relocation.c | 14 +---- fs/btrfs/scrub.c | 16 ++---- 5 files changed, 76 insertions(+), 142 deletions(-) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b193bf324a41..a7bfc9541803 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,8 +34,7 @@ #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ sizeof(struct btrfs_ordered_sum)) / \ - sizeof(struct btrfs_sector_sum) * \ - (r)->sectorsize - (r)->sectorsize) + sizeof(u32) * (r)->sectorsize) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -297,7 +296,6 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; struct btrfs_csum_item *item; LIST_HEAD(tmplist); unsigned long offset; @@ -368,34 +366,28 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct btrfs_csum_item); while (start < csum_end) { size = min_t(size_t, csum_end - start, - MAX_ORDERED_SUM_BYTES(root)); + MAX_ORDERED_SUM_BYTES(root)); sums = kzalloc(btrfs_ordered_sum_size(root, size), - GFP_NOFS); + GFP_NOFS); if (!sums) { ret = -ENOMEM; goto fail; } - sector_sum = sums->sums; sums->bytenr = start; - sums->len = size; + sums->len = (int)size; offset = (start - key.offset) >> root->fs_info->sb->s_blocksize_bits; offset *= csum_size; + size >>= root->fs_info->sb->s_blocksize_bits; - while (size > 0) { - read_extent_buffer(path->nodes[0], - §or_sum->sum, - ((unsigned long)item) + - offset, csum_size); - sector_sum->bytenr = start; - - size -= root->sectorsize; - start += root->sectorsize; - offset += csum_size; - sector_sum++; - } + read_extent_buffer(path->nodes[0], + sums->sums, + ((unsigned long)item) + offset, + csum_size * size); + + start += root->sectorsize * size; list_add_tail(&sums->list, &tmplist); } path->slots[0]++; @@ -417,23 +409,20 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig) { struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; struct btrfs_ordered_extent *ordered; char *data; struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; + int index; unsigned long total_bytes = 0; unsigned long this_sum_bytes = 0; u64 offset; - u64 disk_bytenr; WARN_ON(bio->bi_vcnt <= 0); sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); if (!sums) return -ENOMEM; - sector_sum = sums->sums; - disk_bytenr = (u64)bio->bi_sector << 9; sums->len = bio->bi_size; INIT_LIST_HEAD(&sums->list); @@ -444,7 +433,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, ordered = btrfs_lookup_ordered_extent(inode, offset); BUG_ON(!ordered); /* Logic error */ - sums->bytenr = ordered->start; + sums->bytenr = (u64)bio->bi_sector << 9; + index = 0; while (bio_index < bio->bi_vcnt) { if (!contig) @@ -463,28 +453,27 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), GFP_NOFS); BUG_ON(!sums); /* -ENOMEM */ - sector_sum = sums->sums; sums->len = bytes_left; ordered = btrfs_lookup_ordered_extent(inode, offset); BUG_ON(!ordered); /* Logic error */ - sums->bytenr = ordered->start; + sums->bytenr = ((u64)bio->bi_sector << 9) + + total_bytes; + index = 0; } data = kmap_atomic(bvec->bv_page); - sector_sum->sum = ~(u32)0; - sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset, - sector_sum->sum, - bvec->bv_len); + sums->sums[index] = ~(u32)0; + sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset, + sums->sums[index], + bvec->bv_len); kunmap_atomic(data); - btrfs_csum_final(sector_sum->sum, - (char *)§or_sum->sum); - sector_sum->bytenr = disk_bytenr; + btrfs_csum_final(sums->sums[index], + (char *)(sums->sums + index)); - sector_sum++; bio_index++; + index++; total_bytes += bvec->bv_len; this_sum_bytes += bvec->bv_len; - disk_bytenr += bvec->bv_len; offset += bvec->bv_len; bvec++; } @@ -672,62 +661,46 @@ out: return ret; } -static u64 btrfs_sector_sum_left(struct btrfs_ordered_sum *sums, - struct btrfs_sector_sum *sector_sum, - u64 total_bytes, u64 sectorsize) -{ - u64 tmp = sectorsize; - u64 next_sector = sector_sum->bytenr; - struct btrfs_sector_sum *next = sector_sum + 1; - - while ((tmp + total_bytes) < sums->len) { - if (next_sector + sectorsize != next->bytenr) - break; - tmp += sectorsize; - next_sector = next->bytenr; - next++; - } - return tmp; -} - int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums) { - u64 bytenr; - int ret; struct btrfs_key file_key; struct btrfs_key found_key; - u64 next_offset; - u64 total_bytes = 0; - int found_next; struct btrfs_path *path; struct btrfs_csum_item *item; struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; + u64 next_offset; + u64 total_bytes = 0; u64 csum_offset; - struct btrfs_sector_sum *sector_sum; + u64 bytenr; u32 nritems; u32 ins_size; + int index = 0; + int found_next; + int ret; u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - - sector_sum = sums->sums; again: next_offset = (u64)-1; found_next = 0; + bytenr = sums->bytenr + total_bytes; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; - file_key.offset = sector_sum->bytenr; - bytenr = sector_sum->bytenr; + file_key.offset = bytenr; btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); - item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1); + item = btrfs_lookup_csum(trans, root, path, bytenr, 1); if (!IS_ERR(item)) { - leaf = path->nodes[0]; ret = 0; + leaf = path->nodes[0]; + item_end = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_csum_item); + item_end = (struct btrfs_csum_item *)((char *)item_end + + btrfs_item_size_nr(leaf, path->slots[0])); goto found; } ret = PTR_ERR(item); @@ -807,8 +780,7 @@ again: free_space = btrfs_leaf_free_space(root, leaf) - sizeof(struct btrfs_item) - csum_size; - tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, - root->sectorsize); + tmp = sums->len - total_bytes; tmp >>= root->fs_info->sb->s_blocksize_bits; WARN_ON(tmp < 1); @@ -822,6 +794,7 @@ again: diff *= csum_size; btrfs_extend_item(root, path, diff); + ret = 0; goto csum; } @@ -831,8 +804,7 @@ insert: if (found_next) { u64 tmp; - tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, - root->sectorsize); + tmp = sums->len - total_bytes; tmp >>= root->fs_info->sb->s_blocksize_bits; tmp = min(tmp, (next_offset - file_key.offset) >> root->fs_info->sb->s_blocksize_bits); @@ -853,31 +825,25 @@ insert: WARN_ON(1); goto fail_unlock; } -csum: leaf = path->nodes[0]; +csum: item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); - ret = 0; + item_end = (struct btrfs_csum_item *)((unsigned char *)item + + btrfs_item_size_nr(leaf, path->slots[0])); item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * csum_size); found: - item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); - item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + - btrfs_item_size_nr(leaf, path->slots[0])); -next_sector: - - write_extent_buffer(leaf, §or_sum->sum, (unsigned long)item, csum_size); - - total_bytes += root->sectorsize; - sector_sum++; - if (total_bytes < sums->len) { - item = (struct btrfs_csum_item *)((char *)item + - csum_size); - if (item < item_end && bytenr + PAGE_CACHE_SIZE == - sector_sum->bytenr) { - bytenr = sector_sum->bytenr; - goto next_sector; - } - } + ins_size = (u32)(sums->len - total_bytes) >> + root->fs_info->sb->s_blocksize_bits; + ins_size *= csum_size; + ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item, + ins_size); + write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, + ins_size); + + ins_size /= csum_size; + total_bytes += ins_size * root->sectorsize; + index += ins_size; btrfs_mark_buffer_dirty(path->nodes[0]); if (total_bytes < sums->len) { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 665c640e3ea6..81369827e514 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1032,7 +1032,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum, int len) { struct btrfs_ordered_sum *ordered_sum; - struct btrfs_sector_sum *sector_sums; struct btrfs_ordered_extent *ordered; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; unsigned long num_sectors; @@ -1050,18 +1049,16 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, disk_bytenr < ordered_sum->bytenr + ordered_sum->len) { i = (disk_bytenr - ordered_sum->bytenr) >> inode->i_sb->s_blocksize_bits; - sector_sums = ordered_sum->sums + i; num_sectors = ordered_sum->len >> inode->i_sb->s_blocksize_bits; - for (; i < num_sectors; i++) { - if (sector_sums[i].bytenr == disk_bytenr) { - sum[index] = sector_sums[i].sum; - index++; - if (index == len) - goto out; - disk_bytenr += sectorsize; - } - } + num_sectors = min_t(int, len - index, num_sectors - i); + memcpy(sum + index, ordered_sum->sums + i, + num_sectors); + + index += (int)num_sectors; + if (index == len) + goto out; + disk_bytenr += num_sectors * sectorsize; } } out: diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index d082d43e00e5..68844d59ee6f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -26,18 +26,6 @@ struct btrfs_ordered_inode_tree { struct rb_node *last; }; -/* - * these are used to collect checksums done just before bios submission. - * They are attached via a list into the ordered extent, and - * checksum items are inserted into the tree after all the blocks in - * the ordered extent are on disk - */ -struct btrfs_sector_sum { - /* bytenr on disk */ - u64 bytenr; - u32 sum; -}; - struct btrfs_ordered_sum { /* bytenr is the start of this extent on disk */ u64 bytenr; @@ -45,10 +33,10 @@ struct btrfs_ordered_sum { /* * this is the length in bytes covered by the sums array below. */ - unsigned long len; + int len; struct list_head list; - /* last field is a variable length array of btrfs_sector_sums */ - struct btrfs_sector_sum sums[]; + /* last field is a variable length array of csums */ + u32 sums[]; }; /* @@ -149,11 +137,8 @@ struct btrfs_ordered_extent { static inline int btrfs_ordered_sum_size(struct btrfs_root *root, unsigned long bytes) { - unsigned long num_sectors = (bytes + root->sectorsize - 1) / - root->sectorsize; - num_sectors++; - return sizeof(struct btrfs_ordered_sum) + - num_sectors * sizeof(struct btrfs_sector_sum); + int num_sectors = (int)DIV_ROUND_UP(bytes, root->sectorsize); + return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(u32); } static inline void diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d91f106df665..12096496cc99 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4458,10 +4458,8 @@ out: int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) { struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; struct btrfs_ordered_extent *ordered; struct btrfs_root *root = BTRFS_I(inode)->root; - size_t offset; int ret; u64 disk_bytenr; LIST_HEAD(list); @@ -4475,19 +4473,13 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) if (ret) goto out; + disk_bytenr = ordered->start; while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); list_del_init(&sums->list); - sector_sum = sums->sums; - sums->bytenr = ordered->start; - - offset = 0; - while (offset < sums->len) { - sector_sum->bytenr += ordered->start - disk_bytenr; - sector_sum++; - offset += root->sectorsize; - } + sums->bytenr = disk_bytenr; + disk_bytenr += sums->len; btrfs_add_ordered_sum(inode, ordered, sums); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index cb308a3a9300..63144e4ca9e1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2126,8 +2126,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, u8 *csum) { struct btrfs_ordered_sum *sum = NULL; - int ret = 0; - unsigned long i; + unsigned long index; unsigned long num_sectors; while (!list_empty(&sctx->csum_list)) { @@ -2146,19 +2145,14 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, if (!sum) return 0; + index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize; num_sectors = sum->len / sctx->sectorsize; - for (i = 0; i < num_sectors; ++i) { - if (sum->sums[i].bytenr == logical) { - memcpy(csum, &sum->sums[i].sum, sctx->csum_size); - ret = 1; - break; - } - } - if (ret && i == num_sectors - 1) { + memcpy(csum, sum->sums + index, sctx->csum_size); + if (index == num_sectors - 1) { list_del(&sum->list); kfree(sum); } - return ret; + return 1; } /* scrub extent tries to collect up to 64 kB for each bio */ -- cgit v1.2.3-59-g8ed1b From 26b258919006fc2d76a50b8247d7dea73207b583 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 27 Jun 2013 18:50:58 +0800 Subject: Btrfs: fix oops when recovering the file data by scrub function We get oops while running btrfs replace start test, ------------[ cut here ]------------ kernel BUG at mm/filemap.c:608! [SNIP] Call Trace: [] copy_nocow_pages_for_inode+0x217/0x3f0 [btrfs] [] ? scrub_print_warning_inode+0x230/0x230 [btrfs] [] ? scrub_print_warning_inode+0x230/0x230 [btrfs] [] iterate_extent_inodes+0x1ae/0x300 [btrfs] [] iterate_inodes_from_logical+0x92/0xb0 [btrfs] [] ? scrub_print_warning_inode+0x230/0x230 [btrfs] [] copy_nocow_pages_worker+0x97/0x150 [btrfs] [] worker_loop+0x134/0x540 [btrfs] [] ? __schedule+0x3ca/0x7f0 [] ? btrfs_queue_worker+0x300/0x300 [btrfs] [] kthread+0xc0/0xd0 [] ? flush_kthread_worker+0x80/0x80 [] ret_from_fork+0x7c/0xb0 [] ? flush_kthread_worker+0x80/0x80 [SNIP] RIP [] unlock_page+0x35/0x40 RSP ---[ end trace 421e79ad0dd72c7d ]--- it is because we forgot to lock the page again after we read data to the page. Fix it. Signed-off-by: Lin Feng Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 63144e4ca9e1..c1647f8c1cd0 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3258,7 +3258,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) ret = ret_sub; goto next_page; } - wait_on_page_locked(page); + lock_page(page); if (!PageUptodate(page)) { ret = -EIO; goto next_page; -- cgit v1.2.3-59-g8ed1b From 826aa0a82c5b9d2c8016c02b552e8f82f5b1e660 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 27 Jun 2013 18:50:59 +0800 Subject: Btrfs: cleanup the code of copy_nocow_pages_for_inode() - It make no sense that we continue to do something after the error happened, just go back with this patch. - remove some check of copy_nocow_pages_for_inode(), such as page check after write, inode check in the end of the function, because we are sure they exist. - remove the unnecessary goto in the return value check of the write Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c1647f8c1cd0..186ea82b75f7 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3199,16 +3199,18 @@ out: static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) { - unsigned long index; struct scrub_copy_nocow_ctx *nocow_ctx = ctx; - int ret = 0; + struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; struct btrfs_key key; - struct inode *inode = NULL; + struct inode *inode; + struct page *page; struct btrfs_root *local_root; u64 physical_for_dev_replace; u64 len; - struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; + unsigned long index; int srcu_index; + int ret; + int err; key.objectid = root; key.type = BTRFS_ROOT_ITEM_KEY; @@ -3230,19 +3232,17 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) if (IS_ERR(inode)) return PTR_ERR(inode); + ret = 0; physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; len = nocow_ctx->len; while (len >= PAGE_CACHE_SIZE) { - struct page *page = NULL; - int ret_sub; - index = offset >> PAGE_CACHE_SHIFT; page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) { pr_err("find_or_create_page() failed\n"); ret = -ENOMEM; - goto next_page; + goto out; } if (PageUptodate(page)) { @@ -3250,12 +3250,12 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) goto next_page; } else { ClearPageError(page); - ret_sub = extent_read_full_page(&BTRFS_I(inode)-> + err = extent_read_full_page(&BTRFS_I(inode)-> io_tree, page, btrfs_get_extent, nocow_ctx->mirror_num); - if (ret_sub) { - ret = ret_sub; + if (err) { + ret = err; goto next_page; } lock_page(page); @@ -3264,25 +3264,23 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) goto next_page; } } - ret_sub = write_page_nocow(nocow_ctx->sctx, - physical_for_dev_replace, page); - if (ret_sub) { - ret = ret_sub; - goto next_page; - } - + err = write_page_nocow(nocow_ctx->sctx, + physical_for_dev_replace, page); + if (err) + ret = err; next_page: - if (page) { - unlock_page(page); - put_page(page); - } + unlock_page(page); + page_cache_release(page); + + if (ret) + break; + offset += PAGE_CACHE_SIZE; physical_for_dev_replace += PAGE_CACHE_SIZE; len -= PAGE_CACHE_SIZE; } - - if (inode) - iput(inode); +out: + iput(inode); return ret; } -- cgit v1.2.3-59-g8ed1b From edd1400be9f983f521c7397740d810fa210ee52f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 27 Jun 2013 18:51:00 +0800 Subject: Btrfs: fix several potential problems in copy_nocow_pages_for_inode - It makes no sense that we deal with a inode in the dead tree. - fix the race between dio and page copy by waiting the dio completion - avoid the page copy vs truncate/punch hole - check if the page is in the page cache or not Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/scrub.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/scrub.c') diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 186ea82b75f7..4ba2a69a60ad 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3224,6 +3224,11 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) return PTR_ERR(local_root); } + if (btrfs_root_refs(&local_root->root_item) == 0) { + srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); + return -ENOENT; + } + key.type = BTRFS_INODE_ITEM_KEY; key.objectid = inum; key.offset = 0; @@ -3232,12 +3237,16 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) if (IS_ERR(inode)) return PTR_ERR(inode); + /* Avoid truncate/dio/punch hole.. */ + mutex_lock(&inode->i_mutex); + inode_dio_wait(inode); + ret = 0; physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; len = nocow_ctx->len; while (len >= PAGE_CACHE_SIZE) { index = offset >> PAGE_CACHE_SHIFT; - +again: page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) { pr_err("find_or_create_page() failed\n"); @@ -3258,7 +3267,18 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) ret = err; goto next_page; } + lock_page(page); + /* + * If the page has been remove from the page cache, + * the data on it is meaningless, because it may be + * old one, the new data may be written into the new + * page in the page cache. + */ + if (page->mapping != inode->i_mapping) { + page_cache_release(page); + goto again; + } if (!PageUptodate(page)) { ret = -EIO; goto next_page; @@ -3280,6 +3300,7 @@ next_page: len -= PAGE_CACHE_SIZE; } out: + mutex_unlock(&inode->i_mutex); iput(inode); return ret; } -- cgit v1.2.3-59-g8ed1b