From 2996e1f8bcadf0436cc67b63af01523f6cf5d43f Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 25 Feb 2019 14:24:15 +0100 Subject: btrfs: factor out the read/write stage of csum_tree_block into its callers Currently csum_tree_block() does two things: first, as its name suggests, it calculates the checksum for a tree-block. But it also writes this checksum to disk or reads an extent_buffer from disk and compares the checksum with the calculated checksum, depending on the verify argument. Furthermore one of the two callers passes in '1' for the verify argument, the other one passes in '0'. For clarity and fewer layering violations, factor out the second stage into csum_tree_block()'s callers. Suggested-by: Nikolay Borisov Reviewed-by: Qu Wenruo Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 55 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6fe9197f6ee4..4f11a83304ae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -260,15 +260,12 @@ void btrfs_csum_final(u32 crc, u8 *result) } /* - * compute the csum for a btree block, and either verify it or write it - * into the csum field of the block. + * Compute the csum of a btree block and store the result to provided buffer. + * + * Returns error if the extent buffer cannot be mapped. 
*/ -static int csum_tree_block(struct btrfs_fs_info *fs_info, - struct extent_buffer *buf, - int verify) +static int csum_tree_block(struct extent_buffer *buf, u8 *result) { - u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); - char result[BTRFS_CSUM_SIZE]; unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@ -300,23 +297,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, btrfs_csum_final(crc, result); - if (verify) { - if (memcmp_extent_buffer(buf, result, 0, csum_size)) { - u32 val; - u32 found = 0; - memcpy(&found, result, csum_size); - - read_extent_buffer(buf, &val, 0, csum_size); - btrfs_warn_rl(fs_info, - "%s checksum verify failed on %llu wanted %X found %X level %d", - fs_info->sb->s_id, buf->start, - val, found, btrfs_header_level(buf)); - return -EUCLEAN; - } - } else { - write_extent_buffer(buf, result, 0, csum_size); - } - return 0; } @@ -533,6 +513,8 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) { u64 start = page_offset(page); u64 found_start; + u8 result[BTRFS_CSUM_SIZE]; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); struct extent_buffer *eb; eb = (struct extent_buffer *)page->private; @@ -552,7 +534,11 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0); - return csum_tree_block(fs_info, eb, 0); + if (csum_tree_block(eb, result)) + return -EINVAL; + + write_extent_buffer(eb, result, 0, csum_size); + return 0; } static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, @@ -595,7 +581,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct btrfs_fs_info *fs_info = root->fs_info; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); int ret = 0; + u8 result[BTRFS_CSUM_SIZE]; int 
reads_done; if (!page->private) @@ -642,10 +630,25 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); - ret = csum_tree_block(fs_info, eb, 1); + ret = csum_tree_block(eb, result); if (ret) goto err; + if (memcmp_extent_buffer(eb, result, 0, csum_size)) { + u32 val; + u32 found = 0; + + memcpy(&found, result, csum_size); + + read_extent_buffer(eb, &val, 0, csum_size); + btrfs_warn_rl(fs_info, + "%s checksum verify failed on %llu wanted %x found %x level %d", + fs_info->sb->s_id, eb->start, + val, found, btrfs_header_level(eb)); + ret = -EUCLEAN; + goto err; + } + /* * If this is a leaf block and it is corrupt, set the corrupt bit so * that we don't try and read the other copies of this block, just -- cgit v1.2.3-59-g8ed1b From c53839fc3217085c210e1a62022016f199ae7b5a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 25 Feb 2019 14:24:16 +0100 Subject: btrfs: warn if extent buffer mapping crosses a page boundary in csum_tree_block Since commit d2e174d5d3ee ("btrfs: document extent mapping assumptions in checksum") we have a comment in place explaining why map_private_extent_buffer() can't return 1 in the csum_tree_block() case. Make this a bit more explicit and WARN_ON() in case this assumption breaks. 
Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4f11a83304ae..c4404e1e9cfb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -285,7 +285,7 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result) */ err = map_private_extent_buffer(buf, offset, 32, &kaddr, &map_start, &map_len); - if (err) + if (WARN_ON(err)) return err; cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(kaddr + offset - map_start, -- cgit v1.2.3-59-g8ed1b From 7ac1e464c4d473b517bb784f30d40da1f842482e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 26 Feb 2019 16:33:56 +0800 Subject: btrfs: Don't panic when we can't find a root key When we fail to find a root key in btrfs_update_root(), we just panic. That's definitely not cool; fix it by outputting a unique error message, aborting the current transaction and returning -EUCLEAN. This should not normally happen as the root has been used by the callers in some way. 
Reviewed-by: Filipe Manana Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/root-tree.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 893d12fbfda0..1b9a5d0de139 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -137,11 +137,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - if (ret != 0) { - btrfs_print_leaf(path->nodes[0]); - btrfs_crit(fs_info, "unable to update root key %llu %u %llu", - key->objectid, key->type, key->offset); - BUG_ON(1); + if (ret > 0) { + btrfs_crit(fs_info, + "unable to find root key (%llu %u %llu) in tree %llu", + key->objectid, key->type, key->offset, + root->root_key.objectid); + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + goto out; } l = path->nodes[0]; -- cgit v1.2.3-59-g8ed1b From 8de60fe94292c8cdac431537ba315e15cdcafd00 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Mon, 25 Feb 2019 13:07:43 -0600 Subject: btrfs: Initialize inode::i_mapping once in btrfs_symlink inode->i_mapping->a_ops is initialized multiple times. Perform it once. This was left over by 4779cc04248d ("Btrfs: get rid of btrfs_symlink_aops"). 
Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82fdda8ff5ab..cef875a2c475 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10193,7 +10193,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode_nohighmem(inode); - inode->i_mapping->a_ops = &btrfs_aops; inode_set_bytes(inode, name_len); btrfs_i_size_write(BTRFS_I(inode), name_len); err = btrfs_update_inode(trans, root, inode); -- cgit v1.2.3-59-g8ed1b From 6c3abeda773040ad2338742b3ee95d93eb5565e1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 25 Feb 2019 19:57:41 +0100 Subject: btrfs: scrub: return EAGAIN when fs is closing The error code used here is wrong as it's not invalid to try to start scrub when umount has begun. Returning EAGAIN is more user friendly as it's recoverable. Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a99588536c79..ed471ffbf115 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3791,7 +3791,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, struct btrfs_workqueue *scrub_parity = NULL; if (btrfs_fs_closing(fs_info)) - return -EINVAL; + return -EAGAIN; if (fs_info->nodesize > BTRFS_STRIPE_LEN) { /* -- cgit v1.2.3-59-g8ed1b From b2423496a65691f94defddd42daa172824b5fd25 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 27 Feb 2019 16:21:28 -0500 Subject: btrfs: zstd: remove indirect calls for local functions While calling functions inside zstd, we don't need to use the indirection provided by the workspace_manager. Forward declarations are added to maintain the function order of btrfs_compress_op. 
Signed-off-by: Dennis Zhou Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zstd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 6b9e29d050f3..a6ff07cf11d5 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -90,6 +90,9 @@ static inline struct workspace *list_to_workspace(struct list_head *list) return container_of(list, struct workspace, list); } +static void zstd_free_workspace(struct list_head *ws); +static struct list_head *zstd_alloc_workspace(unsigned int level); + /* * zstd_reclaim_timer_fn - reclaim timer * @t: timer @@ -124,7 +127,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer) level = victim->level; list_del(&victim->lru_list); list_del(&victim->list); - wsm.ops->free_workspace(&victim->list); + zstd_free_workspace(&victim->list); if (list_empty(&wsm.idle_ws[level - 1])) clear_bit(level - 1, &wsm.active_map); @@ -180,7 +183,7 @@ static void zstd_init_workspace_manager(void) for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) INIT_LIST_HEAD(&wsm.idle_ws[i]); - ws = wsm.ops->alloc_workspace(ZSTD_BTRFS_MAX_LEVEL); + ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL); if (IS_ERR(ws)) { pr_warn( "BTRFS: cannot preallocate zstd compression workspace\n"); @@ -202,7 +205,7 @@ static void zstd_cleanup_workspace_manager(void) struct workspace, list); list_del(&workspace->list); list_del(&workspace->lru_list); - wsm.ops->free_workspace(&workspace->list); + zstd_free_workspace(&workspace->list); } } spin_unlock(&wsm.lock); @@ -272,7 +275,7 @@ again: return ws; nofs_flag = memalloc_nofs_save(); - ws = wsm.ops->alloc_workspace(level); + ws = zstd_alloc_workspace(level); memalloc_nofs_restore(nofs_flag); if (IS_ERR(ws)) { -- cgit v1.2.3-59-g8ed1b From 3b1da515c64e18bdd6a13348313f1168396b3722 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 Mar 2019 13:10:56 +0000 Subject: Btrfs: remove no longer used 'sync' member from transaction handle Commit db2462a6ad3d 
("btrfs: don't run delayed refs in the end transaction logic") removed its last use, so now it does absolutely nothing, therefore remove it. Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 6 ------ fs/btrfs/file.c | 1 - fs/btrfs/transaction.h | 1 - 3 files changed, 8 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c5880329ae37..b0c86a817a99 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2910,12 +2910,6 @@ static void delayed_ref_async_start(struct btrfs_work *work) goto done; } - /* - * trans->sync means that when we call end_transaction, we won't - * wait on delayed refs - */ - trans->sync = true; - /* Don't bother flushing if we got into a different transaction */ if (trans->transid > async->transid) goto end; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 34fe8a58b0e9..94c1c86fd18a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2165,7 +2165,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) inode_unlock(inode); goto out; } - trans->sync = true; ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx); if (ret < 0) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f1ba78949d1b..b34678e7968e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -120,7 +120,6 @@ struct btrfs_trans_handle { bool allocating_chunk; bool can_flush_pending_bgs; bool reloc_reserved; - bool sync; bool dirty; struct btrfs_root *root; struct btrfs_fs_info *fs_info; -- cgit v1.2.3-59-g8ed1b From c258d6e36442eb5d3363f6dbc0e6f2c162bfb66d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 1 Mar 2019 10:47:58 +0800 Subject: btrfs: Introduce fs_info to extent_io_tree This patch will add a new member fs_info to extent_io_tree. This provides the basis for later trace events to distinguish the output between different btrfs filesystems. 
While this increases the size of the structure, we want to know the source of the trace events and passing the fs_info as an argument to all contexts is not possible. The selftests are now allowed to set it to NULL as they don't use the tracepoints. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent_io.c | 5 +++-- fs/btrfs/extent_io.h | 4 +++- fs/btrfs/inode.c | 4 ++-- fs/btrfs/relocation.c | 8 ++++---- fs/btrfs/tests/btrfs-tests.c | 4 ++-- fs/btrfs/tests/extent-io-tests.c | 2 +- fs/btrfs/transaction.c | 2 +- 8 files changed, 20 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c4404e1e9cfb..26493c2fc237 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1211,7 +1211,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, root->log_transid_committed = -1; root->last_log_commit = 0; if (!dummy) - extent_io_tree_init(&root->dirty_log_pages, NULL); + extent_io_tree_init(fs_info, &root->dirty_log_pages, NULL); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); @@ -2141,7 +2141,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) inode->i_mapping->a_ops = &btree_aops; RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode); + extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, inode); BTRFS_I(inode)->io_tree.track_uptodate = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree); @@ -2751,8 +2751,8 @@ int open_ctree(struct super_block *sb, fs_info->block_group_cache_tree = RB_ROOT; fs_info->first_logical_byte = (u64)-1; - extent_io_tree_init(&fs_info->freed_extents[0], NULL); - extent_io_tree_init(&fs_info->freed_extents[1], NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[0], NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[1], NULL); fs_info->pinned_extents = 
&fs_info->freed_extents[0]; set_bit(BTRFS_FS_BARRIER, &fs_info->flags); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ca8b8e785cf3..139f2fe3092f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -232,9 +232,10 @@ void __cold extent_io_exit(void) bioset_exit(&btrfs_bioset); } -void extent_io_tree_init(struct extent_io_tree *tree, - void *private_data) +void extent_io_tree_init(struct btrfs_fs_info *fs_info, + struct extent_io_tree *tree, void *private_data) { + tree->fs_info = fs_info; tree->state = RB_ROOT; tree->ops = NULL; tree->dirty_bytes = 0; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 08749e0b9c32..e63215d69299 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -108,6 +108,7 @@ struct extent_io_ops { struct extent_io_tree { struct rb_root state; + struct btrfs_fs_info *fs_info; void *private_data; u64 dirty_bytes; int track_uptodate; @@ -239,7 +240,8 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, u64 start, u64 len, int create); -void extent_io_tree_init(struct extent_io_tree *tree, void *private_data); +void extent_io_tree_init(struct btrfs_fs_info *fs_info, + struct extent_io_tree *tree, void *private_data); int try_release_extent_mapping(struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cef875a2c475..2436bc50f21d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9182,8 +9182,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree); - extent_io_tree_init(&ei->io_tree, inode); - extent_io_tree_init(&ei->io_failure_tree, inode); + extent_io_tree_init(fs_info, &ei->io_tree, inode); + extent_io_tree_init(fs_info, &ei->io_failure_tree, inode); ei->io_tree.track_uptodate = 1; ei->io_failure_tree.track_uptodate = 1; atomic_set(&ei->sync_writers, 0); 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ddf028509931..955da7baa665 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4222,7 +4222,7 @@ out: return inode; } -static struct reloc_control *alloc_reloc_control(void) +static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) { struct reloc_control *rc; @@ -4234,7 +4234,7 @@ static struct reloc_control *alloc_reloc_control(void) INIT_LIST_HEAD(&rc->dirty_subvol_roots); backref_cache_init(&rc->backref_cache); mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(&rc->processed_blocks, NULL); + extent_io_tree_init(fs_info, &rc->processed_blocks, NULL); return rc; } @@ -4276,7 +4276,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) return -ETXTBSY; } - rc = alloc_reloc_control(); + rc = alloc_reloc_control(fs_info); if (!rc) { btrfs_put_block_group(bg); return -ENOMEM; @@ -4472,7 +4472,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) if (list_empty(&reloc_roots)) goto out; - rc = alloc_reloc_control(); + rc = alloc_reloc_control(fs_info); if (!rc) { err = -ENOMEM; goto out; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 8a59597f1883..cc1e5d017dc0 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -115,8 +115,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); - extent_io_tree_init(&fs_info->freed_extents[0], NULL); - extent_io_tree_init(&fs_info->freed_extents[1], NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[0], NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[1], NULL); fs_info->pinned_extents = &fs_info->freed_extents[0]; set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); diff --git a/fs/btrfs/tests/extent-io-tests.c 
b/fs/btrfs/tests/extent-io-tests.c index 3c46d7f23456..6ac9770a974c 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -77,7 +77,7 @@ static int test_find_delalloc(u32 sectorsize) return -ENOMEM; } - extent_io_tree_init(&tmp, NULL); + extent_io_tree_init(NULL, &tmp, NULL); /* * First go through and create and mark all of our pages dirty, we pin diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e4e665f422fc..bc8ed44ad8c8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,7 +274,7 @@ loop: INIT_LIST_HEAD(&cur_trans->deleted_bgs); spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); - extent_io_tree_init(&cur_trans->dirty_pages, + extent_io_tree_init(fs_info, &cur_trans->dirty_pages, fs_info->btree_inode); fs_info->generation++; cur_trans->transid = fs_info->generation; -- cgit v1.2.3-59-g8ed1b From 7b4397386fbdc606eb053bc2a1cfd985aea59916 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 11 Mar 2019 15:58:30 +0100 Subject: btrfs: switch extent_io_tree::track_uptodate to bool This patch is split from the following one "btrfs: Introduce extent_io_tree::owner to distinguish different io_trees" from Qu, so the different changes are not mixed together. 
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 26493c2fc237..130796639f88 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2142,7 +2142,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, inode); - BTRFS_I(inode)->io_tree.track_uptodate = 0; + BTRFS_I(inode)->io_tree.track_uptodate = false; extent_map_tree_init(&BTRFS_I(inode)->extent_tree); BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e63215d69299..bd5c12599057 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -111,7 +111,7 @@ struct extent_io_tree { struct btrfs_fs_info *fs_info; void *private_data; u64 dirty_bytes; - int track_uptodate; + bool track_uptodate; spinlock_t lock; const struct extent_io_ops *ops; }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2436bc50f21d..8f0045ba27d4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9184,8 +9184,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) extent_map_tree_init(&ei->extent_tree); extent_io_tree_init(fs_info, &ei->io_tree, inode); extent_io_tree_init(fs_info, &ei->io_failure_tree, inode); - ei->io_tree.track_uptodate = 1; - ei->io_failure_tree.track_uptodate = 1; + ei->io_tree.track_uptodate = true; + ei->io_failure_tree.track_uptodate = true; atomic_set(&ei->sync_writers, 0); mutex_init(&ei->log_mutex); mutex_init(&ei->delalloc_mutex); -- cgit v1.2.3-59-g8ed1b From 43eb5f2975848743e5b14c5bef20f40d404a7a04 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 1 Mar 2019 10:47:59 +0800 Subject: btrfs: Introduce extent_io_tree::owner to distinguish different io_trees Btrfs has the following different extent_io_trees used: - fs_info::freed_extents[2] - 
btrfs_inode::io_tree - for both normal inodes and the btree inode - btrfs_inode::io_failure_tree - btrfs_transaction::dirty_pages - btrfs_root::dirty_log_pages If we want to trace changes in those trees, it will be pretty hard to distinguish them. Instead of using hard-to-read pointer address, this patch will introduce a new member extent_io_tree::owner to track the owner. This modification needs all the callers of extent_io_tree_init() to accept a new parameter @owner. This patch provides the basis for later trace events. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 12 ++++++++---- fs/btrfs/extent_io.c | 4 +++- fs/btrfs/extent_io.h | 18 +++++++++++++++++- fs/btrfs/inode.c | 5 +++-- fs/btrfs/relocation.c | 3 ++- fs/btrfs/tests/btrfs-tests.c | 6 ++++-- fs/btrfs/tests/extent-io-tests.c | 6 +++++- fs/btrfs/transaction.c | 2 +- 8 files changed, 43 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 130796639f88..46b368d84aa3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1211,7 +1211,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, root->log_transid_committed = -1; root->last_log_commit = 0; if (!dummy) - extent_io_tree_init(fs_info, &root->dirty_log_pages, NULL); + extent_io_tree_init(fs_info, &root->dirty_log_pages, + IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); @@ -2141,7 +2142,8 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) inode->i_mapping->a_ops = &btree_aops; RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); - extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, inode); + extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, + IO_TREE_INODE_IO, inode); BTRFS_I(inode)->io_tree.track_uptodate = false; extent_map_tree_init(&BTRFS_I(inode)->extent_tree); @@ -2751,8 +2753,10 @@ int open_ctree(struct 
super_block *sb, fs_info->block_group_cache_tree = RB_ROOT; fs_info->first_logical_byte = (u64)-1; - extent_io_tree_init(fs_info, &fs_info->freed_extents[0], NULL); - extent_io_tree_init(fs_info, &fs_info->freed_extents[1], NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[0], + IO_TREE_FS_INFO_FREED_EXTENTS0, NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[1], + IO_TREE_FS_INFO_FREED_EXTENTS1, NULL); fs_info->pinned_extents = &fs_info->freed_extents[0]; set_bit(BTRFS_FS_BARRIER, &fs_info->flags); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 139f2fe3092f..cfd2b237fd66 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -233,7 +233,8 @@ void __cold extent_io_exit(void) } void extent_io_tree_init(struct btrfs_fs_info *fs_info, - struct extent_io_tree *tree, void *private_data) + struct extent_io_tree *tree, unsigned int owner, + void *private_data) { tree->fs_info = fs_info; tree->state = RB_ROOT; @@ -241,6 +242,7 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info, tree->dirty_bytes = 0; spin_lock_init(&tree->lock); tree->private_data = private_data; + tree->owner = owner; } static struct extent_state *alloc_extent_state(gfp_t mask) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bd5c12599057..aef7a46b1e61 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -106,12 +106,27 @@ struct extent_io_ops { int mirror); }; +enum { + IO_TREE_FS_INFO_FREED_EXTENTS0, + IO_TREE_FS_INFO_FREED_EXTENTS1, + IO_TREE_INODE_IO, + IO_TREE_INODE_IO_FAILURE, + IO_TREE_RELOC_BLOCKS, + IO_TREE_TRANS_DIRTY_PAGES, + IO_TREE_ROOT_DIRTY_LOG_PAGES, + IO_TREE_SELFTEST, +}; + struct extent_io_tree { struct rb_root state; struct btrfs_fs_info *fs_info; void *private_data; u64 dirty_bytes; bool track_uptodate; + + /* Who owns this io tree, should be one of IO_TREE_* */ + u8 owner; + spinlock_t lock; const struct extent_io_ops *ops; }; @@ -241,7 +256,8 @@ typedef struct extent_map *(get_extent_t)(struct 
btrfs_inode *inode, int create); void extent_io_tree_init(struct btrfs_fs_info *fs_info, - struct extent_io_tree *tree, void *private_data); + struct extent_io_tree *tree, unsigned int owner, + void *private_data); int try_release_extent_mapping(struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8f0045ba27d4..a59619631194 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9182,8 +9182,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree); - extent_io_tree_init(fs_info, &ei->io_tree, inode); - extent_io_tree_init(fs_info, &ei->io_failure_tree, inode); + extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode); + extent_io_tree_init(fs_info, &ei->io_failure_tree, + IO_TREE_INODE_IO_FAILURE, inode); ei->io_tree.track_uptodate = true; ei->io_failure_tree.track_uptodate = true; atomic_set(&ei->sync_writers, 0); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 955da7baa665..b52fb99646dc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4234,7 +4234,8 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&rc->dirty_subvol_roots); backref_cache_init(&rc->backref_cache); mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(fs_info, &rc->processed_blocks, NULL); + extent_io_tree_init(fs_info, &rc->processed_blocks, + IO_TREE_RELOC_BLOCKS, NULL); return rc; } diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index cc1e5d017dc0..1351ac2afdd2 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -115,8 +115,10 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); 
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); - extent_io_tree_init(fs_info, &fs_info->freed_extents[0], NULL); - extent_io_tree_init(fs_info, &fs_info->freed_extents[1], NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[0], + IO_TREE_FS_INFO_FREED_EXTENTS0, NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[1], + IO_TREE_FS_INFO_FREED_EXTENTS1, NULL); fs_info->pinned_extents = &fs_info->freed_extents[0]; set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 6ac9770a974c..e46ed2985b19 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -77,7 +77,11 @@ static int test_find_delalloc(u32 sectorsize) return -ENOMEM; } - extent_io_tree_init(NULL, &tmp, NULL); + /* + * Passing NULL as we don't have fs_info but tracepoints are not used + * at this point + */ + extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL); /* * First go through and create and mark all of our pages dirty, we pin diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bc8ed44ad8c8..f1732b77a379 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -275,7 +275,7 @@ loop: spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, - fs_info->btree_inode); + IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode); fs_info->generation++; cur_trans->transid = fs_info->generation; fs_info->running_transaction = cur_trans; -- cgit v1.2.3-59-g8ed1b From a1d198478e92cc8f05c26be746edd1c58f756c0f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 1 Mar 2019 10:48:00 +0800 Subject: btrfs: tracepoints: Add trace events for extent_io_tree Although btrfs heavily relies on extent_io_tree, we don't really have any good trace events for them. 
This patch will add the following trace events: - trace_btrfs_set_extent_bit() - trace_btrfs_clear_extent_bit() - trace_btrfs_convert_extent_bit() Since selftests could create a temporary extent_io_tree without fs_info, modify TP_fast_assign_fsid() to accept NULL as fs_info. NULL fs_info will lead to an all-zero fsid. The output would be: btrfs_set_extent_bit: : io_tree=INODE_IO ino=1 root=1 start=22036480 len=4096 set_bits=LOCKED btrfs_set_extent_bit: : io_tree=INODE_IO ino=1 root=1 start=22040576 len=4096 set_bits=LOCKED btrfs_set_extent_bit: : io_tree=INODE_IO ino=1 root=1 start=22044672 len=4096 set_bits=LOCKED btrfs_set_extent_bit: : io_tree=INODE_IO ino=1 root=1 start=22048768 len=4096 set_bits=LOCKED btrfs_clear_extent_bit: : io_tree=INODE_IO ino=1 root=1 start=22036480 len=16384 clear_bits=LOCKED ^^^ Extent buffer 22036480 read from disk, the locking progress btrfs_set_extent_bit: : io_tree=TRANS_DIRTY_PAGES ino=1 root=1 start=30425088 len=16384 set_bits=DIRTY btrfs_set_extent_bit: : io_tree=TRANS_DIRTY_PAGES ino=1 root=1 start=30441472 len=16384 set_bits=DIRTY ^^^ 2 new tree blocks allocated in one transaction btrfs_set_extent_bit: : io_tree=FREED_EXTENTS0 ino=0 root=0 start=30523392 len=16384 set_bits=DIRTY btrfs_set_extent_bit: : io_tree=FREED_EXTENTS0 ino=0 root=0 start=30556160 len=16384 set_bits=DIRTY ^^^ 2 old tree blocks get pinned down There is one point which needs attention: 1) Those trace events can be pretty heavy: The following workload would generate over 400 trace events. mkfs.btrfs -f $dev start_trace mount $dev $mnt -o enospc_debug sync touch $mnt/file1 touch $mnt/file2 touch $mnt/file3 xfs_io -f -c "pwrite 0 16k" $mnt/file4 umount $mnt end_trace It's not recommended to use them in a real world environment. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba [ rename enums ] Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++ include/trace/events/btrfs.h | 158 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cfd2b237fd66..ee246a7e14c5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -614,6 +614,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int clear = 0; btrfs_debug_check_extent_io_range(tree, start, end); + trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits); if (bits & EXTENT_DELALLOC) bits |= EXTENT_NORESERVE; @@ -883,6 +884,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u64 last_end; btrfs_debug_check_extent_io_range(tree, start, end); + trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits); again: if (!prealloc && gfpflags_allow_blocking(mask)) { @@ -1115,6 +1117,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, bool first_iteration = true; btrfs_debug_check_extent_io_range(tree, start, end); + trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits, + clear_bits); again: if (!prealloc) { diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index ab1cc33adbac..74a11b23b7d4 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -27,6 +27,7 @@ struct btrfs_work; struct __btrfs_workqueue; struct btrfs_qgroup_extent_record; struct btrfs_qgroup; +struct extent_io_tree; struct prelim_ref; TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR); @@ -77,6 +78,17 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); { BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" }, \ { BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" }) +#define show_extent_io_tree_owner(owner) \ + __print_symbolic(owner, \ + { IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" }, \ + { IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" }, \ + { 
IO_TREE_INODE_IO, "INODE_IO" }, \ + { IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE" }, \ + { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \ + { IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \ + { IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \ + { IO_TREE_SELFTEST, "SELFTEST" }) + #define BTRFS_GROUP_FLAGS \ { BTRFS_BLOCK_GROUP_DATA, "DATA"}, \ { BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \ @@ -88,11 +100,35 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); { BTRFS_BLOCK_GROUP_RAID5, "RAID5"}, \ { BTRFS_BLOCK_GROUP_RAID6, "RAID6"} +#define EXTENT_FLAGS \ + { EXTENT_DIRTY, "DIRTY"}, \ + { EXTENT_WRITEBACK, "WRITEBACK"}, \ + { EXTENT_UPTODATE, "UPTODATE"}, \ + { EXTENT_LOCKED, "LOCKED"}, \ + { EXTENT_NEW, "NEW"}, \ + { EXTENT_DELALLOC, "DELALLOC"}, \ + { EXTENT_DEFRAG, "DEFRAG"}, \ + { EXTENT_BOUNDARY, "BOUNDARY"}, \ + { EXTENT_NODATASUM, "NODATASUM"}, \ + { EXTENT_CLEAR_META_RESV, "CLEAR_META_RESV"}, \ + { EXTENT_NEED_WAIT, "NEED_WAIT"}, \ + { EXTENT_DAMAGED, "DAMAGED"}, \ + { EXTENT_NORESERVE, "NORESERVE"}, \ + { EXTENT_QGROUP_RESERVED, "QGROUP_RESERVED"}, \ + { EXTENT_CLEAR_DATA_RESV, "CLEAR_DATA_RESV"}, \ + { EXTENT_DELALLOC_NEW, "DELALLOC_NEW"} + #define BTRFS_FSID_SIZE 16 #define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_FSID_SIZE) #define TP_fast_assign_fsid(fs_info) \ - memcpy(__entry->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) +({ \ + if (fs_info) \ + memcpy(__entry->fsid, fs_info->fs_devices->fsid, \ + BTRFS_FSID_SIZE); \ + else \ + memset(__entry->fsid, 0, BTRFS_FSID_SIZE); \ +}) #define TP_STRUCT__entry_btrfs(args...) 
\ TP_STRUCT__entry( \ @@ -1850,6 +1886,126 @@ DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group, TP_ARGS(bg_cache) ); +TRACE_EVENT(btrfs_set_extent_bit, + TP_PROTO(const struct extent_io_tree *tree, + u64 start, u64 len, unsigned set_bits), + + TP_ARGS(tree, start, len, set_bits), + + TP_STRUCT__entry_btrfs( + __field( unsigned, owner ) + __field( u64, ino ) + __field( u64, rootid ) + __field( u64, start ) + __field( u64, len ) + __field( unsigned, set_bits) + ), + + TP_fast_assign_btrfs(tree->fs_info, + __entry->owner = tree->owner; + if (tree->private_data) { + struct inode *inode = tree->private_data; + + __entry->ino = btrfs_ino(BTRFS_I(inode)); + __entry->rootid = + BTRFS_I(inode)->root->root_key.objectid; + } else { + __entry->ino = 0; + __entry->rootid = 0; + } + __entry->start = start; + __entry->len = len; + __entry->set_bits = set_bits; + ), + + TP_printk_btrfs( + "io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s", + show_extent_io_tree_owner(__entry->owner), __entry->ino, + __entry->rootid, __entry->start, __entry->len, + __print_flags(__entry->set_bits, "|", EXTENT_FLAGS)) +); + +TRACE_EVENT(btrfs_clear_extent_bit, + TP_PROTO(const struct extent_io_tree *tree, + u64 start, u64 len, unsigned clear_bits), + + TP_ARGS(tree, start, len, clear_bits), + + TP_STRUCT__entry_btrfs( + __field( unsigned, owner ) + __field( u64, ino ) + __field( u64, rootid ) + __field( u64, start ) + __field( u64, len ) + __field( unsigned, clear_bits) + ), + + TP_fast_assign_btrfs(tree->fs_info, + __entry->owner = tree->owner; + if (tree->private_data) { + struct inode *inode = tree->private_data; + + __entry->ino = btrfs_ino(BTRFS_I(inode)); + __entry->rootid = + BTRFS_I(inode)->root->root_key.objectid; + } else { + __entry->ino = 0; + __entry->rootid = 0; + } + __entry->start = start; + __entry->len = len; + __entry->clear_bits = clear_bits; + ), + + TP_printk_btrfs( + "io_tree=%s ino=%llu root=%llu start=%llu len=%llu clear_bits=%s", + 
show_extent_io_tree_owner(__entry->owner), __entry->ino, + __entry->rootid, __entry->start, __entry->len, + __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS)) +); + +TRACE_EVENT(btrfs_convert_extent_bit, + TP_PROTO(const struct extent_io_tree *tree, + u64 start, u64 len, unsigned set_bits, unsigned clear_bits), + + TP_ARGS(tree, start, len, set_bits, clear_bits), + + TP_STRUCT__entry_btrfs( + __field( unsigned, owner ) + __field( u64, ino ) + __field( u64, rootid ) + __field( u64, start ) + __field( u64, len ) + __field( unsigned, set_bits) + __field( unsigned, clear_bits) + ), + + TP_fast_assign_btrfs(tree->fs_info, + __entry->owner = tree->owner; + if (tree->private_data) { + struct inode *inode = tree->private_data; + + __entry->ino = btrfs_ino(BTRFS_I(inode)); + __entry->rootid = + BTRFS_I(inode)->root->root_key.objectid; + } else { + __entry->ino = 0; + __entry->rootid = 0; + } + __entry->start = start; + __entry->len = len; + __entry->set_bits = set_bits; + __entry->clear_bits = clear_bits; + ), + + TP_printk_btrfs( +"io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s clear_bits=%s", + show_extent_io_tree_owner(__entry->owner), __entry->ino, + __entry->rootid, __entry->start, __entry->len, + __print_flags(__entry->set_bits , "|", EXTENT_FLAGS), + __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS)) +); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ -- cgit v1.2.3-59-g8ed1b From 443c8e2a839f18084cfdb5b0b62d90b8159f48ae Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 7 Mar 2019 17:14:00 +0100 Subject: btrfs: reduce kmap_atomic time for checksumming Since commit c40a3d38aff4 ("Btrfs: Compute and look up csums based on sectorsized blocks") we do a kmap_atomic() on the contents of a bvec. The code before c40a3d38aff4 had the kmap region just around the checksumming too. kmap_atomic() in turn does a preempt_disable() and pagefault_disable(), so we shouldn't map the data for too long. 
Reduce the time the bvec's page is mapped to when we actually need it. Performance-wise it doesn't seem to make a huge difference with a 2 vcpu VM on a /dev/zram device: vanilla patched delta write 17.4MiB/s 17.8MiB/s +0.4MiB/s (+2%) read 40.6MiB/s 41.5MiB/s +0.9MiB/s (+2%) The following fio job profile was used in the comparison: [global] ioengine=libaio direct=1 sync=1 norandommap time_based runtime=10m size=100m group_reporting numjobs=2 [test] filename=/mnt/test/fio rw=randrw rwmixread=70 Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index cccc75d15970..6fccac9eab96 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -458,8 +458,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, BUG_ON(!ordered); /* Logic error */ } - data = kmap_atomic(bvec.bv_page); - nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len + fs_info->sectorsize - 1); @@ -469,7 +467,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, offset < ordered->file_offset) { unsigned long bytes_left; - kunmap_atomic(data); sums->len = this_sum_bytes; this_sum_bytes = 0; btrfs_add_ordered_sum(inode, ordered, sums); @@ -489,16 +486,16 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) + total_bytes; index = 0; - - data = kmap_atomic(bvec.bv_page); } sums->sums[index] = ~(u32)0; + data = kmap_atomic(bvec.bv_page); sums->sums[index] = btrfs_csum_data(data + bvec.bv_offset + (i * fs_info->sectorsize), sums->sums[index], fs_info->sectorsize); + kunmap_atomic(data); btrfs_csum_final(sums->sums[index], (char *)(sums->sums + index)); index++; @@ -507,7 +504,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, total_bytes += fs_info->sectorsize; } - kunmap_atomic(data); }
this_sum_bytes = 0; btrfs_add_ordered_sum(inode, ordered, sums); -- cgit v1.2.3-59-g8ed1b From 7715da84f74d5d3fed45ad69b2b5e28601ad721f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 1 Mar 2019 12:34:47 +0800 Subject: btrfs: merge _btrfs_set_prop helpers btrfs_set_prop() is a redirect to __btrfs_set_prop() with the transaction handle equal to NULL. __btrfs_set_prop() in turn passes this to do_setxattr(), where the transaction is then actually created. Instead merge __btrfs_set_prop() into btrfs_set_prop(), and update the callers to pass a NULL argument. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 ++++++---- fs/btrfs/props.c | 22 +++++----------------- fs/btrfs/props.h | 6 ++---- fs/btrfs/xattr.c | 2 +- 4 files changed, 14 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index cd4e693406a0..c1566787a146 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -284,7 +284,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) binode->flags &= ~BTRFS_INODE_COMPRESS; binode->flags |= BTRFS_INODE_NOCOMPRESS; - ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); + ret = btrfs_set_prop(NULL, inode, "btrfs.compression", NULL, 0, + 0); if (ret && ret != -ENODATA) goto out_drop; } else if (fsflags & FS_COMPR_FL) { @@ -302,13 +303,14 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (!comp || comp[0] == 0) comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); - ret = btrfs_set_prop(inode, "btrfs.compression", - comp, strlen(comp), 0); + ret = btrfs_set_prop(NULL, inode, "btrfs.compression", comp, + strlen(comp), 0); if (ret) goto out_drop; } else { - ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); + ret = btrfs_set_prop(NULL, inode, "btrfs.compression", NULL, 0, + 0); if (ret && ret != -ENODATA) goto out_drop; binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 61d22a56c0ba..e9aa7ac0583e 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -85,12 +85,9 @@ find_prop_handler(const char *name, return NULL; } -static int __btrfs_set_prop(struct btrfs_trans_handle *trans, - struct inode *inode, - const char *name, - const char *value, - size_t value_len, - int flags) +int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, + const char *name, const char *value, size_t value_len, + int flags) { const struct prop_handler *handler; int ret; @@ -133,15 +130,6 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans, return 0; } -int btrfs_set_prop(struct inode *inode, - const char *name, - const char *value, - size_t value_len, - int flags) -{ - return __btrfs_set_prop(NULL, inode, name, value, value_len, flags); -} - static int iterate_object_props(struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -313,8 +301,8 @@ static int inherit_props(struct btrfs_trans_handle *trans, num_bytes, BTRFS_RESERVE_NO_FLUSH); if (ret) goto out; - ret = __btrfs_set_prop(trans, inode, h->xattr_name, - value, strlen(value), 0); + ret = btrfs_set_prop(trans, inode, h->xattr_name, value, + strlen(value), 0); btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes); if (ret) goto out; diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 618815b4f9d5..9dbdae47cf27 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -10,10 +10,8 @@ void __init btrfs_props_init(void); -int btrfs_set_prop(struct inode *inode, - const char *name, - const char *value, - size_t value_len, +int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, + const char *name, const char *value, size_t value_len, int flags); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f141b45ce349..499bb79ba135 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -379,7 +379,7 @@ static int btrfs_xattr_handler_set_prop(const struct 
xattr_handler *handler, size_t size, int flags) { name = xattr_full_name(handler, name); - return btrfs_set_prop(inode, name, value, size, flags); + return btrfs_set_prop(NULL, inode, name, value, size, flags); } static const struct xattr_handler btrfs_security_xattr_handler = { -- cgit v1.2.3-59-g8ed1b From 3dcf96c7b9fe800560f550af1b7cece254d31bc3 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 1 Mar 2019 12:34:48 +0800 Subject: btrfs: drop redundant forward declaration in props.c Drop forward declaration of the functions: - prop_compression_validate - prop_compression_apply - prop_compression_extract No functional changes. Reviewed-by: Nikolay Borisov Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 163 +++++++++++++++++++++++++++---------------------------- 1 file changed, 79 insertions(+), 84 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index e9aa7ac0583e..c52cd78fea31 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -23,36 +23,6 @@ struct prop_handler { int inheritable; }; -static int prop_compression_validate(const char *value, size_t len); -static int prop_compression_apply(struct inode *inode, - const char *value, - size_t len); -static const char *prop_compression_extract(struct inode *inode); - -static struct prop_handler prop_handlers[] = { - { - .xattr_name = XATTR_BTRFS_PREFIX "compression", - .validate = prop_compression_validate, - .apply = prop_compression_apply, - .extract = prop_compression_extract, - .inheritable = 1 - }, -}; - -void __init btrfs_props_init(void) -{ - int i; - - hash_init(prop_handlers_ht); - - for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) { - struct prop_handler *p = &prop_handlers[i]; - u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name)); - - hash_add(prop_handlers_ht, &p->node, h); - } -} - static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash) { struct hlist_head *h; @@ -271,6 +241,78 @@ int 
btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) return ret; } +static int prop_compression_validate(const char *value, size_t len) +{ + if (!value) + return 0; + + if (!strncmp("lzo", value, 3)) + return 0; + else if (!strncmp("zlib", value, 4)) + return 0; + else if (!strncmp("zstd", value, 4)) + return 0; + + return -EINVAL; +} + +static int prop_compression_apply(struct inode *inode, const char *value, + size_t len) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + int type; + + if (len == 0) { + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; + BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE; + + return 0; + } + + if (!strncmp("lzo", value, 3)) { + type = BTRFS_COMPRESS_LZO; + btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); + } else if (!strncmp("zlib", value, 4)) { + type = BTRFS_COMPRESS_ZLIB; + } else if (!strncmp("zstd", value, 4)) { + type = BTRFS_COMPRESS_ZSTD; + btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); + } else { + return -EINVAL; + } + + BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; + BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; + BTRFS_I(inode)->prop_compress = type; + + return 0; +} + +static const char *prop_compression_extract(struct inode *inode) +{ + switch (BTRFS_I(inode)->prop_compress) { + case BTRFS_COMPRESS_ZLIB: + case BTRFS_COMPRESS_LZO: + case BTRFS_COMPRESS_ZSTD: + return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress); + default: + break; + } + + return NULL; +} + +static struct prop_handler prop_handlers[] = { + { + .xattr_name = XATTR_BTRFS_PREFIX "compression", + .validate = prop_compression_validate, + .apply = prop_compression_apply, + .extract = prop_compression_extract, + .inheritable = 1 + }, +}; + static int inherit_props(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *parent) @@ -352,64 +394,17 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans, return ret; } -static int 
prop_compression_validate(const char *value, size_t len) -{ - if (!strncmp("lzo", value, 3)) - return 0; - else if (!strncmp("zlib", value, 4)) - return 0; - else if (!strncmp("zstd", value, 4)) - return 0; - - return -EINVAL; -} - -static int prop_compression_apply(struct inode *inode, - const char *value, - size_t len) +void __init btrfs_props_init(void) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int type; - - if (len == 0) { - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; - BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; - BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE; - - return 0; - } - - if (!strncmp("lzo", value, 3)) { - type = BTRFS_COMPRESS_LZO; - btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); - } else if (!strncmp("zlib", value, 4)) { - type = BTRFS_COMPRESS_ZLIB; - } else if (!strncmp("zstd", value, 4)) { - type = BTRFS_COMPRESS_ZSTD; - btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); - } else { - return -EINVAL; - } + int i; - BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; - BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; - BTRFS_I(inode)->prop_compress = type; + hash_init(prop_handlers_ht); - return 0; -} + for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) { + struct prop_handler *p = &prop_handlers[i]; + u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name)); -static const char *prop_compression_extract(struct inode *inode) -{ - switch (BTRFS_I(inode)->prop_compress) { - case BTRFS_COMPRESS_ZLIB: - case BTRFS_COMPRESS_LZO: - case BTRFS_COMPRESS_ZSTD: - return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress); - default: - break; + hash_add(prop_handlers_ht, &p->node, h); } - - return NULL; } - -- cgit v1.2.3-59-g8ed1b From 419a6f30fd84202494962cc9c47d2c5d8ef4f7c4 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 1 Mar 2019 12:34:49 +0800 Subject: btrfs: rename fs_info argument to fs_private fs_info is commonly used to represent struct fs_info *, rename to fs_private to avoid confusion. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 499bb79ba135..6971cbf286b5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -419,10 +419,10 @@ const struct xattr_handler *btrfs_xattr_handlers[] = { }; static int btrfs_initxattrs(struct inode *inode, - const struct xattr *xattr_array, void *fs_info) + const struct xattr *xattr_array, void *fs_private) { + struct btrfs_trans_handle *trans = fs_private; const struct xattr *xattr; - struct btrfs_trans_handle *trans = fs_info; unsigned int nofs_flag; char *name; int err = 0; -- cgit v1.2.3-59-g8ed1b From 262c96a3c3670bf2322b9a0c9d74e2a3d9e43be0 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 1 Mar 2019 12:34:50 +0800 Subject: btrfs: refactor btrfs_set_prop and add btrfs_set_prop_trans btrfs_set_prop() takes transaction pointer as the first argument, however in ioctl.c for the purpose of setting the compression property, we call btrfs_set_prop() with NULL transaction pointer. Down in the call chain btrfs_setxattr() starts transaction to update the attribute and also to update the inode. So for clarity, create btrfs_set_prop_trans() with no transaction pointer as argument, in preparation to start transaction here instead of doing it down the call chain at btrfs_setxattr(). Also now the btrfs_set_prop() is a static function. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 12 ++++++------ fs/btrfs/props.c | 12 +++++++++--- fs/btrfs/props.h | 5 ++--- fs/btrfs/xattr.c | 2 +- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c1566787a146..4d945b3d88e3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -284,8 +284,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) binode->flags &= ~BTRFS_INODE_COMPRESS; binode->flags |= BTRFS_INODE_NOCOMPRESS; - ret = btrfs_set_prop(NULL, inode, "btrfs.compression", NULL, 0, - 0); + ret = btrfs_set_prop_trans(inode, "btrfs.compression", NULL, + 0, 0); if (ret && ret != -ENODATA) goto out_drop; } else if (fsflags & FS_COMPR_FL) { @@ -303,14 +303,14 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (!comp || comp[0] == 0) comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); - ret = btrfs_set_prop(NULL, inode, "btrfs.compression", comp, - strlen(comp), 0); + ret = btrfs_set_prop_trans(inode, "btrfs.compression", comp, + strlen(comp), 0); if (ret) goto out_drop; } else { - ret = btrfs_set_prop(NULL, inode, "btrfs.compression", NULL, 0, - 0); + ret = btrfs_set_prop_trans(inode, "btrfs.compression", NULL, + 0, 0); if (ret && ret != -ENODATA) goto out_drop; binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index c52cd78fea31..722ccf6bdd2b 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -55,9 +55,9 @@ find_prop_handler(const char *name, return NULL; } -int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, - const char *name, const char *value, size_t value_len, - int flags) +static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, + const char *name, const char *value, size_t value_len, + int flags) { const struct prop_handler *handler; int ret; @@ -100,6 +100,12 @@ int 
btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, return 0; } +int btrfs_set_prop_trans(struct inode *inode, const char *name, + const char *value, size_t value_len, int flags) +{ + return btrfs_set_prop(NULL, inode, name, value, value_len, flags); +} + static int iterate_object_props(struct btrfs_root *root, struct btrfs_path *path, u64 objectid, diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 9dbdae47cf27..b1a6b233b774 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -10,9 +10,8 @@ void __init btrfs_props_init(void); -int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, - const char *name, const char *value, size_t value_len, - int flags); +int btrfs_set_prop_trans(struct inode *inode, const char *name, + const char *value, size_t value_len, int flags); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6971cbf286b5..69126d5b4d62 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -379,7 +379,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, size_t size, int flags) { name = xattr_full_name(handler, name); - return btrfs_set_prop(NULL, inode, name, value, size, flags); + return btrfs_set_prop_trans(inode, name, value, size, flags); } static const struct xattr_handler btrfs_security_xattr_handler = { -- cgit v1.2.3-59-g8ed1b From e8baf7abcf56da68a03df77f51459acddeef4195 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 09:35:15 -0700 Subject: btrfs: Turn an 'else if' into an 'else' in btrfs_uuid_tree_add When building with -Wsometimes-uninitialized, Clang warns: fs/btrfs/uuid-tree.c:129:13: warning: variable 'eb' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] fs/btrfs/uuid-tree.c:129:13: warning: variable 'offset' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] Clang can't tell that all cases are covered with 
this final else if. Just turn it into an else so that it is clear. Link: https://github.com/ClangBuiltLinux/linux/issues/385 Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/uuid-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 3b2ae342e649..c1cc9a5c0024 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -126,7 +126,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, slot = path->slots[0]; offset = btrfs_item_ptr_offset(eb, slot); offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); - } else if (ret < 0) { + } else { btrfs_warn(fs_info, "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!", ret, (unsigned long long)key.objectid, -- cgit v1.2.3-59-g8ed1b From 4e586ca3c3e63269e136b8c1f20bf5943a0b94ca Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 14 Mar 2019 15:28:30 +0200 Subject: btrfs: Remove EXTENT_WRITEBACK This flag was introduced in a52d9a8033c4 ("Btrfs: Extent based page cache code.") and subsequently its usage was effectively removed by 1edbb734b4e0 ("Btrfs: reduce CPU usage in the extent_state tree") and f2a97a9dbd86 ("btrfs: remove all unused functions"). Just remove it, no functional changes.
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 +-- fs/btrfs/extent_io.h | 31 +++++++++++++++---------------- include/trace/events/btrfs.h | 1 - 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ee246a7e14c5..5725adfb19c9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4229,8 +4229,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) } if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, - EXTENT_LOCKED | EXTENT_WRITEBACK, - 0, NULL)) { + EXTENT_LOCKED, 0, NULL)) { set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &btrfs_inode->runtime_flags); remove_extent_mapping(map, em); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index aef7a46b1e61..a1dc61b7945d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -9,22 +9,21 @@ /* bits for the extent state */ #define EXTENT_DIRTY (1U << 0) -#define EXTENT_WRITEBACK (1U << 1) -#define EXTENT_UPTODATE (1U << 2) -#define EXTENT_LOCKED (1U << 3) -#define EXTENT_NEW (1U << 4) -#define EXTENT_DELALLOC (1U << 5) -#define EXTENT_DEFRAG (1U << 6) -#define EXTENT_BOUNDARY (1U << 9) -#define EXTENT_NODATASUM (1U << 10) -#define EXTENT_CLEAR_META_RESV (1U << 11) -#define EXTENT_NEED_WAIT (1U << 12) -#define EXTENT_DAMAGED (1U << 13) -#define EXTENT_NORESERVE (1U << 14) -#define EXTENT_QGROUP_RESERVED (1U << 15) -#define EXTENT_CLEAR_DATA_RESV (1U << 16) -#define EXTENT_DELALLOC_NEW (1U << 17) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +#define EXTENT_UPTODATE (1U << 1) +#define EXTENT_LOCKED (1U << 2) +#define EXTENT_NEW (1U << 3) +#define EXTENT_DELALLOC (1U << 4) +#define EXTENT_DEFRAG (1U << 5) +#define EXTENT_BOUNDARY (1U << 6) +#define EXTENT_NODATASUM (1U << 7) +#define EXTENT_CLEAR_META_RESV (1U << 8) +#define EXTENT_NEED_WAIT (1U << 9) +#define EXTENT_DAMAGED (1U << 10) +#define EXTENT_NORESERVE (1U << 11) +#define EXTENT_QGROUP_RESERVED 
(1U << 12) +#define EXTENT_CLEAR_DATA_RESV (1U << 13) +#define EXTENT_DELALLOC_NEW (1U << 14) +#define EXTENT_IOBITS (EXTENT_LOCKED) #define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \ EXTENT_CLEAR_DATA_RESV) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 74a11b23b7d4..8b12753fee78 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -102,7 +102,6 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); #define EXTENT_FLAGS \ { EXTENT_DIRTY, "DIRTY"}, \ - { EXTENT_WRITEBACK, "WRITEBACK"}, \ { EXTENT_UPTODATE, "UPTODATE"}, \ { EXTENT_LOCKED, "LOCKED"}, \ { EXTENT_NEW, "NEW"}, \ -- cgit v1.2.3-59-g8ed1b From 8882679ea50b9ceb8b86cbceb061322a97876534 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 14 Mar 2019 15:28:31 +0200 Subject: btrfs: Remove EXTENT_IOBITS This flag just became synonymous with EXTENT_LOCKED, so just remove it and use EXTENT_LOCKED directly. No functional changes. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 11 +++++------ fs/btrfs/extent_io.h | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5725adfb19c9..cc3941ae1ff6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -403,7 +403,7 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *other; struct rb_node *other_node; - if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) + if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY)) return; other_node = rb_prev(&state->rb_node); @@ -622,7 +622,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (delete) bits |= ~EXTENT_CTLBITS; - if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) + if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY)) clear = 1; again: if (!prealloc && gfpflags_allow_blocking(mask)) { @@ -854,7 +854,7 @@ static void cache_state(struct extent_state *state,
struct extent_state **cached_ptr) { return cache_state_if_flags(state, cached_ptr, - EXTENT_IOBITS | EXTENT_BOUNDARY); + EXTENT_LOCKED | EXTENT_BOUNDARY); } /* @@ -4173,10 +4173,9 @@ static int try_release_extent_state(struct extent_io_tree *tree, u64 end = start + PAGE_SIZE - 1; int ret = 1; - if (test_range_bit(tree, start, end, - EXTENT_IOBITS, 0, NULL)) + if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) { ret = 0; - else { + } else { /* * at this point we can safely clear everything except the * locked bit and the nodatasum bit diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a1dc61b7945d..1e5be66c7e0b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -23,7 +23,6 @@ #define EXTENT_QGROUP_RESERVED (1U << 12) #define EXTENT_CLEAR_DATA_RESV (1U << 13) #define EXTENT_DELALLOC_NEW (1U << 14) -#define EXTENT_IOBITS (EXTENT_LOCKED) #define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \ EXTENT_CLEAR_DATA_RESV) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING) -- cgit v1.2.3-59-g8ed1b From e4e9fd0f326212ee3971418334c255a26c8a2735 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 14:45:20 +0200 Subject: btrfs: add assertion helpers for spinning writers Add helpers for conditional DEBUG build to assert that the extent buffer spinning_writers constraints are met. Will be used in followup patches. 
Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/locking.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 82b84e4daad1..13ef1decdea6 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -14,6 +14,30 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb); +#ifdef CONFIG_BTRFS_DEBUG +static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) +{ + WARN_ON(atomic_read(&eb->spinning_writers)); + atomic_inc(&eb->spinning_writers); +} + +static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) +{ + WARN_ON(atomic_read(&eb->spinning_writers) != 1); + atomic_dec(&eb->spinning_writers); +} + +static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) +{ + WARN_ON(atomic_read(&eb->spinning_writers)); +} + +#else +static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { } +static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { } +static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { } +#endif + void btrfs_set_lock_blocking_read(struct extent_buffer *eb) { /* -- cgit v1.2.3-59-g8ed1b From 843ccf9f46baff289946e897b11fd813de62d06f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 14:56:28 +0200 Subject: btrfs: use assertion helpers for spinning writers Use the helpers where open coded. On non-debug builds, the warnings will not trigger and extent_buffer::spinning_writers becomes unused and can be moved to the appropriate section, saving a few bytes.
Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ++++- fs/btrfs/extent_io.h | 2 +- fs/btrfs/locking.c | 16 ++++++---------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cc3941ae1ff6..fc0451c3e24e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4682,7 +4682,6 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, atomic_set(&eb->blocking_readers, 0); atomic_set(&eb->blocking_writers, 0); atomic_set(&eb->spinning_readers, 0); - atomic_set(&eb->spinning_writers, 0); eb->lock_nested = 0; init_waitqueue_head(&eb->write_lock_wq); init_waitqueue_head(&eb->read_lock_wq); @@ -4700,6 +4699,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, > MAX_INLINE_EXTENT_BUFFER_SIZE); BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE); +#ifdef CONFIG_BTRFS_DEBUG + atomic_set(&eb->spinning_writers, 0); +#endif + return eb; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1e5be66c7e0b..3577ef33bc36 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -166,7 +166,6 @@ struct extent_buffer { atomic_t blocking_writers; atomic_t blocking_readers; atomic_t spinning_readers; - atomic_t spinning_writers; short lock_nested; /* >= 0 if eb belongs to a log tree, -1 otherwise */ short log_index; @@ -185,6 +184,7 @@ struct extent_buffer { wait_queue_head_t read_lock_wq; struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; #ifdef CONFIG_BTRFS_DEBUG + atomic_t spinning_writers; struct list_head leak_list; #endif }; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 13ef1decdea6..a5a3c5118f61 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -64,8 +64,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb) if (eb->lock_nested && current->pid == eb->lock_owner) return; if (atomic_read(&eb->blocking_writers) == 0) { - WARN_ON(atomic_read(&eb->spinning_writers) != 1); - 
atomic_dec(&eb->spinning_writers); + btrfs_assert_spinning_writers_put(eb); btrfs_assert_tree_locked(eb); atomic_inc(&eb->blocking_writers); write_unlock(&eb->lock); @@ -101,8 +100,7 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb) return; BUG_ON(atomic_read(&eb->blocking_writers) != 1); write_lock(&eb->lock); - WARN_ON(atomic_read(&eb->spinning_writers)); - atomic_inc(&eb->spinning_writers); + btrfs_assert_spinning_writers_get(eb); /* atomic_dec_and_test implies a barrier */ if (atomic_dec_and_test(&eb->blocking_writers)) cond_wake_up_nomb(&eb->write_lock_wq); @@ -200,7 +198,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) return 0; } atomic_inc(&eb->write_locks); - atomic_inc(&eb->spinning_writers); + btrfs_assert_spinning_writers_get(eb); eb->lock_owner = current->pid; return 1; } @@ -266,8 +264,7 @@ again: write_unlock(&eb->lock); goto again; } - WARN_ON(atomic_read(&eb->spinning_writers)); - atomic_inc(&eb->spinning_writers); + btrfs_assert_spinning_writers_get(eb); atomic_inc(&eb->write_locks); eb->lock_owner = current->pid; } @@ -286,14 +283,13 @@ void btrfs_tree_unlock(struct extent_buffer *eb) atomic_dec(&eb->write_locks); if (blockers) { - WARN_ON(atomic_read(&eb->spinning_writers)); + btrfs_assert_no_spinning_writers(eb); atomic_dec(&eb->blocking_writers); /* Use the lighter barrier after atomic */ smp_mb__after_atomic(); cond_wake_up_nomb(&eb->write_lock_wq); } else { - WARN_ON(atomic_read(&eb->spinning_writers) != 1); - atomic_dec(&eb->spinning_writers); + btrfs_assert_spinning_writers_put(eb); write_unlock(&eb->lock); } } -- cgit v1.2.3-59-g8ed1b From 225948dedc9d3ac514eb1f4b7318d541cfd80aaf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 15:53:42 +0200 Subject: btrfs: add assertion helpers for spinning readers Add helpers for conditional DEBUG build to assert that the extent buffer spinning_readers constraints are met. Will be used in followup patches. 
Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/locking.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index a5a3c5118f61..2dd3ae524aa3 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -32,10 +32,23 @@ static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) WARN_ON(atomic_read(&eb->spinning_writers)); } +static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->spinning_readers); +} + +static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) +{ + WARN_ON(atomic_read(&eb->spinning_readers) == 0); + atomic_dec(&eb->spinning_readers); +} + #else static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { } static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { } static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { } +static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { } +static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { } #endif void btrfs_set_lock_blocking_read(struct extent_buffer *eb) -- cgit v1.2.3-59-g8ed1b From afd495a8264fb25cef49834b5c3559b8aaa612ee Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 15:57:38 +0200 Subject: btrfs: use assertion helpers for spinning readers Use the helpers where open coded. On non-debug builds, the warnings will not trigger and extent_buffer::spinning_readers becomes unused and can be moved to the appropriate section, saving a few bytes.
Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/locking.c | 14 ++++++-------- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fc0451c3e24e..f010475f74fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4681,7 +4681,6 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, atomic_set(&eb->read_locks, 0); atomic_set(&eb->blocking_readers, 0); atomic_set(&eb->blocking_writers, 0); - atomic_set(&eb->spinning_readers, 0); eb->lock_nested = 0; init_waitqueue_head(&eb->write_lock_wq); init_waitqueue_head(&eb->read_lock_wq); @@ -4701,6 +4700,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, #ifdef CONFIG_BTRFS_DEBUG atomic_set(&eb->spinning_writers, 0); + atomic_set(&eb->spinning_readers, 0); #endif return eb; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 3577ef33bc36..89e56df64d6c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -165,7 +165,6 @@ struct extent_buffer { atomic_t read_locks; atomic_t blocking_writers; atomic_t blocking_readers; - atomic_t spinning_readers; short lock_nested; /* >= 0 if eb belongs to a log tree, -1 otherwise */ short log_index; @@ -185,6 +184,7 @@ struct extent_buffer { struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; #ifdef CONFIG_BTRFS_DEBUG atomic_t spinning_writers; + atomic_t spinning_readers; struct list_head leak_list; #endif }; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 2dd3ae524aa3..47bcd288977d 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -62,8 +62,7 @@ void btrfs_set_lock_blocking_read(struct extent_buffer *eb) return; btrfs_assert_tree_read_locked(eb); atomic_inc(&eb->blocking_readers); - WARN_ON(atomic_read(&eb->spinning_readers) == 0); - atomic_dec(&eb->spinning_readers); + btrfs_assert_spinning_readers_put(eb); read_unlock(&eb->lock); } @@ -95,7 
+94,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb) return; BUG_ON(atomic_read(&eb->blocking_readers) == 0); read_lock(&eb->lock); - atomic_inc(&eb->spinning_readers); + btrfs_assert_spinning_readers_get(eb); /* atomic_dec_and_test implies a barrier */ if (atomic_dec_and_test(&eb->blocking_readers)) cond_wake_up_nomb(&eb->read_lock_wq); @@ -150,7 +149,7 @@ again: goto again; } atomic_inc(&eb->read_locks); - atomic_inc(&eb->spinning_readers); + btrfs_assert_spinning_readers_get(eb); } /* @@ -169,7 +168,7 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) return 0; } atomic_inc(&eb->read_locks); - atomic_inc(&eb->spinning_readers); + btrfs_assert_spinning_readers_get(eb); return 1; } @@ -190,7 +189,7 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb) return 0; } atomic_inc(&eb->read_locks); - atomic_inc(&eb->spinning_readers); + btrfs_assert_spinning_readers_get(eb); return 1; } @@ -232,8 +231,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb) return; } btrfs_assert_tree_read_locked(eb); - WARN_ON(atomic_read(&eb->spinning_readers) == 0); - atomic_dec(&eb->spinning_readers); + btrfs_assert_spinning_readers_put(eb); atomic_dec(&eb->read_locks); read_unlock(&eb->lock); } -- cgit v1.2.3-59-g8ed1b From 58a2ddaedbf74b8a209426128c130cc9f0dbd11b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 16:13:41 +0200 Subject: btrfs: add assertion helpers for extent buffer read lock counters The read_locks are a simple counter to track locking balance and used to assert tree locks. Add helpers to make it conditionally work only in DEBUG builds. Will be used in followup patches. 
Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/locking.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 47bcd288977d..d3bb19835ab4 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -12,8 +12,6 @@ #include "extent_io.h" #include "locking.h" -static void btrfs_assert_tree_read_locked(struct extent_buffer *eb); - #ifdef CONFIG_BTRFS_DEBUG static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { @@ -43,12 +41,30 @@ static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) atomic_dec(&eb->spinning_readers); } +static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->read_locks); +} + +static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) +{ + atomic_dec(&eb->read_locks); +} + +static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) +{ + BUG_ON(!atomic_read(&eb->read_locks)); +} + #else static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { } static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { } static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { } static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { } static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { } +static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { } +static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { } +static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { } #endif void btrfs_set_lock_blocking_read(struct extent_buffer *eb) @@ -309,8 +325,3 @@ void btrfs_assert_tree_locked(struct extent_buffer *eb) { BUG_ON(!atomic_read(&eb->write_locks)); } - -static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) -{ - BUG_ON(!atomic_read(&eb->read_locks)); -} -- cgit v1.2.3-59-g8ed1b From 
5c9c799ab78336a4161b16126952a7e1320a8c77 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 16:15:51 +0200 Subject: btrfs: use assertion helpers for extent buffer read lock counters Use the helpers where open coded. On non-debug builds, the warnings will not trigger and extent_buffer::read_locks become unused and can be moved to the appropriate section, saving a few bytes. Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/locking.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f010475f74fd..c189b018ed69 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4678,7 +4678,6 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, eb->bflags = 0; rwlock_init(&eb->lock); atomic_set(&eb->write_locks, 0); - atomic_set(&eb->read_locks, 0); atomic_set(&eb->blocking_readers, 0); atomic_set(&eb->blocking_writers, 0); eb->lock_nested = 0; @@ -4701,6 +4700,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, #ifdef CONFIG_BTRFS_DEBUG atomic_set(&eb->spinning_writers, 0); atomic_set(&eb->spinning_readers, 0); + atomic_set(&eb->read_locks, 0); #endif return eb; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 89e56df64d6c..41c5d0e9dc75 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -162,7 +162,6 @@ struct extent_buffer { /* count of read lock holders on the extent buffer */ atomic_t write_locks; - atomic_t read_locks; atomic_t blocking_writers; atomic_t blocking_readers; short lock_nested; @@ -185,6 +184,7 @@ struct extent_buffer { #ifdef CONFIG_BTRFS_DEBUG atomic_t spinning_writers; atomic_t spinning_readers; + atomic_t read_locks; struct list_head leak_list; #endif }; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d3bb19835ab4..1088cf322fdd 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c 
@@ -164,7 +164,7 @@ again: atomic_read(&eb->blocking_writers) == 0); goto again; } - atomic_inc(&eb->read_locks); + btrfs_assert_tree_read_locks_get(eb); btrfs_assert_spinning_readers_get(eb); } @@ -183,7 +183,7 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) read_unlock(&eb->lock); return 0; } - atomic_inc(&eb->read_locks); + btrfs_assert_tree_read_locks_get(eb); btrfs_assert_spinning_readers_get(eb); return 1; } @@ -204,7 +204,7 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb) read_unlock(&eb->lock); return 0; } - atomic_inc(&eb->read_locks); + btrfs_assert_tree_read_locks_get(eb); btrfs_assert_spinning_readers_get(eb); return 1; } @@ -248,7 +248,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb) } btrfs_assert_tree_read_locked(eb); btrfs_assert_spinning_readers_put(eb); - atomic_dec(&eb->read_locks); + btrfs_assert_tree_read_locks_put(eb); read_unlock(&eb->lock); } @@ -272,7 +272,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) /* atomic_dec_and_test implies a barrier */ if (atomic_dec_and_test(&eb->blocking_readers)) cond_wake_up_nomb(&eb->read_lock_wq); - atomic_dec(&eb->read_locks); + btrfs_assert_tree_read_locks_put(eb); } /* -- cgit v1.2.3-59-g8ed1b From e3f153886702302482918a3788ba3bb24a37a4aa Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 16:20:02 +0200 Subject: btrfs: add assertion helpers for extent buffer write lock counters The write_locks are a simple counter to track locking balance and used to assert tree locks. Add helpers to make it conditionally work only in DEBUG builds. Will be used in followup patches. 
Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/locking.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 1088cf322fdd..1c9bb0620e4b 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,6 +56,21 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) BUG_ON(!atomic_read(&eb->read_locks)); } +static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->write_locks); +} + +static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) +{ + atomic_dec(&eb->write_locks); +} + +void btrfs_assert_tree_locked(struct extent_buffer *eb) +{ + BUG_ON(!atomic_read(&eb->write_locks)); +} + #else static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { } static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { } @@ -65,6 +80,9 @@ static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { } static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { } static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { } static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { } +void btrfs_assert_tree_locked(struct extent_buffer *eb) { } +static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { } +static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { } #endif void btrfs_set_lock_blocking_read(struct extent_buffer *eb) @@ -320,8 +338,3 @@ void btrfs_tree_unlock(struct extent_buffer *eb) write_unlock(&eb->lock); } } - -void btrfs_assert_tree_locked(struct extent_buffer *eb) -{ - BUG_ON(!atomic_read(&eb->write_locks)); -} -- cgit v1.2.3-59-g8ed1b From c79adfc085c0662385cfcb55f15590303212e8e9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 16:24:26 +0200 Subject: btrfs: use assertion helpers for extent buffer write lock counters Use the helpers 
where open coded. On non-debug builds, the warnings will not trigger and extent_buffer::write_locks become unused and can be moved to the appropriate section, saving a few bytes. Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 3 +-- fs/btrfs/locking.c | 6 +++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c189b018ed69..97d32b80060d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4677,7 +4677,6 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, eb->fs_info = fs_info; eb->bflags = 0; rwlock_init(&eb->lock); - atomic_set(&eb->write_locks, 0); atomic_set(&eb->blocking_readers, 0); atomic_set(&eb->blocking_writers, 0); eb->lock_nested = 0; @@ -4701,6 +4700,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, atomic_set(&eb->spinning_writers, 0); atomic_set(&eb->spinning_readers, 0); atomic_set(&eb->read_locks, 0); + atomic_set(&eb->write_locks, 0); #endif return eb; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 41c5d0e9dc75..43a9530279db 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -160,8 +160,6 @@ struct extent_buffer { struct rcu_head rcu_head; pid_t lock_owner; - /* count of read lock holders on the extent buffer */ - atomic_t write_locks; atomic_t blocking_writers; atomic_t blocking_readers; short lock_nested; @@ -185,6 +183,7 @@ struct extent_buffer { atomic_t spinning_writers; atomic_t spinning_readers; atomic_t read_locks; + atomic_t write_locks; struct list_head leak_list; #endif }; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 1c9bb0620e4b..3f7b5989e31e 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -243,7 +243,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) write_unlock(&eb->lock); return 0; } - atomic_inc(&eb->write_locks); + btrfs_assert_tree_write_locks_get(eb); 
btrfs_assert_spinning_writers_get(eb); eb->lock_owner = current->pid; return 1; @@ -310,7 +310,7 @@ again: goto again; } btrfs_assert_spinning_writers_get(eb); - atomic_inc(&eb->write_locks); + btrfs_assert_tree_write_locks_get(eb); eb->lock_owner = current->pid; } @@ -325,7 +325,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb) btrfs_assert_tree_locked(eb); eb->lock_owner = 0; - atomic_dec(&eb->write_locks); + btrfs_assert_tree_write_locks_put(eb); if (blockers) { btrfs_assert_no_spinning_writers(eb); -- cgit v1.2.3-59-g8ed1b From ed1b4ed79df258f08f16eac4a4fb96dc6d1a0f3a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 24 Aug 2018 16:31:17 +0200 Subject: btrfs: switch extent_buffer::lock_nested to bool The member is tracking simple status of the lock, we can use bool for that and make some room for further space reduction in the structure. Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/locking.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 97d32b80060d..d61b526dc856 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4679,7 +4679,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, rwlock_init(&eb->lock); atomic_set(&eb->blocking_readers, 0); atomic_set(&eb->blocking_writers, 0); - eb->lock_nested = 0; + eb->lock_nested = false; init_waitqueue_head(&eb->write_lock_wq); init_waitqueue_head(&eb->read_lock_wq); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 43a9530279db..586baed03780 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -162,7 +162,7 @@ struct extent_buffer { atomic_t blocking_writers; atomic_t blocking_readers; - short lock_nested; + bool lock_nested; /* >= 0 if eb belongs to a log tree, -1 otherwise */ short log_index; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 
3f7b5989e31e..6df03ba36026 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -172,7 +172,7 @@ again: * called on a partly (write-)locked tree. */ BUG_ON(eb->lock_nested); - eb->lock_nested = 1; + eb->lock_nested = true; read_unlock(&eb->lock); return; } @@ -261,7 +261,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb) * field only matters to the lock owner. */ if (eb->lock_nested && current->pid == eb->lock_owner) { - eb->lock_nested = 0; + eb->lock_nested = false; return; } btrfs_assert_tree_read_locked(eb); @@ -282,7 +282,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) * field only matters to the lock owner. */ if (eb->lock_nested && current->pid == eb->lock_owner) { - eb->lock_nested = 0; + eb->lock_nested = false; return; } btrfs_assert_tree_read_locked(eb); -- cgit v1.2.3-59-g8ed1b From e65ef21ed850a535defa2ead971fc8f88ad3b616 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 11 Mar 2019 09:55:38 +0200 Subject: btrfs: Exploit the fact that pages passed to extent_readpages are always contiguous Currently extent_readpages (called from btrfs_readpages) will always call __extent_readpages which tries to create contiguous range of pages and call __do_contiguous_readpages when such contiguous range is created. It turns out this is unnecessary due to the fact that generic MM code always calls filesystem's ->readpages callback (btrfs_readpages in this case) with already contiguous pages. Armed with this knowledge it's possible to simplify extent_readpages by eliminating the call to __extent_readpages and directly calling contiguous_readpages. The only edge case that needs to be handled is when add_to_page_cache_lru fails. This is easy as all that is needed is to submit whatever is the number of pages successfully added to the lru. This can happen when the page is already in the range, so it does not need to be read again, and we can't do anything else in case of other errors. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 58 +++++++++++++--------------------------------------- 1 file changed, 14 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d61b526dc856..9901e8127e0f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3076,7 +3076,7 @@ out: return ret; } -static inline void __do_contiguous_readpages(struct extent_io_tree *tree, +static inline void contiguous_readpages(struct extent_io_tree *tree, struct page *pages[], int nr_pages, u64 start, u64 end, struct extent_map **em_cached, @@ -3107,46 +3107,6 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, } } -static void __extent_readpages(struct extent_io_tree *tree, - struct page *pages[], - int nr_pages, - struct extent_map **em_cached, - struct bio **bio, unsigned long *bio_flags, - u64 *prev_em_start) -{ - u64 start = 0; - u64 end = 0; - u64 page_start; - int index; - int first_index = 0; - - for (index = 0; index < nr_pages; index++) { - page_start = page_offset(pages[index]); - if (!end) { - start = page_start; - end = start + PAGE_SIZE - 1; - first_index = index; - } else if (end + 1 == page_start) { - end += PAGE_SIZE; - } else { - __do_contiguous_readpages(tree, &pages[first_index], - index - first_index, start, - end, em_cached, - bio, bio_flags, - prev_em_start); - start = page_start; - end = start + PAGE_SIZE - 1; - first_index = index; - } - } - - if (end) - __do_contiguous_readpages(tree, &pages[first_index], - index - first_index, start, - end, em_cached, bio, - bio_flags, prev_em_start); -} - static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, @@ -4109,6 +4069,8 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages, u64 prev_em_start = (u64)-1; while (!list_empty(pages)) { + u64 contig_end = 0; + for (nr = 0; nr < ARRAY_SIZE(pagepool) && 
!list_empty(pages);) { struct page *page = lru_to_page(pages); @@ -4117,14 +4079,22 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages, if (add_to_page_cache_lru(page, mapping, page->index, readahead_gfp_mask(mapping))) { put_page(page); - continue; + break; } pagepool[nr++] = page; + contig_end = page_offset(page) + PAGE_SIZE - 1; } - __extent_readpages(tree, pagepool, nr, &em_cached, &bio, - &bio_flags, &prev_em_start); + if (nr) { + u64 contig_start = page_offset(pagepool[0]); + + ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end); + + contiguous_readpages(tree, pagepool, nr, contig_start, + contig_end, &em_cached, &bio, &bio_flags, + &prev_em_start); + } } if (em_cached) -- cgit v1.2.3-59-g8ed1b From d51f51bb6f3c11f3ee4120c35de8e6547ed493fc Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 18 Mar 2019 17:45:18 +0200 Subject: btrfs: Remove unused -EIO assignment in end_bio_extent_readpage In case we hit the error case for a metadata buffer in end_bio_extent_readpage then 'ret' won't really be checked before it's written again to. This means the -EIO in this case will never be checked, just remove it. 
Fixes-coverity-id: 1442513 Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9901e8127e0f..d0b8ed2c2631 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2614,8 +2614,6 @@ static void end_bio_extent_readpage(struct bio *bio) if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) btree_readahead_hook(eb, -EIO); - - ret = -EIO; } readpage_ok: if (likely(uptodate)) { -- cgit v1.2.3-59-g8ed1b From 10995c0491204c861948c9850939a7f4e90760a4 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 18 Mar 2019 10:48:19 +0800 Subject: btrfs: reloc: Fix NULL pointer dereference due to expanded reloc_root lifespan Commit d2311e698578 ("btrfs: relocation: Delay reloc tree deletion after merge_reloc_roots()") expands the life span of root->reloc_root. This breaks certain checks of fs_info->reloc_ctl. Before that commit, if we have a root with valid reloc_root, then it's ensured to have fs_info->reloc_ctl. But now since reloc_root doesn't always mean a valid fs_info->reloc_ctl, such check is unreliable and can cause the following NULL pointer dereference: BUG: unable to handle kernel NULL pointer dereference at 00000000000005c1 IP: btrfs_reloc_pre_snapshot+0x20/0x50 [btrfs] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 10379 Comm: snapperd Not tainted Call Trace: create_pending_snapshot+0xd7/0xfc0 [btrfs] create_pending_snapshots+0x8e/0xb0 [btrfs] btrfs_commit_transaction+0x2ac/0x8f0 [btrfs] btrfs_mksubvol+0x561/0x570 [btrfs] btrfs_ioctl_snap_create_transid+0x189/0x190 [btrfs] btrfs_ioctl_snap_create_v2+0x102/0x150 [btrfs] btrfs_ioctl+0x5c9/0x1e60 [btrfs] do_vfs_ioctl+0x90/0x5f0 SyS_ioctl+0x74/0x80 do_syscall_64+0x7b/0x150 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7fd7cdab8467 Fix it by explicitly checking fs_info->reloc_ctl rather than using the implied root->reloc_root.
Fixes: d2311e698578 ("btrfs: relocation: Delay reloc tree deletion after merge_reloc_roots") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b52fb99646dc..1af2bdf5c877 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4668,14 +4668,12 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, u64 *bytes_to_reserve) { - struct btrfs_root *root; - struct reloc_control *rc; + struct btrfs_root *root = pending->root; + struct reloc_control *rc = root->fs_info->reloc_ctl; - root = pending->root; - if (!root->reloc_root) + if (!root->reloc_root || !rc) return; - rc = root->fs_info->reloc_ctl; if (!rc->merge_reloc_tree) return; @@ -4704,10 +4702,10 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct btrfs_root *reloc_root; struct btrfs_root *new_root; - struct reloc_control *rc; + struct reloc_control *rc = root->fs_info->reloc_ctl; int ret; - if (!root->reloc_root) + if (!root->reloc_root || !rc) return 0; rc = root->fs_info->reloc_ctl; -- cgit v1.2.3-59-g8ed1b From 80fbc341dcff73b4e976b753e4b9ac3db992f229 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 19 Mar 2019 14:04:17 +0800 Subject: btrfs: Make btrfs_(set|clear)_header_flag return void From the introduction of btrfs_(set|clear)_header_flag, there is no usage of its return value. So just make it return void. 
Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b3642367a595..c03852d1aa34 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2163,18 +2163,16 @@ static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag) return (btrfs_header_flags(eb) & flag) == flag; } -static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) +static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) { u64 flags = btrfs_header_flags(eb); btrfs_set_header_flags(eb, flags | flag); - return (flags & flag) == flag; } -static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) +static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) { u64 flags = btrfs_header_flags(eb); btrfs_set_header_flags(eb, flags & ~flag); - return (flags & flag) == flag; } static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) -- cgit v1.2.3-59-g8ed1b From 537f38f019fa0b762dbb4c0fc95d7fcce9db8e2d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 14 Mar 2019 09:52:35 +0200 Subject: btrfs: Correctly free extent buffer in case btree_read_extent_buffer_pages fails If an eb fails to be read for whatever reason - it's corrupted on disk and parent transid/key validations fail or IO for eb pages fail then this buffer must be removed from the buffer cache. Currently the code calls free_extent_buffer if an error occurs. Unfortunately this doesn't achieve the desired behavior since btrfs_find_create_tree_block returns with eb->refs == 2. On the other hand free_extent_buffer will only decrement the refs once leaving it added to the buffer cache radix tree. This enables later code to look up the buffer from the cache and utilize it potentially leading to a crash.
The correct way to free the buffer is to call free_extent_buffer_stale. This function will correctly call atomic_dec explicitly for the buffer and subsequently call release_extent_buffer which will decrement the final reference thus correctly remove the invalid buffer from buffer cache. This change affects only newly allocated buffers since they have eb->refs == 2. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755 Reported-by: Jungyeon CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 46b368d84aa3..ea44cf136131 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1021,13 +1021,18 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) { struct extent_buffer *buf = NULL; struct inode *btree_inode = fs_info->btree_inode; + int ret; buf = btrfs_find_create_tree_block(fs_info, bytenr); if (IS_ERR(buf)) return; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, WAIT_NONE, 0); - free_extent_buffer(buf); + + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, + WAIT_NONE, 0); + if (ret < 0) + free_extent_buffer_stale(buf); + else + free_extent_buffer(buf); } int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, @@ -1047,12 +1052,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, mirror_num); if (ret) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return ret; } if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return -EIO; } else if (extent_buffer_uptodate(buf)) { *eb = buf; @@ -1106,7 +1111,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, ret = 
btree_read_extent_buffer_pages(fs_info, buf, parent_transid, level, first_key); if (ret) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return ERR_PTR(ret); } return buf; -- cgit v1.2.3-59-g8ed1b From 448de471cd4cab0cedd15770082567a69a784a11 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 12 Mar 2019 17:10:40 +0800 Subject: btrfs: Check the first key and level for cached extent buffer [BUG] When reading a file from a fuzzed image, kernel can panic like: BTRFS warning (device loop0): csum failed root 5 ino 270 off 0 csum 0x98f94189 expected csum 0x00000000 mirror 1 assertion failed: !memcmp_extent_buffer(b, &disk_key, offsetof(struct btrfs_leaf, items[0].key), sizeof(disk_key)), file: fs/btrfs/ctree.c, line: 2544 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3500! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:btrfs_search_slot.cold.24+0x61/0x63 [btrfs] Call Trace: btrfs_lookup_csum+0x52/0x150 [btrfs] __btrfs_lookup_bio_sums+0x209/0x640 [btrfs] btrfs_submit_bio_hook+0x103/0x170 [btrfs] submit_one_bio+0x59/0x80 [btrfs] extent_read_full_page+0x58/0x80 [btrfs] generic_file_read_iter+0x2f6/0x9d0 __vfs_read+0x14d/0x1a0 vfs_read+0x8d/0x140 ksys_read+0x52/0xc0 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe [CAUSE] The fuzzed image has a corrupted leaf whose first key doesn't match its parent: checksum tree key (CSUM_TREE ROOT_ITEM 0) node 29741056 level 1 items 14 free 107 generation 19 owner CSUM_TREE fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c ... 
key (EXTENT_CSUM EXTENT_CSUM 79691776) block 29761536 gen 19 leaf 29761536 items 1 free space 1726 generation 19 owner CSUM_TREE leaf 29761536 flags 0x1(WRITTEN) backref revision 1 fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c item 0 key (EXTENT_CSUM EXTENT_CSUM 8798638964736) itemoff 1751 itemsize 2244 range start 8798638964736 end 8798641262592 length 2297856 When reading the above tree block, we have extent_buffer->refs = 2 in the context: - initial one from __alloc_extent_buffer() alloc_extent_buffer() |- __alloc_extent_buffer() |- atomic_set(&eb->refs, 1) - one being added to fs_info->buffer_radix alloc_extent_buffer() |- check_buffer_tree_ref() |- atomic_inc(&eb->refs) So even if we call free_extent_buffer() in read_tree_block or other similar situation, we only decrease the refs by 1, it doesn't reach 0 and won't be freed right now. The stale eb and its corrupted content will still be kept cached. Furthermore, we have several extra cases where we either don't do first key check or the check is not proper for all callers: - scrub We just don't have first key in this context. - shared tree block One tree block can be shared by several snapshot/subvolume trees. In that case, the first key check for one subvolume doesn't apply to another. So for the above reasons, a corrupted extent buffer can sneak into the buffer cache. [FIX] Call verify_level_key in read_block_for_search to do another verification. For that purpose the function is exported. Due to above reasons, although we can free corrupted extent buffer from cache, we still need the check in read_block_for_search(), for scrub and shared tree blocks. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202757 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202759 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202761 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202767 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202769 Reported-by: Yoon Jungyeon CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 10 ++++++++++ fs/btrfs/disk-io.c | 10 +++++----- fs/btrfs/disk-io.h | 3 +++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 324df36d28bf..65b12963e72b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2416,6 +2416,16 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (tmp) { /* first we do an atomic uptodate check */ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { + /* + * Do extra check for first_key, eb can be stale due to + * being cached, read from scrub, or have multiple + * parents (shared tree blocks). 
+ */ + if (btrfs_verify_level_key(fs_info, tmp, + parent_level - 1, &first_key, gen)) { + free_extent_buffer(tmp); + return -EUCLEAN; + } *eb_ret = tmp; return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ea44cf136131..71141ae72cc9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -394,9 +394,9 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, return ret; } -static int verify_level_key(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int level, - struct btrfs_key *first_key, u64 parent_transid) +int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid) { int found_level; struct btrfs_key found_key; @@ -473,8 +473,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, if (verify_parent_transid(io_tree, eb, parent_transid, 0)) ret = -EIO; - else if (verify_level_key(fs_info, eb, level, - first_key, parent_transid)) + else if (btrfs_verify_level_key(fs_info, eb, level, + first_key, parent_transid)) ret = -EUCLEAN; else break; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 987a64bc0c66..67a9fe2d29c7 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,6 +39,9 @@ static inline u64 btrfs_sb_offset(int mirror) struct btrfs_device; struct btrfs_fs_devices; +int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid); struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, u64 parent_transid, int level, struct btrfs_key *first_key); -- cgit v1.2.3-59-g8ed1b From 63489055e4c0dd4d2a04064138906630995456dc Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:39 +0800 Subject: btrfs: Always output error message when key/level verification fails We have internal report of strange transaction abort due to EUCLEAN without any error message. 
Since error message inside verify_level_key() is only enabled for CONFIG_BTRFS_DEBUG, the error message won't be printed on most builds. This patch will make the error message mandatory, so when problem happens we know what's causing the problem. Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71141ae72cc9..fb9c9e0783af 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -404,12 +404,11 @@ int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, found_level = btrfs_header_level(eb); if (found_level != level) { -#ifdef CONFIG_BTRFS_DEBUG - WARN_ON(1); + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree level check failed\n"); btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", eb->start, level, found_level); -#endif return -EIO; } @@ -430,9 +429,9 @@ int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, btrfs_item_key_to_cpu(eb, &found_key, 0); ret = btrfs_comp_cpu_keys(first_key, &found_key); -#ifdef CONFIG_BTRFS_DEBUG if (ret) { - WARN_ON(1); + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree first key check failed\n"); btrfs_err(fs_info, "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", eb->start, parent_transid, first_key->objectid, @@ -440,7 +439,6 @@ int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, found_key.objectid, found_key.type, found_key.offset); } -#endif return ret; } -- cgit v1.2.3-59-g8ed1b From f4340622e02261fae599e3da936ff4808b418173 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:41 +0800 Subject: btrfs: extent_io: Move the BUG_ON() in flush_write_bio() one level up We have a BUG_ON() in flush_write_bio() to handle the return value of 
submit_one_bio(). Move the BUG_ON() one level up to all its callers. This patch will introduce temporary variable, @flush_ret to keep code change minimal in this patch. That variable will be cleaned up when enhancing the error handling later. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 55 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d0b8ed2c2631..43091b29b8d7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -170,15 +170,28 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, return blk_status_to_errno(ret); } -static void flush_write_bio(struct extent_page_data *epd) +/* + * Submit bio from extent page data via submit_one_bio + * + * Return 0 if everything is OK. + * Return <0 for error. + */ +static int __must_check flush_write_bio(struct extent_page_data *epd) { - if (epd->bio) { - int ret; + int ret = 0; + if (epd->bio) { ret = submit_one_bio(epd->bio, 0, 0); - BUG_ON(ret < 0); /* -ENOMEM */ + /* + * Clean up of epd->bio is handled by its endio function. + * And endio is either triggered by successful bio execution + * or the error handler of submit bio hook. + * So at this point, no matter what happened, we don't need + * to clean up epd->bio. 
+ */ epd->bio = NULL; } + return ret; } int __init extent_io_init(void) @@ -3476,7 +3489,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!btrfs_try_tree_write_lock(eb)) { flush = 1; - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); btrfs_tree_lock(eb); } @@ -3485,7 +3499,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!epd->sync_io) return 0; if (!flush) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); flush = 1; } while (1) { @@ -3526,7 +3541,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!trylock_page(p)) { if (!flush) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); flush = 1; } lock_page(p); @@ -3718,6 +3734,7 @@ int btree_write_cache_pages(struct address_space *mapping, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; int ret = 0; + int flush_ret; int done = 0; int nr_to_write_done = 0; struct pagevec pvec; @@ -3817,7 +3834,8 @@ retry: index = 0; goto retry; } - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } @@ -3914,7 +3932,8 @@ retry: * tmpfs file mapping */ if (!trylock_page(page)) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); lock_page(page); } @@ -3924,8 +3943,10 @@ retry: } if (wbc->sync_mode != WB_SYNC_NONE) { - if (PageWriteback(page)) - flush_write_bio(epd); + if (PageWriteback(page)) { + ret = flush_write_bio(epd); + BUG_ON(ret < 0); + } wait_on_page_writeback(page); } @@ -3986,6 +4007,7 @@ retry: int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; + int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(page->mapping->host)->io_tree, @@ -3995,7 +4017,8 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc) ret = __extent_writepage(page, wbc, &epd); - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } @@ -4003,6 +4026,7 @@ int 
extent_write_locked_range(struct inode *inode, u64 start, u64 end, int mode) { int ret = 0; + int flush_ret; struct address_space *mapping = inode->i_mapping; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *page; @@ -4035,7 +4059,8 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, start += PAGE_SIZE; } - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } @@ -4043,6 +4068,7 @@ int extent_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret = 0; + int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(mapping->host)->io_tree, @@ -4051,7 +4077,8 @@ int extent_writepages(struct address_space *mapping, }; ret = extent_write_cache_pages(mapping, wbc, &epd); - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } -- cgit v1.2.3-59-g8ed1b From 3065976b045f77a910809fa7699f99a1e7c0dbbb Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:42 +0800 Subject: btrfs: extent_io: Handle errors better in extent_write_full_page() Since now flush_write_bio() could return error, kill the BUG_ON() first. Then don't call flush_write_bio() unconditionally, instead we check the return value from __extent_writepage() first. If __extent_writepage() fails, we do cleanup, and return error without submitting the possibly corrupted or half-baked bio. If __extent_writepage() succeeds, then we call flush_write_bio() and return the result. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 43091b29b8d7..9d52f3b78732 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -170,6 +170,16 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, return blk_status_to_errno(ret); } +/* Cleanup unsubmitted bios */ +static void end_write_bio(struct extent_page_data *epd, int ret) +{ + if (epd->bio) { + epd->bio->bi_status = errno_to_blk_status(ret); + bio_endio(epd->bio); + epd->bio = NULL; + } +} + /* * Submit bio from extent page data via submit_one_bio * @@ -3397,6 +3407,9 @@ done: * records are inserted to lock ranges in the tree, and as dirty areas * are found, they are marked writeback. Then the lock bits are removed * and the end_io handler clears the writeback ranges + * + * Return 0 if everything goes well. + * Return <0 for error. 
*/ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct extent_page_data *epd) @@ -3466,6 +3479,7 @@ done: end_extent_writepage(page, ret, start, page_end); } unlock_page(page); + ASSERT(ret <= 0); return ret; done_unlocked: @@ -4007,7 +4021,6 @@ retry: int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; - int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(page->mapping->host)->io_tree, @@ -4016,9 +4029,14 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc) }; ret = __extent_writepage(page, wbc, &epd); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } - flush_ret = flush_write_bio(&epd); - BUG_ON(flush_ret < 0); + ret = flush_write_bio(&epd); + ASSERT(ret <= 0); return ret; } -- cgit v1.2.3-59-g8ed1b From 2b952eea813b1f7e7d4b9782271acd91625b9bb9 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:43 +0800 Subject: btrfs: extent_io: Handle errors better in btree_write_cache_pages() In btree_write_cache_pages(), we can only get @ret <= 0. Add an ASSERT() for it just in case. Then instead of submitting the write bio even if we got some error, check the return value first. If we have already hit some error, just clean up the corrupted or half-baked bio, and return error. If there is no error so far, then call flush_write_bio() and return the result. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9d52f3b78732..8399fc4b27ae 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3748,7 +3748,6 @@ int btree_write_cache_pages(struct address_space *mapping, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; int ret = 0; - int flush_ret; int done = 0; int nr_to_write_done = 0; struct pagevec pvec; @@ -3848,8 +3847,12 @@ retry: index = 0; goto retry; } - flush_ret = flush_write_bio(&epd); - BUG_ON(flush_ret < 0); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } + ret = flush_write_bio(&epd); return ret; } -- cgit v1.2.3-59-g8ed1b From e06808be8a5296c09be84b3aaf63087b5737ba16 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:44 +0800 Subject: btrfs: extent_io: Kill dead condition in extent_write_cache_pages() Since __extent_writepage() will no longer return >0 value, (ret == AOP_WRITEPAGE_ACTIVATE) will never be true. Kill that dead branch. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8399fc4b27ae..cd76be7013d8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3974,11 +3974,6 @@ retry: } ret = __extent_writepage(page, wbc, epd); - - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; - } if (ret < 0) { /* * done_index is set past this page, -- cgit v1.2.3-59-g8ed1b From 02c6db4f7308e4f5adf4df2ef623160bfdb18636 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:45 +0800 Subject: btrfs: extent_io: Handle errors better in extent_write_locked_range() We can only get @ret <= 0. Add an ASSERT() for it just in case. 
Then, instead of submitting the write bio even if we got some error, check the return value first. If we have already hit some error, just clean up the corrupted or half-baked bio, and return error. If there is no error so far, then call flush_write_bio() and return the result. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cd76be7013d8..b43a0a0f41e4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4042,7 +4042,6 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, int mode) { int ret = 0; - int flush_ret; struct address_space *mapping = inode->i_mapping; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *page; @@ -4075,8 +4074,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, start += PAGE_SIZE; } - flush_ret = flush_write_bio(&epd); - BUG_ON(flush_ret < 0); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } + ret = flush_write_bio(&epd); return ret; } -- cgit v1.2.3-59-g8ed1b From 2e3c25136adfb293d517e17f761d3b8a43a8fc22 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:46 +0800 Subject: btrfs: extent_io: add proper error handling to lock_extent_buffer_for_io() This function needs some extra checks on locked pages and eb. For error handling we need to unlock locked pages and the eb. There is a rare >0 return value branch, where all pages get locked while write bio is not flushed. Thankfully it's handled by the only caller, btree_write_cache_pages(), as later write_one_eb() call will trigger submit_one_bio(). So there shouldn't be any problem. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b43a0a0f41e4..38dcac0a152e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3492,19 +3492,27 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) TASK_UNINTERRUPTIBLE); } +/* + * Lock eb pages and flush the bio if we can't the locks + * + * Return 0 if nothing went wrong + * Return >0 is same as 0, except bio is not submitted + * Return <0 if something went wrong, no page is locked + */ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb, struct btrfs_fs_info *fs_info, struct extent_page_data *epd) { - int i, num_pages; + int i, num_pages, failed_page_nr; int flush = 0; int ret = 0; if (!btrfs_try_tree_write_lock(eb)) { - flush = 1; ret = flush_write_bio(epd); - BUG_ON(ret < 0); + if (ret < 0) + return ret; + flush = 1; btrfs_tree_lock(eb); } @@ -3514,7 +3522,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, return 0; if (!flush) { ret = flush_write_bio(epd); - BUG_ON(ret < 0); + if (ret < 0) + return ret; flush = 1; } while (1) { @@ -3556,7 +3565,10 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!trylock_page(p)) { if (!flush) { ret = flush_write_bio(epd); - BUG_ON(ret < 0); + if (ret < 0) { + failed_page_nr = i; + goto err_unlock; + } flush = 1; } lock_page(p); @@ -3564,6 +3576,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, } return ret; +err_unlock: + /* Unlock already locked pages */ + for (i = 0; i < failed_page_nr; i++) + unlock_page(eb->pages[i]); + return ret; } static void end_extent_buffer_writeback(struct extent_buffer *eb) -- cgit v1.2.3-59-g8ed1b From a2a72fbd1110323b2008dcb3ed14494957341dfe Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:48 +0800 Subject: btrfs: extent_io: Handle errors better in 
extent_writepages() We can only get <=0 from extent_write_cache_pages, add an ASSERT() for it just in case. Then instead of submitting the write bio even if we got some error, check the return value first. If we have already hit some error, just clean up the corrupted or half-baked bio, and return error. If there is no error so far, then call flush_write_bio() and return the result. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 38dcac0a152e..e304d5f50c5a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4104,7 +4104,6 @@ int extent_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret = 0; - int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(mapping->host)->io_tree, @@ -4113,8 +4112,12 @@ int extent_writepages(struct address_space *mapping, }; ret = extent_write_cache_pages(mapping, wbc, &epd); - flush_ret = flush_write_bio(&epd); - BUG_ON(flush_ret < 0); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } + ret = flush_write_bio(&epd); return ret; } -- cgit v1.2.3-59-g8ed1b From 6a8d2136ca3f0ed597d9f3dffa3821dd6bc7b11a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 20 Mar 2019 21:53:16 +0200 Subject: btrfs: Use less confusing condition for uptodate parameter to btrfs_writepage_endio_finish_ordered The uptodate parameter of btrfs_writepage_endio_finish_ordered is used to signal whether an error has occurred while writing the given page. 0 signals an error, which is propagated to callees and 1 signifies success. In end_compressed_bio_write the ->bi_status is checked and based on it either BLK_STS_OK (0) or BLK_STS_NOTSUPP (1) are used. 
While from a functional point of view this is ok, it's confusing for the poor reader of the code, since the block layer values are conflated with the semantics of the parameter. Just use plain 0 or 1. No functional changes. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 4f2a8ae0aa42..1463e14af2fb 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -251,7 +251,7 @@ static void end_compressed_bio_write(struct bio *bio) cb->compressed_pages[0]->mapping = cb->inode->i_mapping; btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0], cb->start, cb->start + cb->len - 1, - bio->bi_status ? BLK_STS_OK : BLK_STS_NOTSUPP); + bio->bi_status == BLK_STS_OK); cb->compressed_pages[0]->mapping = NULL; end_compressed_writeback(inode, cb); -- cgit v1.2.3-59-g8ed1b From ff612ba7849964b1898fd3ccd1f56941129c6aab Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Feb 2019 11:14:45 -0500 Subject: btrfs: fix panic during relocation after ENOSPC before writeback happens We've been seeing the following sporadically throughout our fleet panic: kernel BUG at fs/btrfs/relocation.c:4584! 
netversion: 5.0-0 Backtrace: #0 [ffffc90003adb880] machine_kexec at ffffffff81041da8 #1 [ffffc90003adb8c8] __crash_kexec at ffffffff8110396c #2 [ffffc90003adb988] crash_kexec at ffffffff811048ad #3 [ffffc90003adb9a0] oops_end at ffffffff8101c19a #4 [ffffc90003adb9c0] do_trap at ffffffff81019114 #5 [ffffc90003adba00] do_error_trap at ffffffff810195d0 #6 [ffffc90003adbab0] invalid_op at ffffffff81a00a9b [exception RIP: btrfs_reloc_cow_block+692] RIP: ffffffff8143b614 RSP: ffffc90003adbb68 RFLAGS: 00010246 RAX: fffffffffffffff7 RBX: ffff8806b9c32000 RCX: ffff8806aad00690 RDX: ffff880850b295e0 RSI: ffff8806b9c32000 RDI: ffff88084f205bd0 RBP: ffff880849415000 R8: ffffc90003adbbe0 R9: ffff88085ac90000 R10: ffff8805f7369140 R11: 0000000000000000 R12: ffff880850b295e0 R13: ffff88084f205bd0 R14: 0000000000000000 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffffc90003adbbb0] __btrfs_cow_block at ffffffff813bf1cd #8 [ffffc90003adbc28] btrfs_cow_block at ffffffff813bf4b3 #9 [ffffc90003adbc78] btrfs_search_slot at ffffffff813c2e6c The way relocation moves data extents is by creating a reloc inode and preallocating extents in this inode and then copying the data into these preallocated extents. Once we've done this for all of our extents, we'll write out these dirty pages, which marks the extent written, and goes into btrfs_reloc_cow_block(). From here we get our current reloc_control, which _should_ match the reloc_control for the current block group we're relocating. However if we get an ENOSPC in this path at some point we'll bail out, never initiating writeback on this inode. Not a huge deal, unless we happen to be doing relocation on a different block group, and this block group is now rc->stage == UPDATE_DATA_PTRS. This trips the BUG_ON() in btrfs_reloc_cow_block(), because we expect to be done modifying the data inode. 
We are in fact done modifying the metadata for the data inode we're currently using, but not the one from the failed block group, and thus we BUG_ON(). (This happens when writeback finishes for extents from the previous group, when we are at btrfs_finish_ordered_io() which updates the data reloc tree (inode item, drops/adds extent items, etc).) Fix this by writing out the reloc data inode always, and then breaking out of the loop after that point to keep from tripping this BUG_ON() later. Signed-off-by: Josef Bacik Reviewed-by: Filipe Manana [ add note from Filipe ] Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 1af2bdf5c877..25fc6a4f3ecf 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4331,27 +4331,36 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) mutex_lock(&fs_info->cleaner_mutex); ret = relocate_block_group(rc); mutex_unlock(&fs_info->cleaner_mutex); - if (ret < 0) { + if (ret < 0) err = ret; - goto out; - } - - if (rc->extents_found == 0) - break; - - btrfs_info(fs_info, "found %llu extents", rc->extents_found); + /* + * We may have gotten ENOSPC after we already dirtied some + * extents. If writeout happens while we're relocating a + * different block group we could end up hitting the + * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in + * btrfs_reloc_cow_block. Make sure we write everything out + * properly so we don't trip over this problem, and then break + * out of the loop if we hit an error. 
+ */ if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { ret = btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); - if (ret) { + if (ret) err = ret; - goto out; - } invalidate_mapping_pages(rc->data_inode->i_mapping, 0, -1); rc->stage = UPDATE_DATA_PTRS; } + + if (err < 0) + goto out; + + if (rc->extents_found == 0) + break; + + btrfs_info(fs_info, "found %llu extents", rc->extents_found); + } WARN_ON(rc->block_group->pinned > 0); -- cgit v1.2.3-59-g8ed1b From 75391f0d41197e54b61ada92ddeffc28bf028c8c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:40 +0800 Subject: btrfs: disk-io: Show the timing of corrupted tree block explicitly Just add one extra line to show when the corruption is detected. Currently only read time detection is possible. The planned distinguish line would be: read time: block=XXXXX read time tree block corruption detected write time: block=XXXXX write time tree block corruption detected Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fb9c9e0783af..f4ef3a1a321b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -662,6 +662,10 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, if (!ret) set_extent_buffer_uptodate(eb); + else + btrfs_err(fs_info, + "block=%llu read time tree block corruption detected", + eb->start); err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) -- cgit v1.2.3-59-g8ed1b From d46a05edac440168a31805e583c8ab3f9c9561f9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 16:43:11 +0100 Subject: btrfs: tests: handle fs_info allocation failure in extent_io tests Signed-off-by: David Sterba --- fs/btrfs/tests/extent-io-tests.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/tests/extent-io-tests.c 
b/fs/btrfs/tests/extent-io-tests.c index e46ed2985b19..74f69df7a7e1 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -392,6 +392,10 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) ? sectorsize * 4 : sectorsize; fs_info = btrfs_alloc_dummy_fs_info(len, len); + if (!fs_info) { + test_err("could not allocate fs_info"); + return -ENOMEM; + } bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { -- cgit v1.2.3-59-g8ed1b From d33d105b85d6099e2499c536fb6fbb2dc65ea644 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 16:46:55 +0100 Subject: btrfs: tests: don't leak fs_info in extent_io bitmap tests The fs_info is not freed at the end of the function and leaks. The function is called twice so there can be up to 2x sizeof(struct btrfs_fs_info) of leaked memory. Fortunately this affects only testing builds, the size could be 16k with several debugging features enabled. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-io-tests.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 74f69df7a7e1..24003e97e797 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -378,8 +378,8 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) { struct btrfs_fs_info *fs_info; unsigned long len; - unsigned long *bitmap; - struct extent_buffer *eb; + unsigned long *bitmap = NULL; + struct extent_buffer *eb = NULL; int ret; test_msg("running extent buffer bitmap tests"); @@ -400,14 +400,15 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { test_err("couldn't allocate test bitmap"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } eb = __alloc_dummy_extent_buffer(fs_info, 0, len); if (!eb) { test_err("couldn't allocate test extent buffer"); - kfree(bitmap); - return -ENOMEM; + ret = -ENOMEM; + goto out; } ret =
__test_eb_bitmaps(bitmap, eb, len); @@ -419,14 +420,15 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); if (!eb) { test_err("couldn't allocate test extent buffer"); - kfree(bitmap); - return -ENOMEM; + ret = -ENOMEM; + goto out; } ret = __test_eb_bitmaps(bitmap, eb, len); out: free_extent_buffer(eb); kfree(bitmap); + btrfs_free_dummy_fs_info(fs_info); return ret; } -- cgit v1.2.3-59-g8ed1b From efd31fce54f78a172edd7ef61f9661cc614c56b2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:03:55 +0100 Subject: btrfs: tests: print file:line for error messages For better diagnostics print the file name and line to locate the errors. Sample output: [ 9.052924] BTRFS: selftest: fs/btrfs/tests/extent-io-tests.c:283 offset bits do not match Signed-off-by: David Sterba --- fs/btrfs/tests/btrfs-tests.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 70ff9f9d86a1..bb9e9c234535 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -10,7 +10,8 @@ int btrfs_run_sanity_tests(void); #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__) -#define test_err(fmt, ...) pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__) +#define test_err(fmt, ...) pr_err("BTRFS: selftest: %s:%d " fmt "\n", \ + __FILE__, __LINE__, ##__VA_ARGS__) struct btrfs_root; struct btrfs_trans_handle; -- cgit v1.2.3-59-g8ed1b From 703de4266f63ba1703fd04d4838fefd555dfbf9a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:23:30 +0100 Subject: btrfs: tests: add table of most common errors Allocation of main objects like fs_info or extent buffers is in each test so let's simplify and unify the error messages to a table and add a convenience helper. 
Signed-off-by: David Sterba --- fs/btrfs/tests/btrfs-tests.c | 10 ++++++++++ fs/btrfs/tests/btrfs-tests.h | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 1351ac2afdd2..314d7bdac9d5 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -17,6 +17,16 @@ static struct vfsmount *test_mnt = NULL; +const char *test_error[] = { + [TEST_ALLOC_FS_INFO] = "cannot allocate fs_info", + [TEST_ALLOC_ROOT] = "cannot allocate root", + [TEST_ALLOC_EXTENT_BUFFER] = "cannot extent buffer", + [TEST_ALLOC_PATH] = "cannot allocate path", + [TEST_ALLOC_INODE] = "cannot allocate inode", + [TEST_ALLOC_BLOCK_GROUP] = "cannot allocate block group", + [TEST_ALLOC_EXTENT_MAP] = "cannot allocate extent map", +}; + static const struct super_operations btrfs_test_super_ops = { .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_test_destroy_inode, diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index bb9e9c234535..ee277bbd939b 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -13,6 +13,20 @@ int btrfs_run_sanity_tests(void); #define test_err(fmt, ...) 
pr_err("BTRFS: selftest: %s:%d " fmt "\n", \ __FILE__, __LINE__, ##__VA_ARGS__) +#define test_std_err(index) test_err("%s", test_error[index]) + +enum { + TEST_ALLOC_FS_INFO, + TEST_ALLOC_ROOT, + TEST_ALLOC_EXTENT_BUFFER, + TEST_ALLOC_PATH, + TEST_ALLOC_INODE, + TEST_ALLOC_BLOCK_GROUP, + TEST_ALLOC_EXTENT_MAP, +}; + +extern const char *test_error[]; + struct btrfs_root; struct btrfs_trans_handle; -- cgit v1.2.3-59-g8ed1b From 37b2a7bc1ea919c8d00f2a2ece1917c38a30d6ae Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:28:46 +0100 Subject: btrfs: tests: use standard error message after fs_info allocation failure Signed-off-by: David Sterba --- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/extent-io-tests.c | 2 +- fs/btrfs/tests/extent-map-tests.c | 2 +- fs/btrfs/tests/free-space-tests.c | 5 +++-- fs/btrfs/tests/free-space-tree-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 6 +++--- fs/btrfs/tests/qgroup-tests.c | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index 7d72eab6d32c..74c7975882d1 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -30,7 +30,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { - test_err("could not allocate fs_info"); + test_std_err(TEST_ALLOC_FS_INFO); return -ENOMEM; } diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 24003e97e797..5e92ee074bd0 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -393,7 +393,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) fs_info = btrfs_alloc_dummy_fs_info(len, len); if (!fs_info) { - test_err("could not allocate fs_info"); + test_std_err(TEST_ALLOC_FS_INFO); return -ENOMEM; } diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c 
index bf15d3a7f20e..662b718506b9 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -349,7 +349,7 @@ int btrfs_test_extent_map(void) */ fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE); if (!fs_info) { - test_msg("Couldn't allocate dummy fs info"); + test_std_err(TEST_ALLOC_FS_INFO); return -ENOMEM; } diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 5c2f77e9439b..2051e1a19104 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -834,9 +834,10 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) test_msg("running btrfs free space cache tests"); fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); - if (!fs_info) + if (!fs_info) { + test_std_err(TEST_ALLOC_FS_INFO); return -ENOMEM; - + } /* * For ppc64 (with 64k page size), bytes per bitmap might be diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 89346da890cf..49fbf73c7f2b 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -444,7 +444,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { - test_err("couldn't allocate dummy fs info"); + test_std_err(TEST_ALLOC_FS_INFO); ret = -ENOMEM; goto out; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index af0c8e30d9e2..510365370d81 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -238,7 +238,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { - test_err("couldn't allocate dummy fs info"); + test_std_err(TEST_ALLOC_FS_INFO); goto out; } @@ -839,7 +839,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { - 
test_err("couldn't allocate dummy fs info"); + test_std_err(TEST_ALLOC_FS_INFO); goto out; } @@ -935,7 +935,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { - test_err("couldn't allocate dummy fs info"); + test_std_err(TEST_ALLOC_FS_INFO); goto out; } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 412b910b04cc..c620f68462be 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -457,7 +457,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { - test_err("couldn't allocate dummy fs info"); + test_std_err(TEST_ALLOC_FS_INFO); return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From 52ab7bca3583fa7a80b16232874784f8dcc4e560 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:28:46 +0100 Subject: btrfs: tests: use standard error message after root allocation failure Signed-off-by: David Sterba --- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/free-space-tests.c | 1 + fs/btrfs/tests/free-space-tree-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 6 +++--- fs/btrfs/tests/qgroup-tests.c | 6 +++--- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index 74c7975882d1..83814b769bde 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -36,7 +36,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(root)) { - test_err("could not allocate root"); + test_std_err(TEST_ALLOC_ROOT); ret = PTR_ERR(root); goto out; } diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 2051e1a19104..8dcdefab1280 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -854,6 +854,7 @@ int 
btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(root)) { + test_std_err(TEST_ALLOC_ROOT); ret = PTR_ERR(root); goto out; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 49fbf73c7f2b..79b5d0c97b7b 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -451,7 +451,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(root)) { - test_err("couldn't allocate dummy root"); + test_std_err(TEST_ALLOC_ROOT); ret = PTR_ERR(root); goto out; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 510365370d81..5803f342c47b 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -244,7 +244,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(root)) { - test_err("couldn't allocate root"); + test_std_err(TEST_ALLOC_ROOT); goto out; } @@ -845,7 +845,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(root)) { - test_err("couldn't allocate root"); + test_std_err(TEST_ALLOC_ROOT); goto out; } @@ -941,7 +941,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(root)) { - test_err("couldn't allocate root"); + test_std_err(TEST_ALLOC_ROOT); goto out; } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index c620f68462be..7e25a3a9f979 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -463,7 +463,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(root)) { - test_err("couldn't allocate root"); + test_std_err(TEST_ALLOC_ROOT); ret = PTR_ERR(root); goto out; } @@ -495,7 +495,7 @@ int 
btrfs_test_qgroups(u32 sectorsize, u32 nodesize) tmp_root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(tmp_root)) { - test_err("couldn't allocate a fs root"); + test_std_err(TEST_ALLOC_ROOT); ret = PTR_ERR(tmp_root); goto out; } @@ -510,7 +510,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) tmp_root = btrfs_alloc_dummy_root(fs_info); if (IS_ERR(tmp_root)) { - test_err("couldn't allocate a fs root"); + test_std_err(TEST_ALLOC_ROOT); ret = PTR_ERR(tmp_root); goto out; } -- cgit v1.2.3-59-g8ed1b From 9e3d9f8462ef020e2e4d256064f15fc7eb657156 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:28:46 +0100 Subject: btrfs: tests: use standard error message after extent buffer allocation failure Signed-off-by: David Sterba --- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/extent-io-tests.c | 4 ++-- fs/btrfs/tests/free-space-tree-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index 83814b769bde..dc2582554cf0 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -50,7 +50,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize); if (!eb) { - test_err("could not allocate dummy buffer"); + test_std_err(TEST_ALLOC_EXTENT_BUFFER); ret = -ENOMEM; goto out; } diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 5e92ee074bd0..904addec8aef 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -406,7 +406,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) eb = __alloc_dummy_extent_buffer(fs_info, 0, len); if (!eb) { - test_err("couldn't allocate test extent buffer"); + test_std_err(TEST_ALLOC_ROOT); ret = -ENOMEM; goto out; } @@ -419,7 +419,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) 
free_extent_buffer(eb); eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); if (!eb) { - test_err("couldn't allocate test extent buffer"); + test_std_err(TEST_ALLOC_ROOT); ret = -ENOMEM; goto out; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 79b5d0c97b7b..58fbca92dd0d 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -463,7 +463,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, root->node = alloc_test_extent_buffer(root->fs_info, nodesize); if (!root->node) { - test_err("couldn't allocate dummy buffer"); + test_std_err(TEST_ALLOC_EXTENT_BUFFER); ret = -ENOMEM; goto out; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 5803f342c47b..e63c128c741c 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -250,7 +250,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) root->node = alloc_dummy_extent_buffer(fs_info, nodesize); if (!root->node) { - test_err("couldn't allocate dummy buffer"); + test_std_err(TEST_ALLOC_ROOT); goto out; } @@ -851,7 +851,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) root->node = alloc_dummy_extent_buffer(fs_info, nodesize); if (!root->node) { - test_err("couldn't allocate dummy buffer"); + test_std_err(TEST_ALLOC_ROOT); goto out; } -- cgit v1.2.3-59-g8ed1b From 770e0cc040b07eb54f6cb119a44bf6ff742338eb Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:28:46 +0100 Subject: btrfs: tests: use standard error message after path allocation failure Signed-off-by: David Sterba --- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/free-space-tree-tests.c | 2 +- fs/btrfs/tests/qgroup-tests.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index dc2582554cf0..a1b9f9b5978e 100644 --- 
a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -43,7 +43,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) path = btrfs_alloc_path(); if (!path) { - test_err("could not allocate path"); + test_std_err(TEST_ALLOC_PATH); ret = -ENOMEM; goto out; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 58fbca92dd0d..22282c16c022 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -486,7 +486,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, path = btrfs_alloc_path(); if (!path) { - test_err("couldn't allocate path"); + test_std_err(TEST_ALLOC_ROOT); ret = -ENOMEM; goto out; } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 7e25a3a9f979..c85e4b955939 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -32,7 +32,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr, path = btrfs_alloc_path(); if (!path) { - test_err("couldn't allocate path"); + test_std_err(TEST_ALLOC_ROOT); return -ENOMEM; } @@ -82,7 +82,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes, path = btrfs_alloc_path(); if (!path) { - test_err("couldn't allocate path"); + test_std_err(TEST_ALLOC_ROOT); return -ENOMEM; } @@ -132,7 +132,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr, path = btrfs_alloc_path(); if (!path) { - test_err("couldn't allocate path"); + test_std_err(TEST_ALLOC_ROOT); return -ENOMEM; } path->leave_spinning = 1; @@ -166,7 +166,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, path = btrfs_alloc_path(); if (!path) { - test_err("couldn't allocate path"); + test_std_err(TEST_ALLOC_ROOT); return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From 6a060db85d0a81b9c06bb134f30a55a6b89a0a02 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:28:46 +0100 
Subject: btrfs: tests: use standard error message after inode allocation failure Signed-off-by: David Sterba --- fs/btrfs/tests/extent-io-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 904addec8aef..3064b61b110b 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -73,7 +73,7 @@ static int test_find_delalloc(u32 sectorsize) inode = btrfs_new_test_inode(); if (!inode) { - test_err("failed to allocate test inode"); + test_std_err(TEST_ALLOC_INODE); return -ENOMEM; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index e63c128c741c..46571cd27513 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -228,7 +228,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) inode = btrfs_new_test_inode(); if (!inode) { - test_err("couldn't allocate inode"); + test_std_err(TEST_ALLOC_INODE); return ret; } @@ -829,7 +829,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) inode = btrfs_new_test_inode(); if (!inode) { - test_err("couldn't allocate inode"); + test_std_err(TEST_ALLOC_INODE); return ret; } @@ -929,7 +929,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) inode = btrfs_new_test_inode(); if (!inode) { - test_err("couldn't allocate inode"); + test_std_err(TEST_ALLOC_INODE); return ret; } -- cgit v1.2.3-59-g8ed1b From 3199366da73dddbfa5a0b36a7438dac008d2053e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:28:46 +0100 Subject: btrfs: tests: use standard error message after block group allocation failure Signed-off-by: David Sterba --- fs/btrfs/tests/free-space-tests.c | 2 +- fs/btrfs/tests/free-space-tree-tests.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 
8dcdefab1280..dcbe526e5698 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -847,7 +847,7 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize) cache = btrfs_alloc_dummy_block_group(fs_info, BITS_PER_BITMAP * sectorsize + PAGE_SIZE); if (!cache) { - test_err("couldn't run the tests"); + test_std_err(TEST_ALLOC_BLOCK_GROUP); btrfs_free_dummy_fs_info(fs_info); return 0; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 22282c16c022..09c27628e305 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -473,7 +473,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment); if (!cache) { - test_err("couldn't allocate dummy block group cache"); + test_std_err(TEST_ALLOC_BLOCK_GROUP); ret = -ENOMEM; goto out; } -- cgit v1.2.3-59-g8ed1b From 7b9586bc2b31b8f62da651cb0ddf926997c93954 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:42:07 +0100 Subject: btrfs: tests: properly initialize fs_info of extent buffer The fs_info is supposed to be valid, even though it's not used right now and the test does not crash. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-io-tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 3064b61b110b..e88b7c9667f8 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -417,7 +417,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) /* Do it over again with an extent buffer which isn't page-aligned. 
*/ free_extent_buffer(eb); - eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); + eb = __alloc_dummy_extent_buffer(fs_info, nodesize / 2, len); if (!eb) { test_std_err(TEST_ALLOC_ROOT); ret = -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 488f673023be9a0a5d3f2078e294ff3be907efe0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 18:06:16 +0100 Subject: btrfs: tests: return errors from extent map tests The individual testcases for extent maps do not return an error on allocation failures. This is not a big problem as the allocations don't fail in general, but there are functional tests handled with ASSERTS. This makes the tests dependent on them, which is not reliable. This patch adds the allocation failure handling and allows for the conversion of the asserts to proper error handling and reporting. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 68 ++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 662b718506b9..c31b1ac3bc2d 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -47,7 +47,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree) * ->add_extent_mapping(0, 16K) * -> #handle -EEXIST */ -static void test_case_1(struct btrfs_fs_info *fs_info, +static int test_case_1(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree) { struct extent_map *em; @@ -57,8 +57,7 @@ static void test_case_1(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) - /* Skip the test on error.
*/ - return; + return -ENOMEM; /* Add [0, 16K) */ em->start = 0; @@ -71,8 +70,10 @@ static void test_case_1(struct btrfs_fs_info *fs_info, /* Add [16K, 20K) following [0, 16K) */ em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out; + } em->start = SZ_16K; em->len = SZ_4K; @@ -83,8 +84,10 @@ static void test_case_1(struct btrfs_fs_info *fs_info, free_extent_map(em); em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out; + } /* Add [0, 8K), should return [0, 16K) instead. */ em->start = start; @@ -102,9 +105,12 @@ static void test_case_1(struct btrfs_fs_info *fs_info, start, start + len, ret, em->start, em->len, em->block_start, em->block_len); free_extent_map(em); + ret = 0; out: /* free memory */ free_extent_map_tree(em_tree); + + return ret; } /* @@ -113,7 +119,7 @@ out: * Reading the inline ending up with EEXIST, ie. read an inline * extent and discard page cache and read it again. */ -static void test_case_2(struct btrfs_fs_info *fs_info, +static int test_case_2(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree) { struct extent_map *em; @@ -121,8 +127,7 @@ static void test_case_2(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) - /* Skip the test on error. 
*/ - return; + return -ENOMEM; /* Add [0, 1K) */ em->start = 0; @@ -135,8 +140,10 @@ static void test_case_2(struct btrfs_fs_info *fs_info, /* Add [4K, 4K) following [0, 1K) */ em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out; + } em->start = SZ_4K; em->len = SZ_4K; @@ -147,8 +154,10 @@ static void test_case_2(struct btrfs_fs_info *fs_info, free_extent_map(em); em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out; + } /* Add [0, 1K) */ em->start = 0; @@ -166,12 +175,15 @@ static void test_case_2(struct btrfs_fs_info *fs_info, ret, em->start, em->len, em->block_start, em->block_len); free_extent_map(em); + ret = 0; out: /* free memory */ free_extent_map_tree(em_tree); + + return ret; } -static void __test_case_3(struct btrfs_fs_info *fs_info, +static int __test_case_3(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree, u64 start) { struct extent_map *em; @@ -180,8 +192,7 @@ static void __test_case_3(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) - /* Skip this test on error. 
*/ - return; + return -ENOMEM; /* Add [4K, 8K) */ em->start = SZ_4K; @@ -193,8 +204,10 @@ static void __test_case_3(struct btrfs_fs_info *fs_info, free_extent_map(em); em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out; + } /* Add [0, 16K) */ em->start = 0; @@ -217,9 +230,12 @@ static void __test_case_3(struct btrfs_fs_info *fs_info, start, start + len, ret, em->start, em->len, em->block_start, em->block_len); free_extent_map(em); + ret = 0; out: /* free memory */ free_extent_map_tree(em_tree); + + return ret; } /* @@ -246,7 +262,7 @@ static void test_case_3(struct btrfs_fs_info *fs_info, __test_case_3(fs_info, em_tree, (12 * 1024ULL)); } -static void __test_case_4(struct btrfs_fs_info *fs_info, +static int __test_case_4(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree, u64 start) { struct extent_map *em; @@ -255,8 +271,7 @@ static void __test_case_4(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) - /* Skip this test on error. */ - return; + return -ENOMEM; /* Add [0K, 8K) */ em->start = 0; @@ -268,8 +283,10 @@ static void __test_case_4(struct btrfs_fs_info *fs_info, free_extent_map(em); em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out; + } /* Add [8K, 24K) */ em->start = SZ_8K; @@ -281,8 +298,10 @@ static void __test_case_4(struct btrfs_fs_info *fs_info, free_extent_map(em); em = alloc_extent_map(); - if (!em) + if (!em) { + ret = -ENOMEM; goto out; + } /* Add [0K, 32K) */ em->start = 0; em->len = SZ_32K; @@ -299,9 +318,12 @@ static void __test_case_4(struct btrfs_fs_info *fs_info, start, len, ret, em->start, em->len, em->block_start, em->block_len); free_extent_map(em); + ret = 0; out: /* free memory */ free_extent_map_tree(em_tree); + + return ret; } /* @@ -340,6 +362,7 @@ int btrfs_test_extent_map(void) { struct btrfs_fs_info *fs_info = NULL; struct extent_map_tree *em_tree; + int ret = 0; test_msg("running extent_map tests"); @@ -354,9 +377,10 @@ int btrfs_test_extent_map(void) } 
em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL); - if (!em_tree) - /* Skip the test on error. */ + if (!em_tree) { + ret = -ENOMEM; goto out; + } extent_map_tree_init(em_tree); @@ -369,5 +393,5 @@ int btrfs_test_extent_map(void) out: btrfs_free_dummy_fs_info(fs_info); - return 0; + return ret; } -- cgit v1.2.3-59-g8ed1b From d7de4b0864dd8ba844faf40771104e71bbbd129e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 18:41:06 +0100 Subject: btrfs: tests: return errors from extent map test case 1 Replace asserts with error messages and return errors. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index c31b1ac3bc2d..320c2842347c 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -65,7 +65,10 @@ static int test_case_1(struct btrfs_fs_info *fs_info, em->block_start = 0; em->block_len = SZ_16K; ret = add_extent_mapping(em_tree, em, 0); - ASSERT(ret == 0); + if (ret < 0) { + test_err("cannot add extent range [0, 16K)"); + goto out; + } free_extent_map(em); /* Add [16K, 20K) following [0, 16K) */ @@ -80,7 +83,10 @@ static int test_case_1(struct btrfs_fs_info *fs_info, em->block_start = SZ_32K; /* avoid merging */ em->block_len = SZ_4K; ret = add_extent_mapping(em_tree, em, 0); - ASSERT(ret == 0); + if (ret < 0) { + test_err("cannot add extent range [16K, 20K)"); + goto out; + } free_extent_map(em); em = alloc_extent_map(); @@ -95,19 +101,21 @@ static int test_case_1(struct btrfs_fs_info *fs_info, em->block_start = start; em->block_len = len; ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); - if (ret) + if (ret) { test_err("case1 [%llu %llu]: ret %d", start, start + len, ret); + goto out; + } if (em && (em->start != 0 || extent_map_end(em) != SZ_16K || - em->block_start != 0 || em->block_len != SZ_16K)) + em->block_start 
!= 0 || em->block_len != SZ_16K)) { test_err( "case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu", start, start + len, ret, em->start, em->len, em->block_start, em->block_len); + ret = -EINVAL; + } free_extent_map(em); - ret = 0; out: - /* free memory */ free_extent_map_tree(em_tree); return ret; -- cgit v1.2.3-59-g8ed1b From e71f2e17e8c9f9d4cf51a1ade535d65670d93c72 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 18:41:06 +0100 Subject: btrfs: tests: return errors from extent map test case 2 Replace asserts with error messages and return errors. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 320c2842347c..d56d03d6b781 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -143,7 +143,10 @@ static int test_case_2(struct btrfs_fs_info *fs_info, em->block_start = EXTENT_MAP_INLINE; em->block_len = (u64)-1; ret = add_extent_mapping(em_tree, em, 0); - ASSERT(ret == 0); + if (ret < 0) { + test_err("cannot add extent range [0, 1K)"); + goto out; + } free_extent_map(em); /* Add [4K, 4K) following [0, 1K) */ @@ -158,7 +161,10 @@ static int test_case_2(struct btrfs_fs_info *fs_info, em->block_start = SZ_4K; em->block_len = SZ_4K; ret = add_extent_mapping(em_tree, em, 0); - ASSERT(ret == 0); + if (ret < 0) { + test_err("cannot add extent range [4K, 8K)"); + goto out; + } free_extent_map(em); em = alloc_extent_map(); @@ -173,19 +179,21 @@ static int test_case_2(struct btrfs_fs_info *fs_info, em->block_start = EXTENT_MAP_INLINE; em->block_len = (u64)-1; ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); - if (ret) + if (ret) { test_err("case2 [0 1K]: ret %d", ret); + goto out; + } if (em && (em->start != 0 || extent_map_end(em) != SZ_1K || - em->block_start != 
EXTENT_MAP_INLINE || em->block_len != (u64)-1)) + em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) { test_err( "case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu", ret, em->start, em->len, em->block_start, em->block_len); + ret = -EINVAL; + } free_extent_map(em); - ret = 0; out: - /* free memory */ free_extent_map_tree(em_tree); return ret; -- cgit v1.2.3-59-g8ed1b From 992dce7494d6ca7bbfd41e21ec91bee30ecc38ff Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 18:41:06 +0100 Subject: btrfs: tests: return errors from extent map test case 3 Replace asserts with error messages and return errors. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index d56d03d6b781..17182b14ce9d 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -216,7 +216,10 @@ static int __test_case_3(struct btrfs_fs_info *fs_info, em->block_start = SZ_4K; em->block_len = SZ_4K; ret = add_extent_mapping(em_tree, em, 0); - ASSERT(ret == 0); + if (ret < 0) { + test_err("cannot add extent range [4K, 8K)"); + goto out; + } free_extent_map(em); em = alloc_extent_map(); @@ -231,24 +234,26 @@ static int __test_case_3(struct btrfs_fs_info *fs_info, em->block_start = 0; em->block_len = SZ_16K; ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); - if (ret) + if (ret) { test_err("case3 [0x%llx 0x%llx): ret %d", start, start + len, ret); + goto out; + } /* * Since bytes within em are contiguous, em->block_start is identical to * em->start. 
*/ if (em && (start < em->start || start + len > extent_map_end(em) || - em->start != em->block_start || em->len != em->block_len)) + em->start != em->block_start || em->len != em->block_len)) { test_err( "case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)", start, start + len, ret, em->start, em->len, em->block_start, em->block_len); + ret = -EINVAL; + } free_extent_map(em); - ret = 0; out: - /* free memory */ free_extent_map_tree(em_tree); return ret; -- cgit v1.2.3-59-g8ed1b From 7c6f670052f12a24dc3785ae0179d8199a76cd50 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 18:41:06 +0100 Subject: btrfs: tests: return errors from extent map test case 4 Replace asserts with error messages and return errors. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 17182b14ce9d..35cfb65f1016 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -300,7 +300,10 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em->block_start = 0; em->block_len = SZ_8K; ret = add_extent_mapping(em_tree, em, 0); - ASSERT(ret == 0); + if (ret < 0) { + test_err("cannot add extent range [0, 8K)"); + goto out; + } free_extent_map(em); em = alloc_extent_map(); @@ -315,7 +318,10 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em->block_start = SZ_16K; /* avoid merging */ em->block_len = 24 * 1024ULL; ret = add_extent_mapping(em_tree, em, 0); - ASSERT(ret == 0); + if (ret < 0) { + test_err("cannot add extent range [8K, 32K)"); + goto out; + } free_extent_map(em); em = alloc_extent_map(); @@ -329,19 +335,20 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em->block_start = 0; em->block_len = SZ_32K; ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); - if (ret) + if (ret) { 
test_err("case4 [0x%llx 0x%llx): ret %d", start, len, ret); - if (em && - (start < em->start || start + len > extent_map_end(em))) + goto out; + } + if (em && (start < em->start || start + len > extent_map_end(em))) { test_err( "case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)", start, len, ret, em->start, em->len, em->block_start, em->block_len); + ret = -EINVAL; + } free_extent_map(em); - ret = 0; out: - /* free memory */ free_extent_map_tree(em_tree); return ret; -- cgit v1.2.3-59-g8ed1b From ccfada1f650b636733567e1aa059a46424fda926 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 18 Mar 2019 15:05:27 +0100 Subject: btrfs: tests: return error from all extent map test cases The way the extent map tests handle errors does not conform to the rest of the suite, where the first failure is reported and then it stops. Do the same now that we have the errors returned from all the functions. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 44 ++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 35cfb65f1016..5e99c7d40ea1 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -275,12 +275,20 @@ out: * -> add_extent_mapping() * -> add_extent_mapping() */ -static void test_case_3(struct btrfs_fs_info *fs_info, +static int test_case_3(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree) { - __test_case_3(fs_info, em_tree, 0); - __test_case_3(fs_info, em_tree, SZ_8K); - __test_case_3(fs_info, em_tree, (12 * 1024ULL)); + int ret; + + ret = __test_case_3(fs_info, em_tree, 0); + if (ret) + return ret; + ret = __test_case_3(fs_info, em_tree, SZ_8K); + if (ret) + return ret; + ret = __test_case_3(fs_info, em_tree, (12 * 1024ULL)); + + return ret; } static int __test_case_4(struct btrfs_fs_info *fs_info, @@ -379,11 +387,17 @@ out: 
* # handle -EEXIST when adding * # [0, 32K) */ -static void test_case_4(struct btrfs_fs_info *fs_info, +static int test_case_4(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree) { - __test_case_4(fs_info, em_tree, 0); - __test_case_4(fs_info, em_tree, SZ_4K); + int ret; + + ret = __test_case_4(fs_info, em_tree, 0); + if (ret) + return ret; + ret = __test_case_4(fs_info, em_tree, SZ_4K); + + return ret; } int btrfs_test_extent_map(void) @@ -412,13 +426,19 @@ int btrfs_test_extent_map(void) extent_map_tree_init(em_tree); - test_case_1(fs_info, em_tree); - test_case_2(fs_info, em_tree); - test_case_3(fs_info, em_tree); - test_case_4(fs_info, em_tree); + ret = test_case_1(fs_info, em_tree); + if (ret) + goto out; + ret = test_case_2(fs_info, em_tree); + if (ret) + goto out; + ret = test_case_3(fs_info, em_tree); + if (ret) + goto out; + ret = test_case_4(fs_info, em_tree); - kfree(em_tree); out: + kfree(em_tree); btrfs_free_dummy_fs_info(fs_info); return ret; -- cgit v1.2.3-59-g8ed1b From 6c30474680888df8a652563ca246afdb23534d63 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 15 Mar 2019 17:28:46 +0100 Subject: btrfs: tests: use standard error message after extent map allocation failure Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 5e99c7d40ea1..9bf75f7d2c26 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -56,8 +56,10 @@ static int test_case_1(struct btrfs_fs_info *fs_info, int ret; em = alloc_extent_map(); - if (!em) + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); return -ENOMEM; + } /* Add [0, 16K) */ em->start = 0; @@ -74,6 +76,7 @@ static int test_case_1(struct btrfs_fs_info *fs_info, /* Add [16K, 20K) following [0, 16K) */ em = alloc_extent_map(); if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); ret = -ENOMEM; goto 
out; } @@ -91,6 +94,7 @@ static int test_case_1(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); ret = -ENOMEM; goto out; } @@ -134,8 +138,10 @@ static int test_case_2(struct btrfs_fs_info *fs_info, int ret; em = alloc_extent_map(); - if (!em) + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); return -ENOMEM; + } /* Add [0, 1K) */ em->start = 0; @@ -152,6 +158,7 @@ static int test_case_2(struct btrfs_fs_info *fs_info, /* Add [4K, 4K) following [0, 1K) */ em = alloc_extent_map(); if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); ret = -ENOMEM; goto out; } @@ -169,6 +176,7 @@ static int test_case_2(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); ret = -ENOMEM; goto out; } @@ -207,8 +215,10 @@ static int __test_case_3(struct btrfs_fs_info *fs_info, int ret; em = alloc_extent_map(); - if (!em) + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); return -ENOMEM; + } /* Add [4K, 8K) */ em->start = SZ_4K; @@ -224,6 +234,7 @@ static int __test_case_3(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); ret = -ENOMEM; goto out; } @@ -299,8 +310,10 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, int ret; em = alloc_extent_map(); - if (!em) + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); return -ENOMEM; + } /* Add [0K, 8K) */ em->start = 0; @@ -316,6 +329,7 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); ret = -ENOMEM; goto out; } @@ -334,6 +348,7 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em = alloc_extent_map(); if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); ret = -ENOMEM; goto out; } -- cgit v1.2.3-59-g8ed1b From 43f7cddc6e5ad161b5cb818c45bdbed8631dce74 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 18 Mar 2019 14:14:35 +0100 Subject: btrfs: tests: use SZ_ constants everywhere There 
are a few unconverted constants that are not powers of two and haven't been converted. Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 9bf75f7d2c26..a09783f19011 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -297,7 +297,7 @@ static int test_case_3(struct btrfs_fs_info *fs_info, ret = __test_case_3(fs_info, em_tree, SZ_8K); if (ret) return ret; - ret = __test_case_3(fs_info, em_tree, (12 * 1024ULL)); + ret = __test_case_3(fs_info, em_tree, (12 * SZ_1K)); return ret; } @@ -336,9 +336,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, /* Add [8K, 24K) */ em->start = SZ_8K; - em->len = 24 * 1024ULL; + em->len = 24 * SZ_1K; em->block_start = SZ_16K; /* avoid merging */ - em->block_len = 24 * 1024ULL; + em->block_len = 24 * SZ_1K; ret = add_extent_mapping(em_tree, em, 0); if (ret < 0) { test_err("cannot add extent range [8K, 32K)"); -- cgit v1.2.3-59-g8ed1b From 3173fd926c465aca52740497a5f5fac538a271fe Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 18 Mar 2019 14:19:33 +0100 Subject: btrfs: tests: fix comments about tested extent map ranges Comments about ranges did not match the code, the correct calculation is to use start and start+len as the interval boundaries. 
Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index a09783f19011..87aeabe9d610 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -155,7 +155,7 @@ static int test_case_2(struct btrfs_fs_info *fs_info, } free_extent_map(em); - /* Add [4K, 4K) following [0, 1K) */ + /* Add [4K, 8K) following [0, 1K) */ em = alloc_extent_map(); if (!em) { test_std_err(TEST_ALLOC_EXTENT_MAP); @@ -334,7 +334,7 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, goto out; } - /* Add [8K, 24K) */ + /* Add [8K, 32K) */ em->start = SZ_8K; em->len = 24 * SZ_1K; em->block_start = SZ_16K; /* avoid merging */ -- cgit v1.2.3-59-g8ed1b From 752dbe48e22aa749cbe63d52fda952108ae6249f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 18 Mar 2019 13:54:36 +0100 Subject: btrfs: tests: drop messages when some tests finish The messages like 'extent I/O tests finished' are redundant: if a test fails it's quite obvious in the log, and a hang is also noticeable. No tests other than the extent_io and free space tree tests print that, so make it consistent.
Signed-off-by: David Sterba --- fs/btrfs/tests/extent-io-tests.c | 1 - fs/btrfs/tests/free-space-tests.c | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index e88b7c9667f8..7bf4d5734dbe 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -444,6 +444,5 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) ret = test_eb_bitmaps(sectorsize, nodesize); out: - test_msg("extent I/O tests finished"); return ret; } diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index dcbe526e5698..d0fdc94a5d61 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -876,6 +876,5 @@ out: btrfs_free_dummy_block_group(cache); btrfs_free_dummy_root(root); btrfs_free_dummy_fs_info(fs_info); - test_msg("free space cache tests finished"); return ret; } -- cgit v1.2.3-59-g8ed1b From e4fa7469eb7a9331b42f9625bfcb2003a6e7eb1a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 18 Mar 2019 14:06:55 +0100 Subject: btrfs: tests: unify messages when tests start - make the messages more visually consistent and use same format "running ... 
test", any error or other warning can be easily spotted - move some message to the test entry function - add message to the inode tests Example output: [ 8.187391] Btrfs loaded, crc32c=crc32c-generic, assert=on, integrity-checker=on, ref-verify=on [ 8.189476] BTRFS: selftest: sectorsize: 4096 nodesize: 4096 [ 8.190761] BTRFS: selftest: running btrfs free space cache tests [ 8.192245] BTRFS: selftest: running extent only tests [ 8.193573] BTRFS: selftest: running bitmap only tests [ 8.194876] BTRFS: selftest: running bitmap and extent tests [ 8.196166] BTRFS: selftest: running space stealing from bitmap to extent tests [ 8.198026] BTRFS: selftest: running extent buffer operation tests [ 8.199328] BTRFS: selftest: running btrfs_split_item tests [ 8.200653] BTRFS: selftest: running extent I/O tests [ 8.201808] BTRFS: selftest: running find delalloc tests [ 8.320733] BTRFS: selftest: running extent buffer bitmap tests [ 8.340795] BTRFS: selftest: running inode tests [ 8.341766] BTRFS: selftest: running btrfs_get_extent tests [ 8.342981] BTRFS: selftest: running hole first btrfs_get_extent test [ 8.344342] BTRFS: selftest: running outstanding_extents tests [ 8.345575] BTRFS: selftest: running qgroup tests [ 8.346537] BTRFS: selftest: running qgroup add/remove tests [ 8.347725] BTRFS: selftest: running qgroup multiple refs test [ 8.354982] BTRFS: selftest: running free space tree tests [ 8.372175] BTRFS: selftest: sectorsize: 4096 nodesize: 8192 [ 8.373539] BTRFS: selftest: running btrfs free space cache tests [ 8.374989] BTRFS: selftest: running extent only tests [ 8.376236] BTRFS: selftest: running bitmap only tests [ 8.377483] BTRFS: selftest: running bitmap and extent tests [ 8.378854] BTRFS: selftest: running space stealing from bitmap to extent tests ... 
Signed-off-by: David Sterba --- fs/btrfs/tests/free-space-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 11 ++++++++--- fs/btrfs/tests/qgroup-tests.c | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index d0fdc94a5d61..af89f66f9e63 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -404,7 +404,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache, }; const struct btrfs_free_space_op *orig_free_space_ops; - test_msg("running space stealing from bitmap to extent"); + test_msg("running space stealing from bitmap to extent tests"); /* * For this test, we want to ensure we end up with an extent entry diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 46571cd27513..3d2c7abda5de 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -226,6 +226,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) u64 offset; int ret = -ENOMEM; + test_msg("running btrfs_get_extent tests"); + inode = btrfs_new_test_inode(); if (!inode) { test_std_err(TEST_ALLOC_INODE); @@ -827,6 +829,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) struct extent_map *em = NULL; int ret = -ENOMEM; + test_msg("running hole first btrfs_get_extent test"); + inode = btrfs_new_test_inode(); if (!inode) { test_std_err(TEST_ALLOC_INODE); @@ -927,6 +931,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) struct btrfs_root *root = NULL; int ret = -ENOMEM; + test_msg("running outstanding_extents tests"); + inode = btrfs_new_test_inode(); if (!inode) { test_std_err(TEST_ALLOC_INODE); @@ -1110,17 +1116,16 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize) { int ret; + test_msg("running inode tests"); + set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only); set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); - test_msg("running btrfs_get_extent tests"); ret = 
test_btrfs_get_extent(sectorsize, nodesize); if (ret) return ret; - test_msg("running hole first btrfs_get_extent test"); ret = test_hole_first(sectorsize, nodesize); if (ret) return ret; - test_msg("running outstanding_extents tests"); return test_extent_accounting(sectorsize, nodesize); } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index c85e4b955939..09aaca1efd62 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -215,7 +215,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, btrfs_init_dummy_trans(&trans, fs_info); - test_msg("qgroup basic add"); + test_msg("running qgroup add/remove tests"); ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID); if (ret) { test_err("couldn't create a qgroup %d", ret); @@ -316,7 +316,7 @@ static int test_multiple_refs(struct btrfs_root *root, btrfs_init_dummy_trans(&trans, fs_info); - test_msg("qgroup multiple refs test"); + test_msg("running qgroup multiple refs test"); /* * We have BTRFS_FS_TREE_OBJECTID created already from the -- cgit v1.2.3-59-g8ed1b From d4eb671a0882ee58774e201b1ada3d19b99394b9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Mar 2019 20:20:48 +0100 Subject: btrfs: remove stale definition of BUFFER_LRU_MAX Long time ago (2008), the extent buffers were organized in a LRU list and switched to rb-tree in 6af118ce51b52ced ("Btrfs: Index extent buffers in an rbtree"). There was one stale macro definition left. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e304d5f50c5a..7eb7f8a30ec2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -109,8 +109,6 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) #endif -#define BUFFER_LRU_MAX 64 - struct tree_entry { u64 start; u64 end; -- cgit v1.2.3-59-g8ed1b From 247462a5aca5154c8542830e3ec4d1fce2733433 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Mar 2019 20:21:05 +0100 Subject: btrfs: move tree block wait and write helpers to tree-log The wrapper names better describe what's happening so they're not deleted though they're trivial, but at least moved closer to their place of use. Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 13 ------------- fs/btrfs/disk-io.h | 2 -- fs/btrfs/tree-log.c | 11 +++++++++++ 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f4ef3a1a321b..b29bb6e1a283 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1078,19 +1078,6 @@ struct extent_buffer *btrfs_find_create_tree_block( return alloc_extent_buffer(fs_info, bytenr); } - -int btrfs_write_tree_block(struct extent_buffer *buf) -{ - return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start, - buf->start + buf->len - 1); -} - -void btrfs_wait_tree_block_writeback(struct extent_buffer *buf) -{ - filemap_fdatawait_range(buf->pages[0]->mapping, - buf->start, buf->start + buf->len - 1); -} - /* * Read tree block at logical address @bytenr and do variant basic but critical * verification. 
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 67a9fe2d29c7..a94ce82a2be0 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -126,8 +126,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, extent_submit_bio_start_t *submit_bio_start); blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, int mirror_num); -int btrfs_write_tree_block(struct extent_buffer *buf); -void btrfs_wait_tree_block_writeback(struct extent_buffer *buf); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_add_log_tree(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 561884f60d35..a59e98c9fbb9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -225,6 +225,17 @@ void btrfs_end_log_trans(struct btrfs_root *root) } } +static int btrfs_write_tree_block(struct extent_buffer *buf) +{ + return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start, + buf->start + buf->len - 1); +} + +static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf) +{ + filemap_fdatawait_range(buf->pages[0]->mapping, + buf->start, buf->start + buf->len - 1); +} /* * the walk control struct is used to pass state down the chain when -- cgit v1.2.3-59-g8ed1b From 290342f66108638048997b71393f0dd88e771352 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 14:02:25 +0100 Subject: btrfs: use BUG() instead of BUG_ON(1) BUG_ON(1) leads to bogus warnings from clang when CONFIG_PROFILE_ANNOTATED_BRANCHES is set: fs/btrfs/volumes.c:5041:3: error: variable 'max_chunk_size' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] BUG_ON(1); ^~~~~~~~~ include/asm-generic/bug.h:61:36: note: expanded from macro 'BUG_ON' #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0) ^~~~~~~~~~~~~~~~~~~ include/linux/compiler.h:48:23: note: expanded from macro 'unlikely' # define 
unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x))) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/btrfs/volumes.c:5046:9: note: uninitialized use occurs here max_chunk_size); ^~~~~~~~~~~~~~ include/linux/kernel.h:860:36: note: expanded from macro 'min' #define min(x, y) __careful_cmp(x, y, <) ^ include/linux/kernel.h:853:17: note: expanded from macro '__careful_cmp' __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) ^ include/linux/kernel.h:847:25: note: expanded from macro '__cmp_once' typeof(y) unique_y = (y); \ ^ fs/btrfs/volumes.c:5041:3: note: remove the 'if' if its condition is always true BUG_ON(1); ^ include/asm-generic/bug.h:61:32: note: expanded from macro 'BUG_ON' #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0) ^ fs/btrfs/volumes.c:4993:20: note: initialize the variable 'max_chunk_size' to silence this warning u64 max_chunk_size; ^ = 0 Change it to BUG() so clang can see that this code path can never continue. Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: Arnd Bergmann Signed-off-by: David Sterba --- fs/btrfs/backref.c | 4 ++-- fs/btrfs/ctree.c | 4 ++-- fs/btrfs/extent_io.c | 8 ++++---- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/volumes.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 11459fe84a29..2602072ed906 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -791,7 +791,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, count = node->ref_mod * -1; break; default: - BUG_ON(1); + BUG(); } *total_refs += count; switch (node->type) { @@ -1747,7 +1747,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, else if (flags & BTRFS_EXTENT_FLAG_DATA) *flags_ret = BTRFS_EXTENT_FLAG_DATA; else - BUG_ON(1); + BUG(); return 0; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 65b12963e72b..7b1bc25f10cb 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ 
-4684,7 +4684,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, btrfs_print_leaf(leaf); btrfs_crit(fs_info, "slot %d too large, nritems %d", slot, nritems); - BUG_ON(1); + BUG(); } /* @@ -4764,7 +4764,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, btrfs_print_leaf(leaf); btrfs_crit(fs_info, "slot %d old_data %d data_end %d", slot, old_data, data_end); - BUG_ON(1); + BUG(); } /* * item0..itemN ... dataN.offset..dataN.size .. data0.size diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7eb7f8a30ec2..66b4637b29b3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5786,13 +5786,13 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, btrfs_err(fs_info, "memmove bogus src_offset %lu move len %lu dst len %lu", src_offset, len, dst->len); - BUG_ON(1); + BUG(); } if (dst_offset + len > dst->len) { btrfs_err(fs_info, "memmove bogus dst_offset %lu move len %lu dst len %lu", dst_offset, len, dst->len); - BUG_ON(1); + BUG(); } while (len > 0) { @@ -5833,13 +5833,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, btrfs_err(fs_info, "memmove bogus src_offset %lu move len %lu len %lu", src_offset, len, dst->len); - BUG_ON(1); + BUG(); } if (dst_offset + len > dst->len) { btrfs_err(fs_info, "memmove bogus dst_offset %lu move len %lu len %lu", dst_offset, len, dst->len); - BUG_ON(1); + BUG(); } if (dst_offset < src_offset) { memcpy_extent_buffer(dst, dst_offset, src_offset, len); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 94c1c86fd18a..5e6aee84daee 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1025,7 +1025,7 @@ delete_extent_item: continue; } - BUG_ON(1); + BUG(); } if (!ret && del_nr > 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a59619631194..4706018d1f9e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1451,7 +1451,7 @@ next_slot: extent_end = ALIGN(extent_end, 
fs_info->sectorsize); } else { - BUG_ON(1); + BUG(); } out_check: if (extent_end <= start) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index db934ceae9c1..fcb0d3f34e09 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5038,7 +5038,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } else { btrfs_err(info, "invalid chunk type 0x%llx requested", type); - BUG_ON(1); + BUG(); } /* We don't want a chunk larger than 10% of writable space */ -- cgit v1.2.3-59-g8ed1b From 7984ae52bbf75def1d7fcbf4c902e8f787e1ef9d Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Mon, 25 Feb 2019 13:07:44 -0600 Subject: btrfs: Perform locking/unlocking in btrfs_remap_file_range() Move code to make it more readable, so that locking and unlocking are done in the same function. The generic checks that are now performed in the locked section are unaffected. Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4d945b3d88e3..8c9a908d3acc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3950,16 +3950,10 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, return -EXDEV; } - if (same_inode) - inode_lock(inode_in); - else - lock_two_nondirectories(inode_in, inode_out); - /* don't make the dst file partly checksummed */ if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) != (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) { - ret = -EINVAL; - goto out_unlock; + return -EINVAL; } /* @@ -3993,26 +3987,14 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), wb_len); if (ret < 0) - goto out_unlock; + return ret; ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs), wb_len); if (ret < 0) - goto out_unlock; + return ret; - ret
= generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, + return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags); - if (ret < 0 || *len == 0) - goto out_unlock; - - return 0; - - out_unlock: - if (same_inode) - inode_unlock(inode_in); - else - unlock_two_nondirectories(inode_in, inode_out); - - return ret; } loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, @@ -4027,16 +4009,22 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; + if (same_inode) + inode_lock(src_inode); + else + lock_two_nondirectories(src_inode, dst_inode); + ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff, &len, remap_flags); if (ret < 0 || len == 0) - return ret; + goto out_unlock; if (remap_flags & REMAP_FILE_DEDUP) ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff); else ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); +out_unlock: if (same_inode) inode_unlock(src_inode); else -- cgit v1.2.3-59-g8ed1b From 7d157c3d4810cfb9ea1bb0977e8e2db02032173a Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Tue, 26 Mar 2019 21:39:34 +0000 Subject: btrfs: use common file type conversion Deduplicate the btrfs file type conversion implementation - file systems that use the same file types as defined by POSIX do not need to define their own versions and can use the common helper functions declared in fs_types.h and implemented in fs_types.c. Common implementation can be found via commit: bbe7449e2599 "fs: common implementation of file type" Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Phillip Potter Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 2 -- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/inode.c | 32 +++++++++++++++----------------- include/uapi/linux/btrfs_tree.h | 2 ++ 4 files changed, 18 insertions(+), 20 deletions(-) diff --git
a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6f5d07415dab..b16c13d51be0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -203,8 +203,6 @@ struct btrfs_inode { struct inode vfs_inode; }; -extern unsigned char btrfs_filetype_table[]; - static inline struct btrfs_inode *BTRFS_I(const struct inode *inode) { return container_of(inode, struct btrfs_inode, vfs_inode); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c669f250d4a0..e61947f5eb76 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1692,7 +1692,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, name = (char *)(di + 1); name_len = btrfs_stack_dir_name_len(di); - d_type = btrfs_filetype_table[di->type]; + d_type = fs_ftype_to_dtype(di->type); btrfs_disk_key_to_cpu(&location, &di->location); over = !dir_emit(ctx, name, name_len, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4706018d1f9e..1d81a7a78a3f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -73,17 +73,6 @@ struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_path_cachep; struct kmem_cache *btrfs_free_space_cachep; -#define S_SHIFT 12 -static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, -}; - static int btrfs_setsize(struct inode *inode, struct iattr *attr); static int btrfs_truncate(struct inode *inode, bool skip_writeback); static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); @@ -5797,10 +5786,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -unsigned char btrfs_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, 
DT_FIFO, DT_SOCK, DT_LNK -}; - /* * All this infrastructure exists because dir_emit can fault, and we are holding * the tree lock when doing readdir. For now just allocate a buffer and copy @@ -5939,7 +5924,7 @@ again: name_ptr = (char *)(entry + 1); read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), name_len); - put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], + put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)), &entry->type); btrfs_dir_item_key_to_cpu(leaf, di, &location); put_unaligned(location.objectid, &entry->ino); @@ -6344,7 +6329,20 @@ fail: static inline u8 btrfs_inode_type(struct inode *inode) { - return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; + /* + * Compile-time asserts that generic FT_* types still match + * BTRFS_FT_* types + */ + BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN); + BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE); + BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR); + BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV); + BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV); + BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO); + BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK); + BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK); + + return fs_umode_to_ftype(inode->i_mode); } /* diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index e974f4bb5378..421239b98db2 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -307,6 +307,8 @@ * * Used by: * struct btrfs_dir_item.type + * + * Values 0..7 must match common file type values in fs_types.h. */ #define BTRFS_FT_UNKNOWN 0 #define BTRFS_FT_REG_FILE 1 -- cgit v1.2.3-59-g8ed1b From 9df76fb5442abdcf5dcc1b2676927482614ac43f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:21:41 +0100 Subject: btrfs: get fs_info from eb in lock_extent_buffer_for_io We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 66b4637b29b3..853a5fcec193 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3497,11 +3497,10 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) * Return >0 is same as 0, except bio is not submitted * Return <0 if something went wrong, no page is locked */ -static noinline_for_stack int -lock_extent_buffer_for_io(struct extent_buffer *eb, - struct btrfs_fs_info *fs_info, +static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb, struct extent_page_data *epd) { + struct btrfs_fs_info *fs_info = eb->fs_info; int i, num_pages, failed_page_nr; int flush = 0; int ret = 0; @@ -3829,7 +3828,7 @@ retry: continue; prev_eb = eb; - ret = lock_extent_buffer_for_io(eb, fs_info, &epd); + ret = lock_extent_buffer_for_io(eb, &epd); if (!ret) { free_extent_buffer(eb); continue; -- cgit v1.2.3-59-g8ed1b From 20a1fbf97e11204e099a95167f1851fc54296a00 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:23:44 +0100 Subject: btrfs: get fs_info from eb in repair_eb_io_failure We can read fs_info from extent buffer and can drop it from the parameters. As all callsites are updated, add the btrfs_ prefix as the function is exported. 
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/extent_io.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b29bb6e1a283..9c5b87bc0813 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -497,7 +497,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, } if (failed && !ret && failed_mirror) - repair_eb_io_failure(fs_info, eb, failed_mirror); + btrfs_repair_eb_io_failure(eb, failed_mirror); return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 853a5fcec193..276c9250ca23 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2089,9 +2089,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, return 0; } -int repair_eb_io_failure(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int mirror_num) +int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num) { + struct btrfs_fs_info *fs_info = eb->fs_info; u64 start = eb->start; int i, num_pages = num_extent_pages(eb); int ret = 0; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 586baed03780..c4ec104ac157 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -502,8 +502,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info, struct extent_io_tree *io_tree, u64 start, struct page *page, u64 ino, unsigned int pg_offset); void end_extent_writepage(struct page *page, int err, u64 start, u64 end); -int repair_eb_io_failure(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int mirror_num); +int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num); /* * When IO fails, either with EIO or csum verification fails, we -- cgit v1.2.3-59-g8ed1b From 0ab020632860e202713a7615f82a59f595f028ae Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:27:57 +0100 Subject: btrfs: get fs_info from eb in write_one_eb We can read fs_info from extent buffer 
and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 276c9250ca23..d33d4d2ef099 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3681,10 +3681,10 @@ static void end_bio_extent_buffer_writepage(struct bio *bio) } static noinline_for_stack int write_one_eb(struct extent_buffer *eb, - struct btrfs_fs_info *fs_info, struct writeback_control *wbc, struct extent_page_data *epd) { + struct btrfs_fs_info *fs_info = eb->fs_info; struct block_device *bdev = fs_info->fs_devices->latest_bdev; struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; u64 offset = eb->start; @@ -3753,7 +3753,6 @@ int btree_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc) { struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree; - struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info; struct extent_buffer *eb, *prev_eb = NULL; struct extent_page_data epd = { .bio = NULL, @@ -3834,7 +3833,7 @@ retry: continue; } - ret = write_one_eb(eb, fs_info, wbc, &epd); + ret = write_one_eb(eb, wbc, &epd); if (ret) { done = 1; free_extent_buffer(eb); -- cgit v1.2.3-59-g8ed1b From 8f881e8c1880fb7029e74ccdaa7891bd042b6c63 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:33:10 +0100 Subject: btrfs: get fs_info from eb in leaf_data_end We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 24 ++++++++++++------------ fs/btrfs/ctree.h | 5 ++--- fs/btrfs/extent_io.c | 2 +- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7b1bc25f10cb..5be2beef18be 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3665,10 +3665,10 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, right_nritems = btrfs_header_nritems(right); push_space = btrfs_item_end_nr(left, left_nritems - push_items); - push_space -= leaf_data_end(fs_info, left); + push_space -= leaf_data_end(left); /* make room in the right data area */ - data_end = leaf_data_end(fs_info, right); + data_end = leaf_data_end(right); memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + data_end - push_space, BTRFS_LEAF_DATA_OFFSET + data_end, @@ -3677,7 +3677,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, /* copy from the left data area */ copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, - BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, left), + BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left), push_space); memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), @@ -3892,7 +3892,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, btrfs_item_offset_nr(right, push_items - 1); copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET + - leaf_data_end(fs_info, left) - push_space, + leaf_data_end(left) - push_space, BTRFS_LEAF_DATA_OFFSET + btrfs_item_offset_nr(right, push_items - 1), push_space); @@ -3919,11 +3919,11 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, if (push_items < right_nritems) { push_space = btrfs_item_offset_nr(right, push_items - 1) - - leaf_data_end(fs_info, right); + leaf_data_end(right); memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, BTRFS_LEAF_DATA_OFFSET + - leaf_data_end(fs_info, right), 
push_space); + leaf_data_end(right), push_space); memmove_extent_buffer(right, btrfs_item_nr_offset(0), btrfs_item_nr_offset(push_items), @@ -4065,7 +4065,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, nritems = nritems - mid; btrfs_set_header_nritems(right, nritems); - data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(fs_info, l); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l); copy_extent_buffer(right, l, btrfs_item_nr_offset(0), btrfs_item_nr_offset(mid), @@ -4074,7 +4074,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, copy_extent_buffer(right, l, BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - data_copy_size, BTRFS_LEAF_DATA_OFFSET + - leaf_data_end(fs_info, l), data_copy_size); + leaf_data_end(l), data_copy_size); rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid); @@ -4577,7 +4577,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info, return; nritems = btrfs_header_nritems(leaf); - data_end = leaf_data_end(fs_info, leaf); + data_end = leaf_data_end(leaf); old_data_start = btrfs_item_offset_nr(leaf, slot); @@ -4670,7 +4670,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - data_end = leaf_data_end(fs_info, leaf); + data_end = leaf_data_end(leaf); if (btrfs_leaf_free_space(fs_info, leaf) < data_size) { btrfs_print_leaf(leaf); @@ -4748,7 +4748,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, slot = path->slots[0]; nritems = btrfs_header_nritems(leaf); - data_end = leaf_data_end(fs_info, leaf); + data_end = leaf_data_end(leaf); if (btrfs_leaf_free_space(fs_info, leaf) < total_size) { btrfs_print_leaf(leaf); @@ -4976,7 +4976,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems = btrfs_header_nritems(leaf); if (slot + nr != nritems) { - int data_end = leaf_data_end(fs_info, leaf); + 
int data_end = leaf_data_end(leaf); memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + data_end + dsize, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c03852d1aa34..7fabe4fd0800 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2443,13 +2443,12 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s) * this returns the address of the start of the last item, * which is the stop of the leaf data stack */ -static inline unsigned int leaf_data_end(const struct btrfs_fs_info *fs_info, - const struct extent_buffer *leaf) +static inline unsigned int leaf_data_end(const struct extent_buffer *leaf) { u32 nr = btrfs_header_nritems(leaf); if (nr == 0) - return BTRFS_LEAF_DATA_SIZE(fs_info); + return BTRFS_LEAF_DATA_SIZE(leaf->fs_info); return btrfs_item_offset_nr(leaf, nr - 1); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d33d4d2ef099..a3e3e95c632e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3710,7 +3710,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 */ start = btrfs_item_nr_offset(nritems); - end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb); + end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb); memzero_extent_buffer(eb, start, end - start); } -- cgit v1.2.3-59-g8ed1b From bcdc428cfe7dda315ad128cbe06abe72add2c73f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 12:14:33 +0100 Subject: btrfs: get fs_info from eb in btrfs_exclude_logged_extents We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/tree-log.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7fabe4fd0800..1ec08bb1c2e0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2708,8 +2708,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num, int reserved); int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes); -int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb); +int btrfs_exclude_logged_extents(struct extent_buffer *eb); int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, u64 bytenr); struct btrfs_block_group_cache *btrfs_lookup_block_group( diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b0c86a817a99..44fd4553004b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6601,9 +6601,9 @@ out_lock: return ret; } -int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb) +int btrfs_exclude_logged_extents(struct extent_buffer *eb) { + struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_file_extent_item *item; struct btrfs_key key; int found_type; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a59e98c9fbb9..4cb61cb72c4e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -315,7 +315,7 @@ static int process_one_buffer(struct btrfs_root *log, if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { if (wc->pin && btrfs_header_level(eb) == 0) - ret = btrfs_exclude_logged_extents(fs_info, eb); + ret = btrfs_exclude_logged_extents(eb); if (wc->write) btrfs_write_tree_block(eb); if (wc->wait) -- cgit v1.2.3-59-g8ed1b From b0c9b3b05dcbe9463550277af223f5d173aec522 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:12:00 +0100 Subject: btrfs: get fs_info from eb in check_tree_block_fsid We can read fs_info from extent 
buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9c5b87bc0813..ab7e2c5ba556 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -539,9 +539,9 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) return 0; } -static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb) +static int check_tree_block_fsid(struct extent_buffer *eb) { + struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; u8 fsid[BTRFS_FSID_SIZE]; int ret = 1; @@ -611,7 +611,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, ret = -EIO; goto err; } - if (check_tree_block_fsid(fs_info, eb)) { + if (check_tree_block_fsid(eb)) { btrfs_err_rl(fs_info, "bad fsid on block %llu", eb->start); ret = -EIO; -- cgit v1.2.3-59-g8ed1b From ed874f0db89724d0af1b4793fb518f640f333b0b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:22:04 +0100 Subject: btrfs: get fs_info from eb in tree_mod_log_eb_copy We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5be2beef18be..b83627a372b1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -726,11 +726,11 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) return __tree_mod_log_search(fs_info, start, min_seq, 0); } -static noinline int -tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, +static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, unsigned long src_offset, int nr_items) { + struct btrfs_fs_info *fs_info = dst->fs_info; int ret = 0; struct tree_mod_elem **tm_list = NULL; struct tree_mod_elem **tm_list_add, **tm_list_rem; @@ -3249,8 +3249,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, } else push_items = min(src_nritems - 8, push_items); - ret = tree_mod_log_eb_copy(fs_info, dst, src, dst_nritems, 0, - push_items); + ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items); if (ret) { btrfs_abort_transaction(trans, ret); return ret; @@ -3325,8 +3324,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, (dst_nritems) * sizeof(struct btrfs_key_ptr)); - ret = tree_mod_log_eb_copy(fs_info, dst, src, 0, - src_nritems - push_items, push_items); + ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items, + push_items); if (ret) { btrfs_abort_transaction(trans, ret); return ret; @@ -3511,7 +3510,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, root_add_used(root, fs_info->nodesize); ASSERT(btrfs_header_level(c) == level); - ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid); + ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); if (ret) { btrfs_abort_transaction(trans, ret); return ret; -- cgit v1.2.3-59-g8ed1b From 6a884d7d527f32b5ea80dc472968a5430ffee9f5 Mon Sep 17 00:00:00 2001 
From: David Sterba Date: Wed, 20 Mar 2019 14:30:02 +0100 Subject: btrfs: get fs_info from eb in clean_tree_block We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 16 ++++++++-------- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 6 +++--- fs/btrfs/free-space-tree.c | 2 +- fs/btrfs/qgroup.c | 2 +- fs/btrfs/tree-log.c | 6 +++--- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b83627a372b1..76e57a8c98df 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -970,7 +970,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (ret) return ret; } - clean_tree_block(fs_info, buf); + btrfs_clean_tree_block(buf); *last_ref = 1; } return 0; @@ -1888,7 +1888,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, path->locks[level] = 0; path->nodes[level] = NULL; - clean_tree_block(fs_info, mid); + btrfs_clean_tree_block(mid); btrfs_tree_unlock(mid); /* once for the path */ free_extent_buffer(mid); @@ -1949,7 +1949,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { - clean_tree_block(fs_info, right); + btrfs_clean_tree_block(right); btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); @@ -1994,7 +1994,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BUG_ON(wret == 1); } if (btrfs_header_nritems(mid) == 0) { - clean_tree_block(fs_info, mid); + btrfs_clean_tree_block(mid); btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); @@ -3704,7 +3704,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, if (left_nritems) btrfs_mark_buffer_dirty(left); else - clean_tree_block(fs_info, left); + btrfs_clean_tree_block(left); 
btrfs_mark_buffer_dirty(right); @@ -3716,7 +3716,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; if (btrfs_header_nritems(path->nodes[0]) == 0) - clean_tree_block(fs_info, path->nodes[0]); + btrfs_clean_tree_block(path->nodes[0]); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3944,7 +3944,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, if (right_nritems) btrfs_mark_buffer_dirty(right); else - clean_tree_block(fs_info, right); + btrfs_clean_tree_block(right); btrfs_item_key(right, &disk_key, 0); fixup_low_keys(path, &disk_key, 1); @@ -5005,7 +5005,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_level(leaf, 0); } else { btrfs_set_path_blocking(path); - clean_tree_block(fs_info, leaf); + btrfs_clean_tree_block(leaf); btrfs_del_leaf(trans, root, path, leaf); } } else { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ab7e2c5ba556..64978fc36745 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1107,9 +1107,9 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, } -void clean_tree_block(struct btrfs_fs_info *fs_info, - struct extent_buffer *buf) +void btrfs_clean_tree_block(struct extent_buffer *buf) { + struct btrfs_fs_info *fs_info = buf->fs_info; if (btrfs_header_generation(buf) == fs_info->running_transaction->transid) { btrfs_assert_tree_locked(buf); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a94ce82a2be0..917634f2bae9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -51,7 +51,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, struct extent_buffer *btrfs_find_create_tree_block( struct btrfs_fs_info *fs_info, u64 bytenr); -void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf); +void btrfs_clean_tree_block(struct 
extent_buffer *buf); int open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 44fd4553004b..aa52b0995fba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8557,7 +8557,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); - clean_tree_block(fs_info, buf); + btrfs_clean_tree_block(buf); clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); btrfs_set_lock_blocking_write(buf); @@ -9252,14 +9252,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, ret); } } - /* make block locked assertion in clean_tree_block happy */ + /* make block locked assertion in btrfs_clean_tree_block happy */ if (!path->locks[level] && btrfs_header_generation(eb) == trans->transid) { btrfs_tree_lock(eb); btrfs_set_lock_blocking_write(eb); path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; } - clean_tree_block(fs_info, eb); + btrfs_clean_tree_block(eb); } if (eb == root->node) { diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index e5089087eaa6..4859e09d2af0 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1248,7 +1248,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) list_del(&free_space_root->dirty_list); btrfs_tree_lock(free_space_root->node); - clean_tree_block(fs_info, free_space_root->node); + btrfs_clean_tree_block(free_space_root->node); btrfs_tree_unlock(free_space_root->node); btrfs_free_tree_block(trans, free_space_root, free_space_root->node, 0, 1); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e659d9d61107..95361ccaa488 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1101,7 +1101,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) list_del(&quota_root->dirty_list); btrfs_tree_lock(quota_root->node); - clean_tree_block(fs_info,
quota_root->node); + btrfs_clean_tree_block(quota_root->node); btrfs_tree_unlock(quota_root->node); btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4cb61cb72c4e..79f75bec9f40 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2736,7 +2736,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, if (trans) { btrfs_tree_lock(next); btrfs_set_lock_blocking_write(next); - clean_tree_block(fs_info, next); + btrfs_clean_tree_block(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); } else { @@ -2820,7 +2820,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, if (trans) { btrfs_tree_lock(next); btrfs_set_lock_blocking_write(next); - clean_tree_block(fs_info, next); + btrfs_clean_tree_block(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); } else { @@ -2902,7 +2902,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, if (trans) { btrfs_tree_lock(next); btrfs_set_lock_blocking_write(next); - clean_tree_block(fs_info, next); + btrfs_clean_tree_block(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); } else { -- cgit v1.2.3-59-g8ed1b From e902baac656479bdb956224ed693578424cf9e96 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:36:46 +0100 Subject: btrfs: get fs_info from eb in btrfs_leaf_free_space We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 58 +++++++++++++++++++++++------------------------- fs/btrfs/ctree.h | 3 +-- fs/btrfs/delayed-inode.c | 3 +-- fs/btrfs/file-item.c | 4 ++-- fs/btrfs/file.c | 2 +- fs/btrfs/print-tree.c | 2 +- fs/btrfs/xattr.c | 2 +- 7 files changed, 35 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 76e57a8c98df..597337b07afa 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2716,7 +2716,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *b; int slot; int ret; @@ -2914,7 +2913,7 @@ cow_done: } else { p->slots[level] = slot; if (ins_len > 0 && - btrfs_leaf_free_space(fs_info, b) < ins_len) { + btrfs_leaf_free_space(b) < ins_len) { if (write_lock_level < 1) { write_lock_level = 1; btrfs_release_path(p); @@ -3574,9 +3573,9 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) * the start of the leaf data. 
IOW, how much room * the leaf has left for both items and data */ -noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf) +noinline int btrfs_leaf_free_space(struct extent_buffer *leaf) { + struct btrfs_fs_info *fs_info = leaf->fs_info; int nritems = btrfs_header_nritems(leaf); int ret; @@ -3635,7 +3634,8 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, if (path->slots[0] > i) break; if (path->slots[0] == i) { - int space = btrfs_leaf_free_space(fs_info, left); + int space = btrfs_leaf_free_space(left); + if (space + push_space * 2 > free_space) break; } @@ -3778,7 +3778,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_tree_lock(right); btrfs_set_lock_blocking_write(right); - free_space = btrfs_leaf_free_space(fs_info, right); + free_space = btrfs_leaf_free_space(right); if (free_space < data_size) goto out_unlock; @@ -3788,7 +3788,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) goto out_unlock; - free_space = btrfs_leaf_free_space(fs_info, right); + free_space = btrfs_leaf_free_space(right); if (free_space < data_size) goto out_unlock; @@ -3858,7 +3858,8 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, if (path->slots[0] < i) break; if (path->slots[0] == i) { - int space = btrfs_leaf_free_space(fs_info, right); + int space = btrfs_leaf_free_space(right); + if (space + push_space * 2 > free_space) break; } @@ -4012,7 +4013,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_tree_lock(left); btrfs_set_lock_blocking_write(left); - free_space = btrfs_leaf_free_space(fs_info, left); + free_space = btrfs_leaf_free_space(left); if (free_space < data_size) { ret = 1; goto out; @@ -4028,7 +4029,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - free_space = btrfs_leaf_free_space(fs_info, left); + free_space = 
btrfs_leaf_free_space(left); if (free_space < data_size) { ret = 1; goto out; @@ -4124,7 +4125,6 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans, struct btrfs_path *path, int data_size) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret; int progress = 0; int slot; @@ -4133,7 +4133,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans, slot = path->slots[0]; if (slot < btrfs_header_nritems(path->nodes[0])) - space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]); + space_needed -= btrfs_leaf_free_space(path->nodes[0]); /* * try to push all the items after our slot into the @@ -4154,14 +4154,14 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans, if (path->slots[0] == 0 || path->slots[0] == nritems) return 0; - if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size) + if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) return 0; /* try to push all the items before our slot into the next leaf */ slot = path->slots[0]; space_needed = data_size; if (slot > 0) - space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]); + space_needed -= btrfs_leaf_free_space(path->nodes[0]); ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); if (ret < 0) return ret; @@ -4210,7 +4210,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int space_needed = data_size; if (slot < btrfs_header_nritems(l)) - space_needed -= btrfs_leaf_free_space(fs_info, l); + space_needed -= btrfs_leaf_free_space(l); wret = push_leaf_right(trans, root, path, space_needed, space_needed, 0, 0); @@ -4219,8 +4219,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, if (wret) { space_needed = data_size; if (slot > 0) - space_needed -= btrfs_leaf_free_space(fs_info, - l); + space_needed -= btrfs_leaf_free_space(l); wret = push_leaf_left(trans, root, path, space_needed, space_needed, 0, (u32)-1); if (wret < 0) @@ -4229,7 +4228,7 @@ static noinline int 
split_leaf(struct btrfs_trans_handle *trans, l = path->nodes[0]; /* did the pushes work? */ - if (btrfs_leaf_free_space(fs_info, l) >= data_size) + if (btrfs_leaf_free_space(l) >= data_size) return 0; } @@ -4336,7 +4335,7 @@ again: push_for_double: push_for_double_split(trans, root, path, data_size); tried_avoid_double = 1; - if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size) + if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) return 0; goto again; } @@ -4345,7 +4344,6 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int ins_len) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; @@ -4359,7 +4357,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && key.type != BTRFS_EXTENT_CSUM_KEY); - if (btrfs_leaf_free_space(fs_info, leaf) >= ins_len) + if (btrfs_leaf_free_space(leaf) >= ins_len) return 0; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -4386,7 +4384,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, goto err; /* the leaf has changed, it now has room. 
return now */ - if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= ins_len) + if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len) goto err; if (key.type == BTRFS_EXTENT_DATA_KEY) { @@ -4425,7 +4423,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info, struct btrfs_disk_key disk_key; leaf = path->nodes[0]; - BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < sizeof(struct btrfs_item)); + BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)); btrfs_set_path_blocking(path); @@ -4474,7 +4472,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info, item_size - split_offset); btrfs_mark_buffer_dirty(leaf); - BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < 0); + BUG_ON(btrfs_leaf_free_space(leaf) < 0); kfree(buf); return 0; } @@ -4642,7 +4640,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info, btrfs_set_item_size(leaf, item, new_size); btrfs_mark_buffer_dirty(leaf); - if (btrfs_leaf_free_space(fs_info, leaf) < 0) { + if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); BUG(); } @@ -4671,7 +4669,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(leaf); - if (btrfs_leaf_free_space(fs_info, leaf) < data_size) { + if (btrfs_leaf_free_space(leaf) < data_size) { btrfs_print_leaf(leaf); BUG(); } @@ -4710,7 +4708,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, btrfs_set_item_size(leaf, item, old_size + data_size); btrfs_mark_buffer_dirty(leaf); - if (btrfs_leaf_free_space(fs_info, leaf) < 0) { + if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); BUG(); } @@ -4749,10 +4747,10 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(leaf); - if (btrfs_leaf_free_space(fs_info, leaf) < total_size) { + if (btrfs_leaf_free_space(leaf) < total_size) { btrfs_print_leaf(leaf); btrfs_crit(fs_info, "not enough 
freespace need %u have %d", - total_size, btrfs_leaf_free_space(fs_info, leaf)); + total_size, btrfs_leaf_free_space(leaf)); BUG(); } @@ -4803,7 +4801,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_header_nritems(leaf, nritems + nr); btrfs_mark_buffer_dirty(leaf); - if (btrfs_leaf_free_space(fs_info, leaf) < 0) { + if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); BUG(); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1ec08bb1c2e0..1d1e12400552 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3011,8 +3011,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) { return btrfs_next_old_item(root, p, 0); } -int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf); +int btrfs_leaf_free_space(struct extent_buffer *leaf); int __must_check btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, int update_ref, int for_reloc); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index e61947f5eb76..43fdb2992956 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -691,7 +691,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_delayed_item *item) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_delayed_item *curr, *next; int free_space; int total_data_size = 0, total_size = 0; @@ -708,7 +707,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root, BUG_ON(!path->nodes[0]); leaf = path->nodes[0]; - free_space = btrfs_leaf_free_space(fs_info, leaf); + free_space = btrfs_leaf_free_space(leaf); INIT_LIST_HEAD(&head); next = item; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 6fccac9eab96..0867fca4b63d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -828,11 +828,11 @@ again: u32 diff; u32 free_space; - if (btrfs_leaf_free_space(fs_info, leaf) < + if (btrfs_leaf_free_space(leaf) < 
sizeof(struct btrfs_item) + csum_size * 2) goto insert; - free_space = btrfs_leaf_free_space(fs_info, leaf) - + free_space = btrfs_leaf_free_space(leaf) - sizeof(struct btrfs_item) - csum_size; tmp = sums->len - total_bytes; tmp >>= fs_info->sb->s_blocksize_bits; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5e6aee84daee..15cc3b861346 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1050,7 +1050,7 @@ delete_extent_item: if (!ret && replace_extent && leafs_visited == 1 && (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || path->locks[0] == BTRFS_WRITE_LOCK) && - btrfs_leaf_free_space(fs_info, leaf) >= + btrfs_leaf_free_space(leaf) >= sizeof(struct btrfs_item) + extent_item_size) { key.objectid = ino; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index df49931ffe92..1141ca5fae6a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -189,7 +189,7 @@ void btrfs_print_leaf(struct extent_buffer *l) btrfs_info(fs_info, "leaf %llu gen %llu total ptrs %d free space %d owner %llu", btrfs_header_bytenr(l), btrfs_header_generation(l), nr, - btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l)); + btrfs_leaf_free_space(l), btrfs_header_owner(l)); print_eb_refs_lock(l); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(i); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 69126d5b4d62..fa820c56ba3e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -174,7 +174,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans, char *ptr; if (size > old_data_len) { - if (btrfs_leaf_free_space(fs_info, leaf) < + if (btrfs_leaf_free_space(leaf) < (size - old_data_len)) { ret = -ENOSPC; goto out; -- cgit v1.2.3-59-g8ed1b From d0d20b0f5c6d0ca109957160d40b67542c7e0b51 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:54:01 +0100 Subject: btrfs: get fs_info from eb in read_node_slot We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 597337b07afa..3e26254fefe8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1792,9 +1792,8 @@ static void root_sub_used(struct btrfs_root *root, u32 size) /* given a node and slot number, this reads the blocks it points to. The * extent buffer is returned with a reference taken (but unlocked). */ -static noinline struct extent_buffer * -read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, - int slot) +static noinline struct extent_buffer *read_node_slot( + struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); struct extent_buffer *eb; @@ -1806,7 +1805,7 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, BUG_ON(level == 0); btrfs_node_key_to_cpu(parent, &first_key, slot); - eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), + eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), btrfs_node_ptr_generation(parent, slot), level - 1, &first_key); if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { @@ -1863,7 +1862,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, return 0; /* promote the child to a root */ - child = read_node_slot(fs_info, mid, 0); + child = read_node_slot(mid, 0); if (IS_ERR(child)) { ret = PTR_ERR(child); btrfs_handle_fs_error(fs_info, ret, NULL); @@ -1903,7 +1902,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4) return 0; - left = read_node_slot(fs_info, parent, pslot - 1); + left = read_node_slot(parent, pslot - 1); if (IS_ERR(left)) left = NULL; @@ -1918,7 +1917,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, } } - right = read_node_slot(fs_info, parent, pslot + 1); + right = read_node_slot(parent, pslot + 1); if (IS_ERR(right)) right = NULL; 
@@ -2078,7 +2077,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (!parent) return 1; - left = read_node_slot(fs_info, parent, pslot - 1); + left = read_node_slot(parent, pslot - 1); if (IS_ERR(left)) left = NULL; @@ -2131,7 +2130,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, btrfs_tree_unlock(left); free_extent_buffer(left); } - right = read_node_slot(fs_info, parent, pslot + 1); + right = read_node_slot(parent, pslot + 1); if (IS_ERR(right)) right = NULL; @@ -3767,7 +3766,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); - right = read_node_slot(fs_info, upper, slot + 1); + right = read_node_slot(upper, slot + 1); /* * slot + 1 is not valid or we fail to read the right node, * no big deal, just return. @@ -4002,7 +4001,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); - left = read_node_slot(fs_info, path->nodes[1], slot - 1); + left = read_node_slot(path->nodes[1], slot - 1); /* * slot - 1 is not valid or we fail to read the left node, * no big deal, just return. 
@@ -5133,7 +5132,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct btrfs_path *path, u64 min_trans) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *cur; struct btrfs_key found_key; int slot; @@ -5214,7 +5212,7 @@ find_next_key: goto out; } btrfs_set_path_blocking(path); - cur = read_node_slot(fs_info, cur, slot); + cur = read_node_slot(cur, slot); if (IS_ERR(cur)) { ret = PTR_ERR(cur); goto out; @@ -5243,7 +5241,7 @@ static int tree_move_down(struct btrfs_fs_info *fs_info, struct extent_buffer *eb; BUG_ON(*level == 0); - eb = read_node_slot(fs_info, path->nodes[*level], path->slots[*level]); + eb = read_node_slot(path->nodes[*level], path->slots[*level]); if (IS_ERR(eb)) return PTR_ERR(eb); -- cgit v1.2.3-59-g8ed1b From 5ab12d1ff83723f724d8631228f2b747aaec0a85 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:56:39 +0100 Subject: btrfs: get fs_info from eb in btree_read_extent_buffer_pages We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 64978fc36745..f7bfa73c2545 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -450,11 +450,11 @@ int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, * @level: expected level, mandatory check * @first_key: expected key of first slot, skip check if NULL */ -static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, +static int btree_read_extent_buffer_pages(struct extent_buffer *eb, u64 parent_transid, int level, struct btrfs_key *first_key) { + struct btrfs_fs_info *fs_info = eb->fs_info; struct extent_io_tree *io_tree; int failed = 0; int ret; @@ -1097,7 +1097,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, if (IS_ERR(buf)) return buf; - ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + ret = btree_read_extent_buffer_pages(buf, parent_transid, level, first_key); if (ret) { free_extent_buffer_stale(buf); @@ -4158,10 +4158,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info) int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, struct btrfs_key *first_key) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; - struct btrfs_fs_info *fs_info = root->fs_info; - - return btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + return btree_read_extent_buffer_pages(buf, parent_transid, level, first_key); } -- cgit v1.2.3-59-g8ed1b From e064d5e9f0a00041f84d9eabd3d53546e4f8ab74 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:58:13 +0100 Subject: btrfs: get fs_info from eb in btrfs_verify_level_key We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 +- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/disk-io.h | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3e26254fefe8..0998e7fba98b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2420,7 +2420,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, * being cached, read from scrub, or have multiple * parents (shared tree blocks). */ - if (btrfs_verify_level_key(fs_info, tmp, + if (btrfs_verify_level_key(tmp, parent_level - 1, &first_key, gen)) { free_extent_buffer(tmp); return -EUCLEAN; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f7bfa73c2545..b4c070936289 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -394,10 +394,10 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, return ret; } -int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int level, +int btrfs_verify_level_key(struct extent_buffer *eb, int level, struct btrfs_key *first_key, u64 parent_transid) { + struct btrfs_fs_info *fs_info = eb->fs_info; int found_level; struct btrfs_key found_key; int ret; @@ -471,7 +471,7 @@ static int btree_read_extent_buffer_pages(struct extent_buffer *eb, if (verify_parent_transid(io_tree, eb, parent_transid, 0)) ret = -EIO; - else if (btrfs_verify_level_key(fs_info, eb, level, + else if (btrfs_verify_level_key(eb, level, first_key, parent_transid)) ret = -EUCLEAN; else diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 917634f2bae9..a32158f0c2fc 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,8 +39,7 @@ static inline u64 btrfs_sb_offset(int mirror) struct btrfs_device; struct btrfs_fs_devices; -int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int level, +int btrfs_verify_level_key(struct extent_buffer *eb, int level, struct btrfs_key *first_key, u64 parent_transid); struct extent_buffer 
*read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, u64 parent_transid, int level, -- cgit v1.2.3-59-g8ed1b From 90b1377daa9633973d595487d717d43d3c601420 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Mar 2019 16:55:26 +0100 Subject: btrfs: qgroup: remove obsolete fs_info members The commit fcebe4562dec ("Btrfs: rework qgroup accounting") reworked qgroups and added some new structures. Another rework of qgroup mechanics e69bcee37692 ("btrfs: qgroup: Cleanup the old ref_node-oriented mechanism.") stopped using them but left them in place. Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 --- fs/btrfs/disk-io.c | 3 --- fs/btrfs/tests/btrfs-tests.c | 1 - 3 files changed, 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1d1e12400552..5d85c55032dd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1092,10 +1092,7 @@ struct btrfs_fs_info { /* holds configuration and tracking. Protected by qgroup_lock */ struct rb_root qgroup_tree; - struct rb_root qgroup_op_tree; spinlock_t qgroup_lock; - spinlock_t qgroup_op_lock; - atomic_t qgroup_op_seq; /* * used to avoid frequently calling ulist_alloc()/ulist_free() diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b4c070936289..0fa65aca56a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2161,7 +2161,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->qgroup_lock); mutex_init(&fs_info->qgroup_ioctl_lock); fs_info->qgroup_tree = RB_ROOT; - fs_info->qgroup_op_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; fs_info->qgroup_ulist = NULL; @@ -2666,7 +2665,6 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->defrag_inodes_lock); spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->super_lock); - spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock); @@ -2693,7 +2691,6 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->defrag_running, 0); - atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic_set(&fs_info->nr_delayed_iputs, 0); atomic64_set(&fs_info->tree_mod_seq, 0); diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 314d7bdac9d5..9238fd4f1734 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -109,7 +109,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->qgroup_lock); - spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->tree_mod_seq_lock); -- cgit v1.2.3-59-g8ed1b From 82fc28fbedbb59642f05215db3b0ef4eb91aa31d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:16:42 +0800 Subject: btrfs: Move btrfs_check_chunk_valid() to tree-check.[ch] and export it By function, chunk item verification is more suitable to be done inside tree-checker. So move btrfs_check_chunk_valid() to tree-checker.c and export it. And since it's now moved to tree-checker, also add a better comment for what this function is doing. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-checker.h | 4 ++ fs/btrfs/volumes.c | 94 +---------------------------------------------- 3 files changed, 102 insertions(+), 93 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index a62e1e837a89..eee861975816 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -448,6 +448,103 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, return 0; } +/* + * The common chunk check which could also work on super block sys chunk array. + * + * Return -EIO if anything is corrupted. + * Return 0 if everything is OK. + */ +int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) +{ + u64 length; + u64 stripe_len; + u16 num_stripes; + u16 sub_stripes; + u64 type; + u64 features; + bool mixed = false; + + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + + if (!num_stripes) { + btrfs_err(fs_info, "invalid chunk num_stripes: %u", + num_stripes); + return -EIO; + } + if (!IS_ALIGNED(logical, fs_info->sectorsize)) { + btrfs_err(fs_info, "invalid chunk logical %llu", logical); + return -EIO; + } + if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { + btrfs_err(fs_info, "invalid chunk sectorsize %u", + btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } + if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { + btrfs_err(fs_info, "invalid chunk length %llu", length); + return -EIO; + } + if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { + btrfs_err(fs_info, "invalid chunk stripe length: %llu", + stripe_len); + return 
-EIO; + } + if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & + type) { + btrfs_err(fs_info, "unrecognized chunk type: %llu", + ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK) & + btrfs_chunk_type(leaf, chunk)); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { + btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(fs_info, + "system chunk with data or metadata type: 0x%llx", type); + return -EIO; + } + + features = btrfs_super_incompat_flags(fs_info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = true; + + if (!mixed) { + if ((type & BTRFS_BLOCK_GROUP_METADATA) && + (type & BTRFS_BLOCK_GROUP_DATA)) { + btrfs_err(fs_info, + "mixed chunk type in non-mixed mode: 0x%llx", type); + return -EIO; + } + } + + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) { + btrfs_err(fs_info, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EIO; + } + + return 0; +} + /* * Common point to switch the item-specific validation. 
*/ diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index ff043275b784..4df45e8a6659 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -25,4 +25,8 @@ int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node); +int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical); + #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fcb0d3f34e09..5f39d86d41ec 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -27,6 +27,7 @@ #include "math.h" #include "dev-replace.h" #include "sysfs.h" +#include "tree-checker.h" const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { @@ -6714,99 +6715,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } -/* Return -EIO if any error, otherwise return 0. */ -static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, - struct btrfs_chunk *chunk, u64 logical) -{ - u64 length; - u64 stripe_len; - u16 num_stripes; - u16 sub_stripes; - u64 type; - u64 features; - bool mixed = false; - - length = btrfs_chunk_length(leaf, chunk); - stripe_len = btrfs_chunk_stripe_len(leaf, chunk); - num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); - type = btrfs_chunk_type(leaf, chunk); - - if (!num_stripes) { - btrfs_err(fs_info, "invalid chunk num_stripes: %u", - num_stripes); - return -EIO; - } - if (!IS_ALIGNED(logical, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk logical %llu", logical); - return -EIO; - } - if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { - btrfs_err(fs_info, "invalid chunk sectorsize %u", - btrfs_chunk_sector_size(leaf, chunk)); - return -EIO; - } - if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { - 
btrfs_err(fs_info, "invalid chunk length %llu", length); - return -EIO; - } - if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { - btrfs_err(fs_info, "invalid chunk stripe length: %llu", - stripe_len); - return -EIO; - } - if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & - type) { - btrfs_err(fs_info, "unrecognized chunk type: %llu", - ~(BTRFS_BLOCK_GROUP_TYPE_MASK | - BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)); - return -EIO; - } - - if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { - btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); - return -EIO; - } - - if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && - (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { - btrfs_err(fs_info, - "system chunk with data or metadata type: 0x%llx", type); - return -EIO; - } - - features = btrfs_super_incompat_flags(fs_info->super_copy); - if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) - mixed = true; - - if (!mixed) { - if ((type & BTRFS_BLOCK_GROUP_METADATA) && - (type & BTRFS_BLOCK_GROUP_DATA)) { - btrfs_err(fs_info, - "mixed chunk type in non-mixed mode: 0x%llx", type); - return -EIO; - } - } - - if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || - (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || - (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || - (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || - (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || - ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && - num_stripes != 1)) { - btrfs_err(fs_info, - "invalid num_stripes:sub_stripes %u:%u for profile %llu", - num_stripes, sub_stripes, - type & BTRFS_BLOCK_GROUP_PROFILE_MASK); - return -EIO; - } - - return 0; -} - static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid, u8 *uuid, bool error) { -- cgit v1.2.3-59-g8ed1b From f114024376bceb1c0f61a7bad4a72a0f978767af Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:36:06 +0800 Subject: 
btrfs: tree-checker: Make chunk item checker messages more readable Old error message would be something like: BTRFS error (device dm-3): invalid chunk num_stripes: 0 New error message would be: Btrfs critical (device dm-3): corrupt superblock syschunk array: chunk_start=2097152, invalid chunk num_stripes: 0 Or Btrfs critical (device dm-3): corrupt leaf: root=3 block=8388608 slot=3 chunk_start=2097152, invalid chunk num_stripes: 0 And for certain error message, also output expected value. The error message levels are changed from error to critical. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 81 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index eee861975816..80d87814f261 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -448,6 +448,51 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, return 0; } +__printf(5, 6) +__cold +static void chunk_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *leaf, + const struct btrfs_chunk *chunk, u64 logical, + const char *fmt, ...) +{ + bool is_sb; + struct va_format vaf; + va_list args; + int i; + int slot = -1; + + /* Only superblock eb is able to have such small offset */ + is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET); + + if (!is_sb) { + /* + * Get the slot number by iterating through all slots, this + * would provide better readability.
+ */ + for (i = 0; i < btrfs_header_nritems(leaf); i++) { + if (btrfs_item_ptr_offset(leaf, i) == + (unsigned long)chunk) { + slot = i; + break; + } + } + } + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (is_sb) + btrfs_crit(fs_info, + "corrupt superblock syschunk array: chunk_start=%llu, %pV", + logical, &vaf); + else + btrfs_crit(fs_info, + "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV", + BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot, + logical, &vaf); + va_end(args); +} + /* * The common chunk check which could also work on super block sys chunk array. * @@ -473,31 +518,38 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, type = btrfs_chunk_type(leaf, chunk); if (!num_stripes) { - btrfs_err(fs_info, "invalid chunk num_stripes: %u", - num_stripes); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk num_stripes, have %u", num_stripes); return -EIO; } if (!IS_ALIGNED(logical, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk logical %llu", logical); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk logical, have %llu should aligned to %u", + logical, fs_info->sectorsize); return -EIO; } if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { - btrfs_err(fs_info, "invalid chunk sectorsize %u", - btrfs_chunk_sector_size(leaf, chunk)); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk sectorsize, have %u expect %u", + btrfs_chunk_sector_size(leaf, chunk), + fs_info->sectorsize); return -EIO; } if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk length %llu", length); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk length, have %llu", length); return -EIO; } if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { - btrfs_err(fs_info, "invalid chunk stripe length: %llu", + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk stripe length: %llu", stripe_len); return -EIO; } if 
(~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & type) { - btrfs_err(fs_info, "unrecognized chunk type: %llu", + chunk_err(fs_info, leaf, chunk, logical, + "unrecognized chunk type: 0x%llx", ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & btrfs_chunk_type(leaf, chunk)); @@ -505,14 +557,17 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, } if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { - btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); + chunk_err(fs_info, leaf, chunk, logical, + "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", + type, BTRFS_BLOCK_GROUP_TYPE_MASK); return -EIO; } if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { - btrfs_err(fs_info, - "system chunk with data or metadata type: 0x%llx", type); + chunk_err(fs_info, leaf, chunk, logical, + "system chunk with data or metadata type: 0x%llx", + type); return -EIO; } @@ -523,7 +578,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, if (!mixed) { if ((type & BTRFS_BLOCK_GROUP_METADATA) && (type & BTRFS_BLOCK_GROUP_DATA)) { - btrfs_err(fs_info, + chunk_err(fs_info, leaf, chunk, logical, "mixed chunk type in non-mixed mode: 0x%llx", type); return -EIO; } @@ -535,7 +590,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) { - btrfs_err(fs_info, + chunk_err(fs_info, leaf, chunk, logical, "invalid num_stripes:sub_stripes %u:%u for profile %llu", num_stripes, sub_stripes, type & BTRFS_BLOCK_GROUP_PROFILE_MASK); -- cgit v1.2.3-59-g8ed1b From bf871c3b43b1dcc3f2a076ff39a8f1ce7959d958 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:39:14 +0800 Subject: btrfs: tree-checker: Make btrfs_check_chunk_valid() return EUCLEAN instead of EIO To follow the standard behavior 
of tree-checker. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 80d87814f261..75cc9e1d61bb 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -496,7 +496,7 @@ static void chunk_err(const struct btrfs_fs_info *fs_info, /* * The common chunk check which could also work on super block sys chunk array. * - * Return -EIO if anything is corrupted. + * Return -EUCLEAN if anything is corrupted. * Return 0 if everything is OK. */ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, @@ -520,31 +520,31 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, if (!num_stripes) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk num_stripes, have %u", num_stripes); - return -EIO; + return -EUCLEAN; } if (!IS_ALIGNED(logical, fs_info->sectorsize)) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk logical, have %llu should aligned to %u", logical, fs_info->sectorsize); - return -EIO; + return -EUCLEAN; } if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk sectorsize, have %u expect %u", btrfs_chunk_sector_size(leaf, chunk), fs_info->sectorsize); - return -EIO; + return -EUCLEAN; } if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk length, have %llu", length); - return -EIO; + return -EUCLEAN; } if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk stripe length: %llu", stripe_len); - return -EIO; + return -EUCLEAN; } if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & type) { @@ -553,14 +553,14 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, 
~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & btrfs_chunk_type(leaf, chunk)); - return -EIO; + return -EUCLEAN; } if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { chunk_err(fs_info, leaf, chunk, logical, "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", type, BTRFS_BLOCK_GROUP_TYPE_MASK); - return -EIO; + return -EUCLEAN; } if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && @@ -568,7 +568,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, chunk_err(fs_info, leaf, chunk, logical, "system chunk with data or metadata type: 0x%llx", type); - return -EIO; + return -EUCLEAN; } features = btrfs_super_incompat_flags(fs_info->super_copy); @@ -580,7 +580,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, (type & BTRFS_BLOCK_GROUP_DATA)) { chunk_err(fs_info, leaf, chunk, logical, "mixed chunk type in non-mixed mode: 0x%llx", type); - return -EIO; + return -EUCLEAN; } } @@ -594,7 +594,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, "invalid num_stripes:sub_stripes %u:%u for profile %llu", num_stripes, sub_stripes, type & BTRFS_BLOCK_GROUP_PROFILE_MASK); - return -EIO; + return -EUCLEAN; } return 0; -- cgit v1.2.3-59-g8ed1b From 075cb3c78fe7976c9f29ca1fa23f9728634ecefc Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:42:33 +0800 Subject: btrfs: tree-checker: Check chunk item at tree block read time Since we have btrfs_check_chunk_valid() in tree-checker, let's do chunk item verification in tree-checker too. Since the tree-checker is run at endio time, if one chunk leaf fails chunk verification, we can still retry the other copy, making btrfs more robust to fuzzed image as we may still get a good chunk item. Also since we have done chunk verification in tree block read time, skip the btrfs_check_chunk_valid() call in read_one_chunk() if we're reading chunk items from leaf. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 6 ++++++ fs/btrfs/volumes.c | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 75cc9e1d61bb..33d04fc4d280 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -608,6 +608,7 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, struct btrfs_key *key, int slot) { int ret = 0; + struct btrfs_chunk *chunk; switch (key->type) { case BTRFS_EXTENT_DATA_KEY: @@ -624,6 +625,11 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, case BTRFS_BLOCK_GROUP_ITEM_KEY: ret = check_block_group_item(fs_info, leaf, key, slot); break; + case BTRFS_CHUNK_ITEM_KEY: + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, + key->offset); + break; } return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5f39d86d41ec..97dbd7f37674 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6745,9 +6745,15 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, length = btrfs_chunk_length(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); - if (ret) - return ret; + /* + * Only need to verify chunk item if we're reading from sys chunk array, + * as chunk item in tree block is already verified by tree-checker. 
+ */ + if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { + ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); + if (ret) + return ret; + } read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); -- cgit v1.2.3-59-g8ed1b From ab4ba2e133463c702b37242560d7fabedd2dc750 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 8 Mar 2019 14:20:03 +0800 Subject: btrfs: tree-checker: Verify dev item [BUG] For fuzzed image whose DEV_ITEM has invalid total_bytes as 0, then kernel will just panic: BUG: unable to handle kernel NULL pointer dereference at 0000000000000098 #PF error: [normal kernel read fault] PGD 800000022b2bd067 P4D 800000022b2bd067 PUD 22b2bc067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 1106 Comm: mount Not tainted 5.0.0-rc8+ #9 RIP: 0010:btrfs_verify_dev_extents+0x2a5/0x5a0 Call Trace: open_ctree+0x160d/0x2149 btrfs_mount_root+0x5b2/0x680 [CAUSE] If device extent verification finds a device with 0 total_bytes, then it assumes it's a seed dummy, and then searches for seed devices. But in this case, there is no seed device at all, causing a NULL pointer dereference. [FIX] Since this is caused by fuzzed image, let's go the tree-check way, just add a new verification for device item. 
Reported-by: Yoon Jungyeon Link: https://bugzilla.kernel.org/show_bug.cgi?id=202691 Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 9 ------ fs/btrfs/volumes.h | 9 ++++++ 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 33d04fc4d280..00ab4e783b9f 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -600,6 +600,77 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, return 0; } +__printf(4, 5) +__cold +static void dev_item_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(fs_info, + "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV", + btrfs_header_level(eb) == 0 ? 
"leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, &vaf); + va_end(args); +} + +static int check_dev_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_dev_item *ditem; + u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK); + + if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { + dev_item_err(fs_info, leaf, slot, + "invalid objectid: has=%llu expect=%llu", + key->objectid, BTRFS_DEV_ITEMS_OBJECTID); + return -EUCLEAN; + } + if (key->offset > max_devid) { + dev_item_err(fs_info, leaf, slot, + "invalid devid: has=%llu expect=[0, %llu]", + key->offset, max_devid); + return -EUCLEAN; + } + ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); + if (btrfs_device_id(leaf, ditem) != key->offset) { + dev_item_err(fs_info, leaf, slot, + "devid mismatch: key has=%llu item has=%llu", + key->offset, btrfs_device_id(leaf, ditem)); + return -EUCLEAN; + } + + /* + * For device total_bytes, we don't have reliable way to check it, as + * it can be 0 for device removal. Device size check can only be done + * by dev extents check. + */ + if (btrfs_device_bytes_used(leaf, ditem) > + btrfs_device_total_bytes(leaf, ditem)) { + dev_item_err(fs_info, leaf, slot, + "invalid bytes used: have %llu expect [0, %llu]", + btrfs_device_bytes_used(leaf, ditem), + btrfs_device_total_bytes(leaf, ditem)); + return -EUCLEAN; + } + /* + * Remaining members like io_align/type/gen/dev_group aren't really + * utilized. Skip them to make later usage of them easier. + */ + return 0; +} + /* * Common point to switch the item-specific validation. 
*/ @@ -630,6 +701,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, key->offset); break; + case BTRFS_DEV_ITEM_KEY: + ret = check_dev_item(fs_info, leaf, key, slot); + break; } return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 97dbd7f37674..77bca3a61e26 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4958,15 +4958,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) btrfs_set_fs_incompat(info, RAID56); } -#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ - - sizeof(struct btrfs_chunk)) \ - / sizeof(struct btrfs_stripe) + 1) - -#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ - - 2 * sizeof(struct btrfs_disk_key) \ - - 2 * sizeof(struct btrfs_chunk)) \ - / sizeof(struct btrfs_stripe) + 1) - static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 start, u64 type) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3ad9d58d1b66..38ed94b77202 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -258,6 +258,15 @@ struct btrfs_fs_devices { #define BTRFS_BIO_INLINE_CSUM_SIZE 64 +#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ + - sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + +#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ + - 2 * sizeof(struct btrfs_disk_key) \ + - 2 * sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). 
-- cgit v1.2.3-59-g8ed1b From 80e46cf22ba0bcb57b39c7c3b52961ab3a0fd5f2 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 13 Mar 2019 12:17:50 +0800 Subject: btrfs: tree-checker: Enhance chunk checker to validate chunk profile Btrfs-progs already have a comprehensive type checker, to ensure there is only 0 (SINGLE profile) or 1 (DUP/RAID0/1/5/6/10) bit set for chunk profile bits. Do the same work for kernel. Reported-by: Yoon Jungyeon Link: https://bugzilla.kernel.org/show_bug.cgi?id=202765 Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 00ab4e783b9f..d7f4a3468945 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -556,6 +556,13 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, return -EUCLEAN; } + if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && + (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set", + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EUCLEAN; + } if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { chunk_err(fs_info, leaf, chunk, logical, "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", -- cgit v1.2.3-59-g8ed1b From 496245cac57e26d8b738d85c7a29cf9a47610f3f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 13 Mar 2019 14:31:35 +0800 Subject: btrfs: tree-checker: Verify inode item There is a report in kernel bugzilla about mismatch file type in dir item and inode item. This inspires us to check inode mode in inode item. This patch will check the following members: - inode key objectid Should be ROOT_DIR_DIR or [256, (u64)-256] or FREE_INO. - inode key offset Should be 0 - inode item generation - inode item transid No newer than sb generation + 1. 
The +1 is for log tree. - inode item mode No unknown bits. No invalid S_IF* bit. NOTE: S_IFMT check is not enough, need to check every known type. - inode item nlink Dir should have no more than 1 link. - inode item flags Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 15 ++++++++ fs/btrfs/tree-checker.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5d85c55032dd..19833b4af630 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1537,6 +1537,21 @@ do { \ #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) +#define BTRFS_INODE_FLAG_MASK \ + (BTRFS_INODE_NODATASUM | \ + BTRFS_INODE_NODATACOW | \ + BTRFS_INODE_READONLY | \ + BTRFS_INODE_NOCOMPRESS | \ + BTRFS_INODE_PREALLOC | \ + BTRFS_INODE_SYNC | \ + BTRFS_INODE_IMMUTABLE | \ + BTRFS_INODE_APPEND | \ + BTRFS_INODE_NODUMP | \ + BTRFS_INODE_NOATIME | \ + BTRFS_INODE_DIRSYNC | \ + BTRFS_INODE_COMPRESS | \ + BTRFS_INODE_ROOT_ITEM_INIT) + struct btrfs_map_token { const struct extent_buffer *eb; char *kaddr; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d7f4a3468945..ae4361cc6db5 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -678,6 +678,97 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, return 0; } +/* Inode item error output has the same format as dir_item_err() */ +#define inode_item_err(fs_info, eb, slot, fmt, ...) 
\ + dir_item_err(fs_info, eb, slot, fmt, __VA_ARGS__) + +static int check_inode_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_inode_item *iitem; + u64 super_gen = btrfs_super_generation(fs_info->super_copy); + u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); + u32 mode; + + if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID || + key->objectid > BTRFS_LAST_FREE_OBJECTID) && + key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && + key->objectid != BTRFS_FREE_INO_OBJECTID) { + generic_err(fs_info, leaf, slot, + "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", + key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, + BTRFS_FIRST_FREE_OBJECTID, + BTRFS_LAST_FREE_OBJECTID, + BTRFS_FREE_INO_OBJECTID); + return -EUCLEAN; + } + if (key->offset != 0) { + inode_item_err(fs_info, leaf, slot, + "invalid key offset: has %llu expect 0", + key->offset); + return -EUCLEAN; + } + iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); + + /* Here we use super block generation + 1 to handle log tree */ + if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) { + inode_item_err(fs_info, leaf, slot, + "invalid inode generation: has %llu expect (0, %llu]", + btrfs_inode_generation(leaf, iitem), + super_gen + 1); + return -EUCLEAN; + } + /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ + if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) { + inode_item_err(fs_info, leaf, slot, + "invalid inode generation: has %llu expect [0, %llu]", + btrfs_inode_transid(leaf, iitem), super_gen + 1); + return -EUCLEAN; + } + + /* + * For size and nbytes it's better not to be too strict, as for dir + * item its size/nbytes can easily get wrong, but doesn't affect + * anything in the fs. So here we skip the check. 
+ */ + mode = btrfs_inode_mode(leaf, iitem); + if (mode & ~valid_mask) { + inode_item_err(fs_info, leaf, slot, + "unknown mode bit detected: 0x%x", + mode & ~valid_mask); + return -EUCLEAN; + } + + /* + * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2, + * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG. + * Only needs to check BLK, LNK and SOCKS + */ + if (!is_power_of_2(mode & S_IFMT)) { + if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) { + inode_item_err(fs_info, leaf, slot, + "invalid mode: has 0%o expect valid S_IF* bit(s)", + mode & S_IFMT); + return -EUCLEAN; + } + } + if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) { + inode_item_err(fs_info, leaf, slot, + "invalid nlink: has %u expect no more than 1 for dir", + btrfs_inode_nlink(leaf, iitem)); + return -EUCLEAN; + } + if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) { + inode_item_err(fs_info, leaf, slot, + "unknown flags detected: 0x%llx", + btrfs_inode_flags(leaf, iitem) & + ~BTRFS_INODE_FLAG_MASK); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. 
*/ @@ -711,6 +802,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(fs_info, leaf, key, slot); break; + case BTRFS_INODE_ITEM_KEY: + ret = check_inode_item(fs_info, leaf, key, slot); + break; } return ret; } -- cgit v1.2.3-59-g8ed1b From 6bf9e4bd6a277840d3fe8c5d5d530a1fbd3db592 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 13 Mar 2019 13:55:11 +0800 Subject: btrfs: inode: Verify inode mode to avoid NULL pointer dereference [BUG] When accessing a file on a crafted image, btrfs can crash in block layer: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 136501067 P4D 136501067 PUD 124519067 PMD 0 CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.0.0-rc8-default #252 RIP: 0010:end_bio_extent_readpage+0x144/0x700 Call Trace: blk_update_request+0x8f/0x350 blk_mq_end_request+0x1a/0x120 blk_done_softirq+0x99/0xc0 __do_softirq+0xc7/0x467 irq_exit+0xd1/0xe0 call_function_single_interrupt+0xf/0x20 RIP: 0010:default_idle+0x1e/0x170 [CAUSE] The crafted image has a tricky corruption, the INODE_ITEM has a different type against its parent dir: item 20 key (268 INODE_ITEM 0) itemoff 2808 itemsize 160 generation 13 transid 13 size 1048576 nbytes 1048576 block group 0 mode 121644 links 1 uid 0 gid 0 rdev 0 sequence 9 flags 0x0(none) This mode number 0120000 means it's a symlink. But the dir item thinks it's still a regular file: item 8 key (264 DIR_INDEX 5) itemoff 3707 itemsize 32 location key (268 INODE_ITEM 0) type FILE transid 13 data_len 0 name_len 2 name: f4 item 40 key (264 DIR_ITEM 51821248) itemoff 1573 itemsize 32 location key (268 INODE_ITEM 0) type FILE transid 13 data_len 0 name_len 2 name: f4 For symlink, we don't set BTRFS_I(inode)->io_tree.ops and leave it empty, as symlink is only designed to have inlined extent, all handled by tree block read. Thus no need to trigger btrfs_submit_bio_hook() for inline file extent. 
However end_bio_extent_readpage() expects tree->ops populated, as it's reading regular data extent. This causes NULL pointer dereference. [FIX] This patch fixes the problem in two ways: - Verify inode mode against its dir item when looking up inode So in btrfs_lookup_dentry() if we find inode mode mismatch with dir item, we error out so that corrupted inode will not be accessed. - Verify inode mode when getting extent mapping Only regular file should have regular or preallocated extent. If we found regular/preallocated file extent for symlink or the rest, we error out before submitting the read bio. With this fix that crafted image can be rejected gracefully: BTRFS critical (device loop0): inode mode mismatch with dir: inode mode=0121644 btrfs type=7 dir type=1 Reported-by: Yoon Jungyeon Link: https://bugzilla.kernel.org/show_bug.cgi?id=202763 Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 67 ++++++++++++++++++++++++++++++-------------- fs/btrfs/tests/inode-tests.c | 1 + 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1d81a7a78a3f..baa80d808806 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5437,12 +5437,14 @@ no_delete: } /* - * this returns the key found in the dir entry in the location pointer. + * Return the key found in the dir entry in the location pointer, fill @type + * with BTRFS_FT_*, and return 0. + * * If no dir entries were found, returns -ENOENT. * If found a corrupted location in dir entry, returns -EUCLEAN. 
*/ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, - struct btrfs_key *location) + struct btrfs_key *location, u8 *type) { const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; @@ -5471,6 +5473,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, __func__, name, btrfs_ino(BTRFS_I(dir)), location->objectid, location->type, location->offset); } + if (!ret) + *type = btrfs_dir_type(path->nodes[0], di); out: btrfs_free_path(path); return ret; @@ -5708,6 +5712,24 @@ static struct inode *new_simple_dir(struct super_block *s, return inode; } +static inline u8 btrfs_inode_type(struct inode *inode) +{ + /* + * Compile-time asserts that generic FT_* types still match + * BTRFS_FT_* types + */ + BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN); + BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE); + BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR); + BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV); + BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV); + BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO); + BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK); + BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK); + + return fs_umode_to_ftype(inode->i_mode); +} + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); @@ -5715,18 +5737,31 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; struct btrfs_key location; + u8 di_type = 0; int index; int ret = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ret = btrfs_inode_by_name(dir, dentry, &location); + ret = btrfs_inode_by_name(dir, dentry, &location, &di_type); if (ret < 0) return ERR_PTR(ret); if (location.type == BTRFS_INODE_ITEM_KEY) { inode = btrfs_iget(dir->i_sb, &location, root, NULL); + if (IS_ERR(inode)) + return inode; + + /* Do extra check against inode mode with di_type */ + if 
(btrfs_inode_type(inode) != di_type) { + btrfs_crit(fs_info, +"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u", + inode->i_mode, btrfs_inode_type(inode), + di_type); + iput(inode); + return ERR_PTR(-EUCLEAN); + } return inode; } @@ -6327,24 +6362,6 @@ fail: return ERR_PTR(ret); } -static inline u8 btrfs_inode_type(struct inode *inode) -{ - /* - * Compile-time asserts that generic FT_* types still match - * BTRFS_FT_* types - */ - BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN); - BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE); - BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR); - BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV); - BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV); - BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO); - BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK); - BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK); - - return fs_umode_to_ftype(inode->i_mode); -} - /* * utility function to add 'inode' into 'parent_inode' with * a give name and a given sequence number. @@ -6862,6 +6879,14 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, extent_start = found_key.offset; if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { + /* Only regular file could have regular/prealloc extent */ + if (!S_ISREG(inode->vfs_inode.i_mode)) { + ret = -EUCLEAN; + btrfs_crit(fs_info, + "regular/prealloc extent found for non-regular inode %llu", + btrfs_ino(inode)); + goto out; + } extent_end = extent_start + btrfs_file_extent_num_bytes(leaf, item); diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 3d2c7abda5de..bc6dbd1b42fd 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -234,6 +234,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) return ret; } + inode->i_mode = S_IFREG; BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; BTRFS_I(inode)->location.offset = 0; -- cgit v1.2.3-59-g8ed1b From 
86a6be3abe6c48f01c94a2f4ae4ef13e57464ca6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 15:31:28 +0100 Subject: btrfs: tree-checker: get fs_info from eb in generic_err We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ae4361cc6db5..4ee1102b554a 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -41,12 +41,12 @@ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. * Allows callers to customize the output. */ -__printf(4, 5) +__printf(3, 4) __cold -static void generic_err(const struct btrfs_fs_info *fs_info, - const struct extent_buffer *eb, int slot, +static void generic_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { + const struct btrfs_fs_info *fs_info = eb->fs_info; struct va_format vaf; va_list args; @@ -196,19 +196,19 @@ static int check_csum_item(struct btrfs_fs_info *fs_info, u32 csumsize = btrfs_super_csum_size(fs_info->super_copy); if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "invalid key objectid for csum item, have %llu expect %llu", key->objectid, BTRFS_EXTENT_CSUM_OBJECTID); return -EUCLEAN; } if (!IS_ALIGNED(key->offset, sectorsize)) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "unaligned key offset for csum item, have %llu should be aligned to %u", key->offset, sectorsize); return -EUCLEAN; } if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "unaligned item size for csum item, have %u should be aligned to %u", btrfs_item_size_nr(leaf, slot), csumsize); return -EUCLEAN; @@ -695,7 +695,7 @@ static int check_inode_item(struct btrfs_fs_info *fs_info, key->objectid > 
BTRFS_LAST_FREE_OBJECTID) && key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && key->objectid != BTRFS_FREE_INO_OBJECTID) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, BTRFS_FIRST_FREE_OBJECTID, @@ -819,7 +819,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, int slot; if (btrfs_header_level(leaf) != 0) { - generic_err(fs_info, leaf, 0, + generic_err(leaf, 0, "invalid level for leaf, have %d expect 0", btrfs_header_level(leaf)); return -EUCLEAN; @@ -844,7 +844,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, owner == BTRFS_DEV_TREE_OBJECTID || owner == BTRFS_FS_TREE_OBJECTID || owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { - generic_err(fs_info, leaf, 0, + generic_err(leaf, 0, "invalid root, root %llu must never be empty", owner); return -EUCLEAN; @@ -864,7 +864,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, eb = btrfs_root_node(check_root); /* if leaf is the root, then it's fine */ if (leaf != eb) { - generic_err(fs_info, leaf, 0, + generic_err(leaf, 0, "invalid nritems, have %u should not be 0 for non-root leaf", nritems); free_extent_buffer(eb); @@ -897,7 +897,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, /* Make sure the keys are in the right order */ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", prev_key.objectid, prev_key.type, prev_key.offset, key.objectid, key.type, @@ -916,7 +916,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, item_end_expected = btrfs_item_offset_nr(leaf, slot - 1); if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "unexpected item end, 
have %u expect %u", btrfs_item_end_nr(leaf, slot), item_end_expected); @@ -930,7 +930,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, */ if (btrfs_item_end_nr(leaf, slot) > BTRFS_LEAF_DATA_SIZE(fs_info)) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "slot end outside of leaf, have %u expect range [0, %u]", btrfs_item_end_nr(leaf, slot), BTRFS_LEAF_DATA_SIZE(fs_info)); @@ -940,7 +940,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, /* Also check if the item pointer overlaps with btrfs item. */ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > btrfs_item_ptr_offset(leaf, slot)) { - generic_err(fs_info, leaf, slot, + generic_err(leaf, slot, "slot overlaps with its data, item end %lu data start %lu", btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item), @@ -988,7 +988,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node) int ret = 0; if (level <= 0 || level >= BTRFS_MAX_LEVEL) { - generic_err(fs_info, node, 0, + generic_err(node, 0, "invalid level for node, have %d expect [1, %d]", level, BTRFS_MAX_LEVEL - 1); return -EUCLEAN; @@ -1008,13 +1008,13 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node) btrfs_node_key_to_cpu(node, &next_key, slot + 1); if (!bytenr) { - generic_err(fs_info, node, slot, + generic_err(node, slot, "invalid NULL node pointer"); ret = -EUCLEAN; goto out; } if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) { - generic_err(fs_info, node, slot, + generic_err(node, slot, "unaligned pointer, have %llu should be aligned to %u", bytenr, fs_info->sectorsize); ret = -EUCLEAN; @@ -1022,7 +1022,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node) } if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { - generic_err(fs_info, node, slot, + generic_err(node, slot, "bad key order, current (%llu %u %llu) next (%llu %u %llu)", key.objectid, key.type, key.offset, 
next_key.objectid, next_key.type, -- cgit v1.2.3-59-g8ed1b From 1fd715ffdd14870307c8cd6965c454d928b66d40 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 15:32:46 +0100 Subject: btrfs: tree-checker: get fs_info from eb in file_extent_err We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 4ee1102b554a..363462d30681 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -66,12 +66,12 @@ static void generic_err(const struct extent_buffer *eb, int slot, * Customized reporter for extent data item, since its key objectid and * offset has its own meaning. */ -__printf(4, 5) +__printf(3, 4) __cold -static void file_extent_err(const struct btrfs_fs_info *fs_info, - const struct extent_buffer *eb, int slot, +static void file_extent_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) 
{ + const struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_key key; struct va_format vaf; va_list args; @@ -97,7 +97,7 @@ static void file_extent_err(const struct btrfs_fs_info *fs_info, #define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment) \ ({ \ if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \ - file_extent_err((fs_info), (leaf), (slot), \ + file_extent_err((leaf), (slot), \ "invalid %s for file extent, have %llu, should be aligned to %u", \ (#name), btrfs_file_extent_##name((leaf), (fi)), \ (alignment)); \ @@ -113,7 +113,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info, u32 item_size = btrfs_item_size_nr(leaf, slot); if (!IS_ALIGNED(key->offset, sectorsize)) { - file_extent_err(fs_info, leaf, slot, + file_extent_err(leaf, slot, "unaligned file_offset for file extent, have %llu should be aligned to %u", key->offset, sectorsize); return -EUCLEAN; @@ -122,7 +122,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { - file_extent_err(fs_info, leaf, slot, + file_extent_err(leaf, slot, "invalid type for file extent, have %u expect range [0, %u]", btrfs_file_extent_type(leaf, fi), BTRFS_FILE_EXTENT_TYPES); @@ -134,14 +134,14 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info, * and must be caught in open_ctree(). 
*/ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { - file_extent_err(fs_info, leaf, slot, + file_extent_err(leaf, slot, "invalid compression for file extent, have %u expect range [0, %u]", btrfs_file_extent_compression(leaf, fi), BTRFS_COMPRESS_TYPES); return -EUCLEAN; } if (btrfs_file_extent_encryption(leaf, fi)) { - file_extent_err(fs_info, leaf, slot, + file_extent_err(leaf, slot, "invalid encryption for file extent, have %u expect 0", btrfs_file_extent_encryption(leaf, fi)); return -EUCLEAN; @@ -149,7 +149,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info, if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { /* Inline extent must have 0 as key offset */ if (key->offset) { - file_extent_err(fs_info, leaf, slot, + file_extent_err(leaf, slot, "invalid file_offset for inline file extent, have %llu expect 0", key->offset); return -EUCLEAN; @@ -163,7 +163,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info, /* Uncompressed inline extent size must match item size */ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + btrfs_file_extent_ram_bytes(leaf, fi)) { - file_extent_err(fs_info, leaf, slot, + file_extent_err(leaf, slot, "invalid ram_bytes for uncompressed inline extent, have %u expect %llu", item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START + btrfs_file_extent_ram_bytes(leaf, fi)); @@ -174,7 +174,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info, /* Regular or preallocated extent has fixed item size */ if (item_size != sizeof(*fi)) { - file_extent_err(fs_info, leaf, slot, + file_extent_err(leaf, slot, "invalid item size for reg/prealloc file extent, have %u expect %zu", item_size, sizeof(*fi)); return -EUCLEAN; -- cgit v1.2.3-59-g8ed1b From 68128ce756204bfd9f10223cb7ecc986eb335c86 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:02:56 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_csum_item We can read fs_info from extent buffer and 
can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 363462d30681..ba50ce494cbd 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -188,10 +188,10 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info, return 0; } -static int check_csum_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, struct btrfs_key *key, +static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { + struct btrfs_fs_info *fs_info = leaf->fs_info; u32 sectorsize = fs_info->sectorsize; u32 csumsize = btrfs_super_csum_size(fs_info->super_copy); @@ -784,7 +784,7 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, ret = check_extent_data_item(fs_info, leaf, key, slot); break; case BTRFS_EXTENT_CSUM_KEY: - ret = check_csum_item(fs_info, leaf, key, slot); + ret = check_csum_item(leaf, key, slot); break; case BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: -- cgit v1.2.3-59-g8ed1b From d98ced688f4939f458ed5a8743c06a6fb6dcfa8f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:07:27 +0100 Subject: btrfs: tree-checker: get fs_info from eb in dir_item_err We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ba50ce494cbd..84f0dbb59fe1 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -220,12 +220,12 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key, * Customized reported for dir_item, only important new info is key->objectid, * which represents inode number */ -__printf(4, 5) +__printf(3, 4) __cold -static void dir_item_err(const struct btrfs_fs_info *fs_info, - const struct extent_buffer *eb, int slot, +static void dir_item_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { + const struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_key key; struct va_format vaf; va_list args; @@ -263,7 +263,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, /* header itself should not cross item boundary */ if (cur + sizeof(*di) > item_size) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "dir item header crosses item boundary, have %zu boundary %u", cur + sizeof(*di), item_size); return -EUCLEAN; @@ -272,7 +272,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, /* dir type check */ dir_type = btrfs_dir_type(leaf, di); if (dir_type >= BTRFS_FT_MAX) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "invalid dir item type, have %u expect [0, %u)", dir_type, BTRFS_FT_MAX); return -EUCLEAN; @@ -280,14 +280,14 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, if (key->type == BTRFS_XATTR_ITEM_KEY && dir_type != BTRFS_FT_XATTR) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "invalid dir item type for XATTR key, have %u expect %u", dir_type, BTRFS_FT_XATTR); return -EUCLEAN; } if (dir_type == BTRFS_FT_XATTR && key->type != BTRFS_XATTR_ITEM_KEY) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "xattr dir type found for 
non-XATTR key"); return -EUCLEAN; } @@ -300,13 +300,13 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, name_len = btrfs_dir_name_len(leaf, di); data_len = btrfs_dir_data_len(leaf, di); if (name_len > max_name_len) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "dir item name len too long, have %u max %u", name_len, max_name_len); return -EUCLEAN; } if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "dir item name and data len too long, have %u max %u", name_len + data_len, BTRFS_MAX_XATTR_SIZE(fs_info)); @@ -314,7 +314,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, } if (data_len && dir_type != BTRFS_FT_XATTR) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "dir item with invalid data len, have %u expect 0", data_len); return -EUCLEAN; @@ -324,7 +324,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, /* header and name/data should not cross item boundary */ if (cur + total_size > item_size) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "dir item data crosses item boundary, have %u boundary %u", cur + total_size, item_size); return -EUCLEAN; @@ -342,7 +342,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, (unsigned long)(di + 1), name_len); name_hash = btrfs_name_hash(namebuf, name_len); if (key->offset != name_hash) { - dir_item_err(fs_info, leaf, slot, + dir_item_err(leaf, slot, "name hash mismatch with key, have 0x%016x expect 0x%016llx", name_hash, key->offset); return -EUCLEAN; @@ -680,7 +680,7 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, /* Inode item error output has the same format as dir_item_err() */ #define inode_item_err(fs_info, eb, slot, fmt, ...) 
\ - dir_item_err(fs_info, eb, slot, fmt, __VA_ARGS__) + dir_item_err(eb, slot, fmt, __VA_ARGS__) static int check_inode_item(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, -- cgit v1.2.3-59-g8ed1b From ce4252c049bd2df0270453e96fba0d0dd1db6db4 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:17:46 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_dir_item We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 84f0dbb59fe1..367651d7319e 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -244,10 +244,10 @@ static void dir_item_err(const struct extent_buffer *eb, int slot, va_end(args); } -static int check_dir_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int check_dir_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_dir_item *di; u32 item_size = btrfs_item_size_nr(leaf, slot); u32 cur = 0; @@ -789,7 +789,7 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, case BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: case BTRFS_XATTR_ITEM_KEY: - ret = check_dir_item(fs_info, leaf, key, slot); + ret = check_dir_item(leaf, key, slot); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: ret = check_block_group_item(fs_info, leaf, key, slot); -- cgit v1.2.3-59-g8ed1b From 4806bd886aa0f9d23072711a5d9c84ac6b6fcadf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:18:57 +0100 Subject: btrfs: tree-checker: get fs_info from eb in block_group_err We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 367651d7319e..b7201f4a12a6 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -354,12 +354,12 @@ static int check_dir_item(struct extent_buffer *leaf, return 0; } -__printf(4, 5) +__printf(3, 4) __cold -static void block_group_err(const struct btrfs_fs_info *fs_info, - const struct extent_buffer *eb, int slot, +static void block_group_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { + const struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_key key; struct va_format vaf; va_list args; @@ -392,13 +392,13 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, * handle it. We care more about the size. */ if (key->offset == 0) { - block_group_err(fs_info, leaf, slot, + block_group_err(leaf, slot, "invalid block group size 0"); return -EUCLEAN; } if (item_size != sizeof(bgi)) { - block_group_err(fs_info, leaf, slot, + block_group_err(leaf, slot, "invalid item size, have %u expect %zu", item_size, sizeof(bgi)); return -EUCLEAN; @@ -408,7 +408,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, sizeof(bgi)); if (btrfs_block_group_chunk_objectid(&bgi) != BTRFS_FIRST_CHUNK_TREE_OBJECTID) { - block_group_err(fs_info, leaf, slot, + block_group_err(leaf, slot, "invalid block group chunk objectid, have %llu expect %llu", btrfs_block_group_chunk_objectid(&bgi), BTRFS_FIRST_CHUNK_TREE_OBJECTID); @@ -416,7 +416,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, } if (btrfs_block_group_used(&bgi) > key->offset) { - block_group_err(fs_info, leaf, slot, + block_group_err(leaf, slot, "invalid block group used, have %llu expect [0, %llu)", btrfs_block_group_used(&bgi), key->offset); return -EUCLEAN; @@ -424,7 +424,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, flags = 
btrfs_block_group_flags(&bgi); if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { - block_group_err(fs_info, leaf, slot, + block_group_err(leaf, slot, "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); @@ -437,7 +437,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, type != BTRFS_BLOCK_GROUP_SYSTEM && type != (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA)) { - block_group_err(fs_info, leaf, slot, + block_group_err(leaf, slot, "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", type, hweight64(type), BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, -- cgit v1.2.3-59-g8ed1b From af60ce2b9354e6aeea36965f10be3d31f1710582 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:19:31 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_block_group_item We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index b7201f4a12a6..9513ffd73453 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -378,8 +378,7 @@ static void block_group_err(const struct extent_buffer *eb, int slot, va_end(args); } -static int check_block_group_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int check_block_group_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_block_group_item bgi; @@ -792,7 +791,7 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, ret = check_dir_item(leaf, key, slot); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: - ret = check_block_group_item(fs_info, leaf, key, slot); + ret = check_block_group_item(leaf, key, slot); break; case BTRFS_CHUNK_ITEM_KEY: chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); -- cgit v1.2.3-59-g8ed1b From ae2a19d8ad2ec18ef61a186ebd768d3a6fef14eb Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:21:10 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_extent_data_item We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 9513ffd73453..c5dd7adea306 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -104,10 +104,10 @@ static void file_extent_err(const struct extent_buffer *eb, int slot, (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ }) -static int check_extent_data_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int check_extent_data_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_file_extent_item *fi; u32 sectorsize = fs_info->sectorsize; u32 item_size = btrfs_item_size_nr(leaf, slot); @@ -780,7 +780,7 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, switch (key->type) { case BTRFS_EXTENT_DATA_KEY: - ret = check_extent_data_item(fs_info, leaf, key, slot); + ret = check_extent_data_item(leaf, key, slot); break; case BTRFS_EXTENT_CSUM_KEY: ret = check_csum_item(leaf, key, slot); -- cgit v1.2.3-59-g8ed1b From 0076bc89a77a62bb765a32c6ab6013ea7f9f1eb0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:22:00 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_leaf_item We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index c5dd7adea306..8cae930a666d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -771,8 +771,7 @@ static int check_inode_item(struct btrfs_fs_info *fs_info, /* * Common point to switch the item-specific validation. 
*/ -static int check_leaf_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int check_leaf_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { int ret = 0; @@ -795,14 +794,14 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, break; case BTRFS_CHUNK_ITEM_KEY: chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); - ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, + ret = btrfs_check_chunk_valid(leaf->fs_info, leaf, chunk, key->offset); break; case BTRFS_DEV_ITEM_KEY: - ret = check_dev_item(fs_info, leaf, key, slot); + ret = check_dev_item(leaf->fs_info, leaf, key, slot); break; case BTRFS_INODE_ITEM_KEY: - ret = check_inode_item(fs_info, leaf, key, slot); + ret = check_inode_item(leaf->fs_info, leaf, key, slot); break; } return ret; @@ -952,7 +951,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, * Check if the item size and content meet other * criteria */ - ret = check_leaf_item(fs_info, leaf, &key, slot); + ret = check_leaf_item(leaf, &key, slot); if (ret < 0) return ret; } -- cgit v1.2.3-59-g8ed1b From e2ccd361ef06d18eaba488a568ed4d3664f2d4a7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:22:58 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_leaf We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 8cae930a666d..65e2906d133e 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -807,9 +807,9 @@ static int check_leaf_item(struct extent_buffer *leaf, return ret; } -static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, - bool check_item_data) +static int check_leaf(struct extent_buffer *leaf, bool check_item_data) { + struct btrfs_fs_info *fs_info = leaf->fs_info; /* No valid key type is 0, so all key should be larger than this key */ struct btrfs_key prev_key = {0, 0, 0}; struct btrfs_key key; @@ -967,13 +967,13 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf) { - return check_leaf(fs_info, leaf, true); + return check_leaf(leaf, true); } int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf) { - return check_leaf(fs_info, leaf, false); + return check_leaf(leaf, false); } int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node) -- cgit v1.2.3-59-g8ed1b From d001e4a3fe3959bdd7b5d4d29246082305d1b840 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:22:58 +0100 Subject: btrfs: tree-checker: get fs_info from eb in chunk_err We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 65e2906d133e..1dac957eaf35 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -447,13 +447,13 @@ static int check_block_group_item(struct extent_buffer *leaf, return 0; } -__printf(5, 6) +__printf(4, 5) __cold -static void chunk_err(const struct btrfs_fs_info *fs_info, - const struct extent_buffer *leaf, +static void chunk_err(const struct extent_buffer *leaf, const struct btrfs_chunk *chunk, u64 logical, const char *fmt, ...) { + const struct btrfs_fs_info *fs_info = leaf->fs_info; bool is_sb; struct va_format vaf; va_list args; @@ -517,37 +517,37 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, type = btrfs_chunk_type(leaf, chunk); if (!num_stripes) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "invalid chunk num_stripes, have %u", num_stripes); return -EUCLEAN; } if (!IS_ALIGNED(logical, fs_info->sectorsize)) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "invalid chunk logical, have %llu should aligned to %u", logical, fs_info->sectorsize); return -EUCLEAN; } if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "invalid chunk sectorsize, have %u expect %u", btrfs_chunk_sector_size(leaf, chunk), fs_info->sectorsize); return -EUCLEAN; } if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "invalid chunk length, have %llu", length); return -EUCLEAN; } if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "invalid chunk stripe length: %llu", stripe_len); return -EUCLEAN; } if 
(~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & type) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "unrecognized chunk type: 0x%llx", ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & @@ -557,13 +557,13 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set", type & BTRFS_BLOCK_GROUP_PROFILE_MASK); return -EUCLEAN; } if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", type, BTRFS_BLOCK_GROUP_TYPE_MASK); return -EUCLEAN; @@ -571,7 +571,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "system chunk with data or metadata type: 0x%llx", type); return -EUCLEAN; @@ -584,7 +584,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, if (!mixed) { if ((type & BTRFS_BLOCK_GROUP_METADATA) && (type & BTRFS_BLOCK_GROUP_DATA)) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "mixed chunk type in non-mixed mode: 0x%llx", type); return -EUCLEAN; } @@ -596,7 +596,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) { - chunk_err(fs_info, leaf, chunk, logical, + chunk_err(leaf, chunk, logical, "invalid num_stripes:sub_stripes %u:%u for profile %llu", num_stripes, sub_stripes, type & 
BTRFS_BLOCK_GROUP_PROFILE_MASK); -- cgit v1.2.3-59-g8ed1b From 5617ed80cbaa9638c6dadfd1b8ca268611e1267a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:22:58 +0100 Subject: btrfs: tree-checker: get fs_info from eb in dev_item_err We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 1dac957eaf35..03bee2b0e1d5 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -606,10 +606,9 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, return 0; } -__printf(4, 5) +__printf(3, 4) __cold -static void dev_item_err(const struct btrfs_fs_info *fs_info, - const struct extent_buffer *eb, int slot, +static void dev_item_err(const struct extent_buffer *eb, int slot, const char *fmt, ...) { struct btrfs_key key; @@ -622,7 +621,7 @@ static void dev_item_err(const struct btrfs_fs_info *fs_info, vaf.fmt = fmt; vaf.va = &args; - btrfs_crit(fs_info, + btrfs_crit(eb->fs_info, "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV", btrfs_header_level(eb) == 0 ? 
"leaf" : "node", btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, @@ -638,20 +637,20 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK); if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { - dev_item_err(fs_info, leaf, slot, + dev_item_err(leaf, slot, "invalid objectid: has=%llu expect=%llu", key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } if (key->offset > max_devid) { - dev_item_err(fs_info, leaf, slot, + dev_item_err(leaf, slot, "invalid devid: has=%llu expect=[0, %llu]", key->offset, max_devid); return -EUCLEAN; } ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { - dev_item_err(fs_info, leaf, slot, + dev_item_err(leaf, slot, "devid mismatch: key has=%llu item has=%llu", key->offset, btrfs_device_id(leaf, ditem)); return -EUCLEAN; @@ -664,7 +663,7 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, */ if (btrfs_device_bytes_used(leaf, ditem) > btrfs_device_total_bytes(leaf, ditem)) { - dev_item_err(fs_info, leaf, slot, + dev_item_err(leaf, slot, "invalid bytes used: have %llu expect [0, %llu]", btrfs_device_bytes_used(leaf, ditem), btrfs_device_total_bytes(leaf, ditem)); -- cgit v1.2.3-59-g8ed1b From 412a23127c58c54f709015342af5e724681a5b2d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:22:58 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_dev_item We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 03bee2b0e1d5..752ac3bc2587 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -629,10 +629,10 @@ static void dev_item_err(const struct extent_buffer *eb, int slot, va_end(args); } -static int check_dev_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_dev_item *ditem; u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK); @@ -797,7 +797,7 @@ static int check_leaf_item(struct extent_buffer *leaf, key->offset); break; case BTRFS_DEV_ITEM_KEY: - ret = check_dev_item(leaf->fs_info, leaf, key, slot); + ret = check_dev_item(leaf, key, slot); break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf->fs_info, leaf, key, slot); -- cgit v1.2.3-59-g8ed1b From 39e57f495bf94c4d36a8048aea37b1cd2ab730bc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:22:58 +0100 Subject: btrfs: tree-checker: get fs_info from eb in check_inode_item We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 752ac3bc2587..6828de4e976c 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -680,10 +680,10 @@ static int check_dev_item(struct extent_buffer *leaf, #define inode_item_err(fs_info, eb, slot, fmt, ...) 
\ dir_item_err(eb, slot, fmt, __VA_ARGS__) -static int check_inode_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int check_inode_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_inode_item *iitem; u64 super_gen = btrfs_super_generation(fs_info->super_copy); u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); @@ -800,7 +800,7 @@ static int check_leaf_item(struct extent_buffer *leaf, ret = check_dev_item(leaf, key, slot); break; case BTRFS_INODE_ITEM_KEY: - ret = check_inode_item(leaf->fs_info, leaf, key, slot); + ret = check_inode_item(leaf, key, slot); break; } return ret; -- cgit v1.2.3-59-g8ed1b From ae0bc863106ae45b189986c76fda44f6885e8769 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 29 Mar 2019 14:03:17 +0800 Subject: btrfs: drop unused parameter in mount_subvol @device_name in mount_subvol() is not used, drop it. Also see: 5bedc48a8f9e ("btrfs: drop unused parameters from mount_subvol"). 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 120e4340792a..2c66d9ea6a3b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1400,7 +1400,7 @@ static inline int is_subvolume_inode(struct inode *inode) } static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, - const char *device_name, struct vfsmount *mnt) + struct vfsmount *mnt) { struct dentry *root; int ret; @@ -1649,7 +1649,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } /* mount_subvol() will free subvol_name and mnt_root */ - root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root); + root = mount_subvol(subvol_name, subvol_objectid, mnt_root); out: return root; -- cgit v1.2.3-59-g8ed1b From 8b4d1efc9e6c326fe73a88d562c9d1c571493d32 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 2 Apr 2019 18:07:41 +0800 Subject: btrfs: prop: open code btrfs_set_prop in inherit_prop When an inode inherits property from its parent, we call btrfs_set_prop(). btrfs_set_prop() does elaborate checks, which are not required in the context of inheriting a property. Instead just open-code only the required items from btrfs_set_prop() and then call btrfs_setxattr() directly. So now the only user of btrfs_set_prop() is gone (except for the wrapper function btrfs_set_prop_trans()).
Reviewed-by: Nikolay Borisov Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 722ccf6bdd2b..0d1c3485c098 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -344,20 +344,38 @@ static int inherit_props(struct btrfs_trans_handle *trans, if (!value) continue; + /* + * This is not strictly necessary as the property should be + * valid, but in case it isn't, don't propagate it futher. + */ + ret = h->validate(value, strlen(value)); + if (ret) + continue; + num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); ret = btrfs_block_rsv_add(root, trans->block_rsv, num_bytes, BTRFS_RESERVE_NO_FLUSH); if (ret) - goto out; - ret = btrfs_set_prop(trans, inode, h->xattr_name, value, + return ret; + + ret = btrfs_setxattr(trans, inode, h->xattr_name, value, strlen(value), 0); + if (!ret) { + ret = h->apply(inode, value, strlen(value)); + if (ret) + btrfs_setxattr(trans, inode, h->xattr_name, + NULL, 0, 0); + else + set_bit(BTRFS_INODE_HAS_PROPS, + &BTRFS_I(inode)->runtime_flags); + } + btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes); if (ret) - goto out; + return ret; } - ret = 0; -out: - return ret; + + return 0; } int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans, -- cgit v1.2.3-59-g8ed1b From 040ee6120cb6706aa840a62f3b7966ebce9bfcd3 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 29 Mar 2019 18:03:27 +0800 Subject: Btrfs: send, improve clone range Improve clone_range in two scenarios. 1. Remove the limit of inode size when finding clone inodes We can do partial clone, so there is no need to limit the size of the candidate inode. When cloning a range, we clone the legal range only by bytenr, offset, len, inode size. 2. In the scenarios of rewrite or clone_range, data_offset rarely matches exactly, so the chance of a clone is missed. e.g. 1.
Write a 1M file dd if=/dev/zero of=1M bs=1M count=1 2. Clone 1M file cp --reflink 1M clone 3. Rewrite 4k on the clone file dd if=/dev/zero of=clone bs=4k count=1 conv=notrunc The disk layout is as follows: item 16 key (257 EXTENT_DATA 0) itemoff 15353 itemsize 53 extent data disk byte 1103101952 nr 1048576 extent data offset 0 nr 1048576 ram 1048576 extent compression(none) ... item 22 key (258 EXTENT_DATA 0) itemoff 14959 itemsize 53 extent data disk byte 1104150528 nr 4096 extent data offset 0 nr 4096 ram 4096 extent compression(none) item 23 key (258 EXTENT_DATA 4096) itemoff 14906 itemsize 53 extent data disk byte 1103101952 nr 1048576 extent data offset 4096 nr 1044480 ram 1048576 extent compression(none) When send, inode 258 file offset 4096~1048576 (item 23) has a chance to clone_range, but because data_offset does not match inode 257 (item 16), it causes missed clone and can only transfer actual data. Improve the problem by judging whether the current data_offset has overlap with the file extent item, and if so, adjusting offset and extent_len so that we can clone correctly. 
Reviewed-by: Filipe Manana Signed-off-by: Robbie Ko Signed-off-by: David Sterba --- fs/btrfs/send.c | 52 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7ea2d6b1f170..1e9caa552235 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1160,7 +1160,6 @@ out: struct backref_ctx { struct send_ctx *sctx; - struct btrfs_path *path; /* number of total found references */ u64 found; @@ -1213,8 +1212,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) { struct backref_ctx *bctx = ctx_; struct clone_root *found; - int ret; - u64 i_size; /* First check if the root is in the list of accepted clone sources */ found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, @@ -1230,19 +1227,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) bctx->found_itself = 1; } - /* - * There are inodes that have extents that lie behind its i_size. Don't - * accept clones from these extents. - */ - ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL, - NULL, NULL, NULL); - btrfs_release_path(bctx->path); - if (ret < 0) - return ret; - - if (offset + bctx->data_offset + bctx->extent_len > i_size) - return 0; - /* * Make sure we don't consider clones from send_root that are * behind the current inode/offset. @@ -1319,8 +1303,6 @@ static int find_extent_clone(struct send_ctx *sctx, goto out; } - backref_ctx->path = tmp_path; - if (data_offset >= ino_size) { /* * There may be extents that lie behind the file's size. @@ -5082,6 +5064,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *path; struct btrfs_key key; int ret; + u64 clone_src_i_size; /* * Prevent cloning from a zero offset with a length matching the sector @@ -5106,6 +5089,16 @@ static int clone_range(struct send_ctx *sctx, if (!path) return -ENOMEM; + /* + * There are inodes that have extents that lie behind its i_size. 
Don't + * accept clones from these extents. + */ + ret = __get_inode_info(clone_root->root, path, clone_root->ino, + &clone_src_i_size, NULL, NULL, NULL, NULL, NULL); + btrfs_release_path(path); + if (ret < 0) + goto out; + /* * We can't send a clone operation for the entire range if we find * extent items in the respective range in the source file that @@ -5148,6 +5141,7 @@ static int clone_range(struct send_ctx *sctx, u8 type; u64 ext_len; u64 clone_len; + u64 clone_data_offset; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); @@ -5201,10 +5195,30 @@ static int clone_range(struct send_ctx *sctx, if (key.offset >= clone_root->offset + len) break; + if (key.offset >= clone_src_i_size) + break; + + if (key.offset + ext_len > clone_src_i_size) + ext_len = clone_src_i_size - key.offset; + + clone_data_offset = btrfs_file_extent_offset(leaf, ei); + if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { + clone_root->offset = key.offset; + if (clone_data_offset < data_offset && + clone_data_offset + ext_len > data_offset) { + u64 extent_offset; + + extent_offset = data_offset - clone_data_offset; + ext_len -= extent_offset; + clone_data_offset += extent_offset; + clone_root->offset += extent_offset; + } + } + clone_len = min_t(u64, ext_len, len); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && - btrfs_file_extent_offset(leaf, ei) == data_offset) + clone_data_offset == data_offset) ret = send_clone(sctx, offset, clone_len, clone_root); else ret = send_extent_data(sctx, offset, clone_len); -- cgit v1.2.3-59-g8ed1b From c2d1b3aae33605a61cbab445d8ae1c708ccd2698 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 25 Mar 2019 14:31:21 +0200 Subject: btrfs: Honour FITRIM range constraints during free space trim Up until now trimming the freespace was done irrespective of what the arguments of the FITRIM ioctl were. For example fstrim's -o/-l arguments will be entirely ignored. 
Fix it by correctly handling those parameters. This requires breaking if the found freespace extent is after the end of the passed range as well as completing trim after trimming fstrim_range::len bytes. Fixes: 499f377f49f0 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aa52b0995fba..c5f9e8359c6f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11309,9 +11309,9 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * held back allocations. */ static int btrfs_trim_free_extents(struct btrfs_device *device, - u64 minlen, u64 *trimmed) + struct fstrim_range *range, u64 *trimmed) { - u64 start = 0, len = 0; + u64 start = range->start, len = 0; int ret; *trimmed = 0; @@ -11354,8 +11354,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, if (!trans) up_read(&fs_info->commit_root_sem); - ret = find_free_dev_extent_start(trans, device, minlen, start, - &start, &len); + ret = find_free_dev_extent_start(trans, device, range->minlen, + start, &start, &len); if (trans) { up_read(&fs_info->commit_root_sem); btrfs_put_transaction(trans); @@ -11368,6 +11368,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, break; } + /* If we are out of the passed range break */ + if (start > range->start + range->len - 1) { + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + + start = max(range->start, start); + len = min(range->len, len); + ret = btrfs_issue_discard(device->bdev, start, len, &bytes); mutex_unlock(&fs_info->chunk_mutex); @@ -11377,6 +11387,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start += len; *trimmed += bytes; + /* We've trimmed enough */ + if (*trimmed >= range->len) + 
break; + if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -11460,8 +11474,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { - ret = btrfs_trim_free_extents(device, range->minlen, - &group_trimmed); + ret = btrfs_trim_free_extents(device, range, &group_trimmed); if (ret) { dev_failed++; dev_ret = ret; -- cgit v1.2.3-59-g8ed1b From bbbf7243d62d8be73b7ef60721c127b36b2d523e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 25 Mar 2019 14:31:22 +0200 Subject: btrfs: combine device update operations during transaction commit We currently overload the pending_chunks list to handle updating btrfs_device->commit_bytes_used. We don't actually care about the extent mapping or even the device mapping for the chunk - we just need the device, and we can end up processing it multiple times. The fs_devices->resized_devices list does more or less the same thing, but with the disk size. They are processed consecutively during commit and have more or less the same purpose. We can combine the two lists into a single list that attaches to the transaction and contains a list of devices that need updating. Since we always add the device to a list when we change bytes_used or disk_total_bytes, there's no harm in copying both values at once.
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/disk-io.c | 7 ++++ fs/btrfs/transaction.c | 5 +-- fs/btrfs/transaction.h | 1 + fs/btrfs/volumes.c | 87 +++++++++++++++++++++----------------------------- fs/btrfs/volumes.h | 13 ++------ 6 files changed, 51 insertions(+), 64 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index ee193c5222b2..dba43ada41d1 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -662,7 +662,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_device_set_disk_total_bytes(tgt_device, src_device->disk_total_bytes); btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used); - ASSERT(list_empty(&src_device->resized_list)); + ASSERT(list_empty(&src_device->post_commit_list)); tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->commit_bytes_used = src_device->bytes_used; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0fa65aca56a3..de1dcccc7c9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4479,10 +4479,17 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { + struct btrfs_device *dev, *tmp; + btrfs_cleanup_dirty_bgs(cur_trans, fs_info); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); + list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list, + post_commit_list) { + list_del_init(&dev->post_commit_list); + } + btrfs_destroy_delayed_refs(cur_trans, fs_info); cur_trans->state = TRANS_STATE_COMMIT_START; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f1732b77a379..4aa827a2e951 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -75,6 +75,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) btrfs_put_block_group_trimming(cache); 
btrfs_put_block_group(cache); } + WARN_ON(!list_empty(&transaction->dev_update_list)); kfree(transaction); } } @@ -264,6 +265,7 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_snapshots); INIT_LIST_HEAD(&cur_trans->pending_chunks); + INIT_LIST_HEAD(&cur_trans->dev_update_list); INIT_LIST_HEAD(&cur_trans->switch_commits); INIT_LIST_HEAD(&cur_trans->dirty_bgs); INIT_LIST_HEAD(&cur_trans->io_bgs); @@ -2241,8 +2243,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) memcpy(fs_info->super_for_commit, fs_info->super_copy, sizeof(*fs_info->super_copy)); - btrfs_update_commit_device_size(fs_info); - btrfs_update_commit_device_bytes_used(cur_trans); + btrfs_commit_device_sizes(cur_trans); clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags); clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b34678e7968e..2bd76f681520 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -52,6 +52,7 @@ struct btrfs_transaction { wait_queue_head_t commit_wait; struct list_head pending_snapshots; struct list_head pending_chunks; + struct list_head dev_update_list; struct list_head switch_commits; struct list_head dirty_bgs; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 77bca3a61e26..72e069c227ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -319,7 +319,6 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid, mutex_init(&fs_devs->device_list_mutex); INIT_LIST_HEAD(&fs_devs->devices); - INIT_LIST_HEAD(&fs_devs->resized_devices); INIT_LIST_HEAD(&fs_devs->alloc_list); INIT_LIST_HEAD(&fs_devs->fs_list); if (fsid) @@ -335,6 +334,7 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid, void btrfs_free_device(struct btrfs_device *device) { + WARN_ON(!list_empty(&device->post_commit_list)); rcu_string_free(device->name); bio_put(device->flush_bio); kfree(device); @@ -403,7 +403,7 @@ static struct btrfs_device *__alloc_device(void) INIT_LIST_HEAD(&dev->dev_list); 
INIT_LIST_HEAD(&dev->dev_alloc_list); - INIT_LIST_HEAD(&dev->resized_list); + INIT_LIST_HEAD(&dev->post_commit_list); spin_lock_init(&dev->io_lock); @@ -2853,7 +2853,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_super_block *super_copy = fs_info->super_copy; - struct btrfs_fs_devices *fs_devices; u64 old_total; u64 diff; @@ -2872,8 +2871,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, return -EINVAL; } - fs_devices = fs_info->fs_devices; - btrfs_set_super_total_bytes(super_copy, round_down(old_total + diff, fs_info->sectorsize)); device->fs_devices->total_rw_bytes += diff; @@ -2881,9 +2878,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, btrfs_device_set_total_bytes(device, new_size); btrfs_device_set_disk_total_bytes(device, new_size); btrfs_clear_space_info_full(device->fs_info); - if (list_empty(&device->resized_list)) - list_add_tail(&device->resized_list, - &fs_devices->resized_devices); + if (list_empty(&device->post_commit_list)) + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); mutex_unlock(&fs_info->chunk_mutex); return btrfs_update_device(trans, device); @@ -4872,9 +4869,9 @@ again: } btrfs_device_set_disk_total_bytes(device, new_size); - if (list_empty(&device->resized_list)) - list_add_tail(&device->resized_list, - &fs_info->fs_devices->resized_devices); + if (list_empty(&device->post_commit_list)) + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); WARN_ON(diff > old_total); btrfs_set_super_total_bytes(super_copy, @@ -5214,9 +5211,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (ret) goto error_del_extent; - for (i = 0; i < map->num_stripes; i++) - btrfs_device_set_bytes_used(map->stripes[i].dev, - map->stripes[i].dev->bytes_used + stripe_size); + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_device *dev = map->stripes[i].dev; + + 
btrfs_device_set_bytes_used(dev, dev->bytes_used + stripe_size); + if (list_empty(&dev->post_commit_list)) + list_add_tail(&dev->post_commit_list, + &trans->transaction->dev_update_list); + } atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space); @@ -7579,51 +7581,34 @@ void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_pat } /* - * Update the size of all devices, which is used for writing out the - * super blocks. + * Update the size and bytes used for each device where it changed. This is + * delayed since we would otherwise get errors while writing out the + * superblocks. + * + * Must be invoked during transaction commit. */ -void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) +void btrfs_commit_device_sizes(struct btrfs_transaction *trans) { - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *curr, *next; - if (list_empty(&fs_devices->resized_devices)) - return; - - mutex_lock(&fs_devices->device_list_mutex); - mutex_lock(&fs_info->chunk_mutex); - list_for_each_entry_safe(curr, next, &fs_devices->resized_devices, - resized_list) { - list_del_init(&curr->resized_list); - curr->commit_total_bytes = curr->disk_total_bytes; - } - mutex_unlock(&fs_info->chunk_mutex); - mutex_unlock(&fs_devices->device_list_mutex); -} - -/* Must be invoked during the transaction commit */ -void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans) -{ - struct btrfs_fs_info *fs_info = trans->fs_info; - struct extent_map *em; - struct map_lookup *map; - struct btrfs_device *dev; - int i; + ASSERT(trans->state == TRANS_STATE_COMMIT_DOING); - if (list_empty(&trans->pending_chunks)) + if (list_empty(&trans->dev_update_list)) return; - /* In order to kick the device replace finish process */ - mutex_lock(&fs_info->chunk_mutex); - list_for_each_entry(em, &trans->pending_chunks, list) { - map = em->map_lookup; - - for (i = 0; i < map->num_stripes; i++) { - dev = map->stripes[i].dev; - 
dev->commit_bytes_used = dev->bytes_used; - } + /* + * We don't need the device_list_mutex here. This list is owned by the + * transaction and the transaction must complete before the device is + * released. + */ + mutex_lock(&trans->fs_info->chunk_mutex); + list_for_each_entry_safe(curr, next, &trans->dev_update_list, + post_commit_list) { + list_del_init(&curr->post_commit_list); + curr->commit_total_bytes = curr->disk_total_bytes; + curr->commit_bytes_used = curr->bytes_used; } - mutex_unlock(&fs_info->chunk_mutex); + mutex_unlock(&trans->fs_info->chunk_mutex); } void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 38ed94b77202..b9912b910d6d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -45,6 +45,7 @@ struct btrfs_pending_bios { struct btrfs_device { struct list_head dev_list; struct list_head dev_alloc_list; + struct list_head post_commit_list; /* chunk mutex */ struct btrfs_fs_devices *fs_devices; struct btrfs_fs_info *fs_info; @@ -102,18 +103,12 @@ struct btrfs_device { * size of the device on the current transaction * * This variant is update when committing the transaction, - * and protected by device_list_mutex + * and protected by chunk mutex */ u64 commit_total_bytes; /* bytes used on the current transaction */ u64 commit_bytes_used; - /* - * used to manage the device which is resized - * - * It is protected by chunk_lock. 
- */ - struct list_head resized_list; /* for sending down flush barriers */ struct bio *flush_bio; @@ -235,7 +230,6 @@ struct btrfs_fs_devices { struct mutex device_list_mutex; struct list_head devices; - struct list_head resized_devices; /* devices not currently being allocated */ struct list_head alloc_list; @@ -567,8 +561,7 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags) const char *get_raid_name(enum btrfs_raid_types type); -void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); -void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans); +void btrfs_commit_device_sizes(struct btrfs_transaction *trans); struct list_head *btrfs_get_fs_uuids(void); void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); -- cgit v1.2.3-59-g8ed1b From 61d0d0d2cbb7e52d3defe8089d5f360cf6d836ef Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 25 Mar 2019 14:31:23 +0200 Subject: btrfs: Handle pending/pinned chunks before blockgroup relocation during device shrink During device shrink pinned/pending chunks (i.e. those which have been deleted/created respectively, in the current transaction and haven't touched disk) need to be accounted when doing device shrink. Presently this happens after the main relocation loop in btrfs_shrink_device, which could lead to making another go in the body of the function. Since there is no hard requirement to perform pinned/pending chunks handling after the relocation loop, move the code before it. This leads to simplifying the code flow around - i.e. no need to use 'goto again'. A notable side effect of this change is that modification of the device's size requires a transaction to be started and committed before the relocation loop starts. This is necessary to ensure that relocation process sees the shrunk device size. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 55 +++++++++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 72e069c227ab..a64b76d93cf0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4720,15 +4720,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) int slot; int failed = 0; bool retried = false; - bool checked_pending_chunks = false; struct extent_buffer *l; struct btrfs_key key; struct btrfs_super_block *super_copy = fs_info->super_copy; u64 old_total = btrfs_super_total_bytes(super_copy); u64 old_size = btrfs_device_get_total_bytes(device); u64 diff; + u64 start; new_size = round_down(new_size, fs_info->sectorsize); + start = new_size; diff = round_down(old_size - new_size, fs_info->sectorsize); if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) @@ -4740,6 +4741,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) path->reada = READA_BACK; + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + mutex_lock(&fs_info->chunk_mutex); btrfs_device_set_total_bytes(device, new_size); @@ -4747,7 +4754,21 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) device->fs_devices->total_rw_bytes -= diff; atomic64_sub(diff, &fs_info->free_chunk_space); } - mutex_unlock(&fs_info->chunk_mutex); + + /* + * Once the device's size has been set to the new size, ensure all + * in-memory chunks are synced to disk so that the loop below sees them + * and relocates them accordingly. 
+ */ + if (contains_pending_extent(trans->transaction, device, &start, diff)) { + mutex_unlock(&fs_info->chunk_mutex); + ret = btrfs_commit_transaction(trans); + if (ret) + goto done; + } else { + mutex_unlock(&fs_info->chunk_mutex); + btrfs_end_transaction(trans); + } again: key.objectid = device->devid; @@ -4838,36 +4859,6 @@ again: } mutex_lock(&fs_info->chunk_mutex); - - /* - * We checked in the above loop all device extents that were already in - * the device tree. However before we have updated the device's - * total_bytes to the new size, we might have had chunk allocations that - * have not complete yet (new block groups attached to transaction - * handles), and therefore their device extents were not yet in the - * device tree and we missed them in the loop above. So if we have any - * pending chunk using a device extent that overlaps the device range - * that we can not use anymore, commit the current transaction and - * repeat the search on the device tree - this way we guarantee we will - * not have chunks using device extents that end beyond 'new_size'. - */ - if (!checked_pending_chunks) { - u64 start = new_size; - u64 len = old_size - new_size; - - if (contains_pending_extent(trans->transaction, device, - &start, len)) { - mutex_unlock(&fs_info->chunk_mutex); - checked_pending_chunks = true; - failed = 0; - retried = false; - ret = btrfs_commit_transaction(trans); - if (ret) - goto done; - goto again; - } - } - btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->post_commit_list)) list_add_tail(&device->post_commit_list, -- cgit v1.2.3-59-g8ed1b From 41e7acd38c1ae82f24f51d302bbdecdb4675b6b2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 25 Mar 2019 14:31:24 +0200 Subject: btrfs: Rename and export clear_btree_io_tree This function is going to be used to clear out the device extent allocation information. Give it a more generic name and export it. 
This is in preparation to replacing the pending/pinned chunk lists with an extent tree. No functional changes. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 29 +++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 1 + fs/btrfs/transaction.c | 37 ++++--------------------------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a3e3e95c632e..26d7b6a5d567 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -266,6 +266,35 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info, tree->owner = owner; } +void extent_io_tree_release(struct extent_io_tree *tree) +{ + spin_lock(&tree->lock); + /* + * Do a single barrier for the waitqueue_active check here, the state + * of the waitqueue should not change once extent_io_tree_release is + * called. + */ + smp_mb(); + while (!RB_EMPTY_ROOT(&tree->state)) { + struct rb_node *node; + struct extent_state *state; + + node = rb_first(&tree->state); + state = rb_entry(node, struct extent_state, rb_node); + rb_erase(&state->rb_node, &tree->state); + RB_CLEAR_NODE(&state->rb_node); + /* + * btree io trees aren't supposed to have tasks waiting for + * changes in the flags of extent states ever. 
+ */ + ASSERT(!waitqueue_active(&state->wq)); + free_extent_state(state); + + cond_resched_lock(&tree->lock); + } + spin_unlock(&tree->lock); +} + static struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c4ec104ac157..722dc7d1b674 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -255,6 +255,7 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, void extent_io_tree_init(struct btrfs_fs_info *fs_info, struct extent_io_tree *tree, unsigned int owner, void *private_data); +void extent_io_tree_release(struct extent_io_tree *tree); int try_release_extent_mapping(struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4aa827a2e951..b32769998bbb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -80,35 +80,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) } } -static void clear_btree_io_tree(struct extent_io_tree *tree) -{ - spin_lock(&tree->lock); - /* - * Do a single barrier for the waitqueue_active check here, the state - * of the waitqueue should not change once clear_btree_io_tree is - * called. - */ - smp_mb(); - while (!RB_EMPTY_ROOT(&tree->state)) { - struct rb_node *node; - struct extent_state *state; - - node = rb_first(&tree->state); - state = rb_entry(node, struct extent_state, rb_node); - rb_erase(&state->rb_node, &tree->state); - RB_CLEAR_NODE(&state->rb_node); - /* - * btree io trees aren't supposed to have tasks waiting for - * changes in the flags of extent states ever. 
- */ - ASSERT(!waitqueue_active(&state->wq)); - free_extent_state(state); - - cond_resched_lock(&tree->lock); - } - spin_unlock(&tree->lock); -} - static noinline void switch_commit_roots(struct btrfs_transaction *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -122,7 +93,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans) root->commit_root = btrfs_root_node(root); if (is_fstree(root->root_key.objectid)) btrfs_unpin_free_ino(root); - clear_btree_io_tree(&root->dirty_log_pages); + extent_io_tree_release(&root->dirty_log_pages); btrfs_qgroup_clean_swapped_blocks(root); } @@ -930,7 +901,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info, * superblock that points to btree nodes/leafs for which * writeback hasn't finished yet (and without errors). * We cleanup any entries left in the io tree when committing - * the transaction (through clear_btree_io_tree()). + * the transaction (through extent_io_tree_release()). */ if (err == -ENOMEM) { err = 0; @@ -975,7 +946,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info, * left in the io tree. For a log commit, we don't remove them * after committing the log because the tree can be accessed * concurrently - we do it only at transaction commit time when - * it's safe to do it (through clear_btree_io_tree()). + * it's safe to do it (through extent_io_tree_release()). 
*/ err = clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, 0, 0, &cached_state); @@ -1053,7 +1024,7 @@ static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans) blk_finish_plug(&plug); ret2 = btrfs_wait_extents(fs_info, dirty_pages); - clear_btree_io_tree(&trans->transaction->dirty_pages); + extent_io_tree_release(&trans->transaction->dirty_pages); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From 39e264a40dd7998f844ab888cc9640bff4841f8f Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 25 Mar 2019 14:31:25 +0200 Subject: btrfs: Populate ->orig_block_len during read_one_chunk Chunks read from disk currently don't get their ->orig_block_len member set. In contrast, when a new chunk is allocated, the respective extent_map's ->orig_block_len is assigned the size of the stripe of this chunk. Let's apply the same strategy for chunks which are read from disk. Not only does this codify the invariant that ->orig_block_len always contains the size of the stripe for a chunk (when the em belongs to the mapping tree), but it's also a preparatory patch for further work around tracking chunk allocation in an extent tree rather than pinned/pending lists.
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a64b76d93cf0..66b8b492d6f5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6710,6 +6710,26 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, devid, uuid); } +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) +{ + int index = btrfs_bg_flags_to_raid_index(type); + int ncopies = btrfs_raid_array[index].ncopies; + int data_stripes; + + switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case BTRFS_BLOCK_GROUP_RAID5: + data_stripes = num_stripes - 1; + break; + case BTRFS_BLOCK_GROUP_RAID6: + data_stripes = num_stripes - 2; + break; + default: + data_stripes = num_stripes / ncopies; + break; + } + return div_u64(chunk_len, data_stripes); +} + static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -6775,6 +6795,8 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, map->type = btrfs_chunk_type(leaf, chunk); map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); map->verified_stripes = 0; + em->orig_block_len = calc_stripe_length(map->type, em->len, + map->num_stripes); for (i = 0; i < num_stripes; i++) { map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); @@ -7632,25 +7654,6 @@ int btrfs_bg_type_to_factor(u64 flags) } -static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) -{ - int index = btrfs_bg_flags_to_raid_index(type); - int ncopies = btrfs_raid_array[index].ncopies; - int data_stripes; - - switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { - case BTRFS_BLOCK_GROUP_RAID5: - data_stripes = num_stripes - 1; - break; - case BTRFS_BLOCK_GROUP_RAID6: - data_stripes = num_stripes - 2; - break; - default: - 
data_stripes = num_stripes / ncopies; - break; - } - return div_u64(chunk_len, data_stripes); -} static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 chunk_offset, u64 devid, -- cgit v1.2.3-59-g8ed1b From 930b09072977583226a05b4f2e1db259f9a2417b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 25 Mar 2019 14:31:26 +0200 Subject: btrfs: Introduce new bits for device allocation tree Rather than hijacking the existing defines let's just define new bits, with more descriptive names. Instead of using yet more (currently at 18) bits for the new flags, use the fact those flags will be specific to the device allocation tree so define them using existing EXTENT_* flags. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 722dc7d1b674..71213438db3a 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -27,6 +27,9 @@ EXTENT_CLEAR_DATA_RESV) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING) +/* Redefined bits above which are used only in the device allocation tree */ +#define CHUNK_ALLOCATED EXTENT_DIRTY + /* * flags for bio submission. The high bits indicate the compression * type for this bio -- cgit v1.2.3-59-g8ed1b From 4ca7365606ca08282da248fbc270abf58a515e20 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 27 Mar 2019 14:24:10 +0200 Subject: btrfs: Implement set_extent_bits_nowait It will be used in a future patch that will require modifying an extent_io_tree struct under a spinlock. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 7 +++++++ fs/btrfs/extent_io.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 26d7b6a5d567..ff1f7b4ac02c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1368,6 +1368,13 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, changeset); } +int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end, + unsigned bits) +{ + return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, + GFP_NOWAIT, NULL); +} + int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, struct extent_state **cached) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 71213438db3a..6435c2818ec3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -328,6 +328,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); +int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end, + unsigned bits); static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits) -- cgit v1.2.3-59-g8ed1b From 8e75fd893b0608aa3a45f8654b71960423df5f3e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 27 Mar 2019 14:24:11 +0200 Subject: btrfs: Stop using call_rcu for device freeing btrfs_device structs are freed from RCU context since device iteration is protected by RCU. Currently this is achieved by using call_rcu since no blocking functions are called within btrfs_free_device. Future refactoring of pending/pinned chunks will require calling sleeping functions. 
This patch is in preparation for these changes by simply switching from RCU callbacks to explicit calls of synchronize_rcu and calling btrfs_free_device directly. This is functionally equivalent, making sure that there are no readers at that time. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 20 ++++++++------------ fs/btrfs/volumes.h | 1 - 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 66b8b492d6f5..2bde9e9c188e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1231,14 +1231,6 @@ again: mutex_unlock(&uuid_mutex); } -static void free_device_rcu(struct rcu_head *head) -{ - struct btrfs_device *device; - - device = container_of(head, struct btrfs_device, rcu); - btrfs_free_device(device); -} - static void btrfs_close_bdev(struct btrfs_device *device) { if (!device->bdev) @@ -1286,7 +1278,8 @@ static void btrfs_close_one_device(struct btrfs_device *device) list_replace_rcu(&device->dev_list, &new_device->dev_list); new_device->fs_devices = device->fs_devices; - call_rcu(&device->rcu, free_device_rcu); + synchronize_rcu(); + btrfs_free_device(device); } static int close_fs_devices(struct btrfs_fs_devices *fs_devices) @@ -2243,7 +2236,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, btrfs_scratch_superblocks(device->bdev, device->name->str); btrfs_close_bdev(device); - call_rcu(&device->rcu, free_device_rcu); + synchronize_rcu(); + btrfs_free_device(device); if (cur_devices->open_devices == 0) { while (fs_devices) { @@ -2311,7 +2305,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, } btrfs_close_bdev(srcdev); - call_rcu(&srcdev->rcu, free_device_rcu); + synchronize_rcu(); + btrfs_free_device(srcdev); /* if this is no devs we rather delete the fs_devices */ if (!fs_devices->num_devices) { @@ -2369,7 +2364,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) 
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); btrfs_close_bdev(tgtdev); - call_rcu(&tgtdev->rcu, free_device_rcu); + synchronize_rcu(); + btrfs_free_device(tgtdev); } static struct btrfs_device *btrfs_find_device_by_path( diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index b9912b910d6d..d3658a4e65db 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -118,7 +118,6 @@ struct btrfs_device { struct scrub_ctx *scrub_ctx; struct btrfs_work work; - struct rcu_head rcu; /* readahead state */ atomic_t reada_in_flight; -- cgit v1.2.3-59-g8ed1b From 68c94e55e1502868813a3cac2febc021d01edb75 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Feb 2019 16:13:14 +0200 Subject: btrfs: Transpose btrfs_close_devices/btrfs_mapping_tree_free in close_ctree Following the introduction of the alloc_state tree, some of the callees of btrfs_mapping_tree_free will have to interact with the btrfs_device of the constituent devices. Enable this by moving the code responsible for freeing devices after the last user (btrfs_mapping_tree_free). Otherwise the kernel could crash due to use-after-free. 
Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de1dcccc7c9d..8c10702a3f83 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4040,8 +4040,8 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfsic_unmount(fs_info->fs_devices); #endif - btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); + btrfs_close_devices(fs_info->fs_devices); percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); -- cgit v1.2.3-59-g8ed1b From 1c11b63eff2a67906cb9137bc6b2ee27767f313b Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 27 Mar 2019 14:24:12 +0200 Subject: btrfs: replace pending/pinned chunks lists with io tree The pending chunks list contains chunks that are allocated in the current transaction but haven't been created yet. The pinned chunks list contains chunks that are being released in the current transaction. Both describe chunks that are not reflected on disk as in use but are unavailable just the same. The pending chunks list is anchored by the transaction handle, which means that we need to hold a reference to a transaction when working with the list. The way we use them is by iterating over both lists to perform comparisons on the stripes they describe for each device. This is backwards and requires that we keep a transaction handle open while we're trimming. This patchset adds an extent_io_tree to btrfs_device that maintains the allocation state of the device. Extents are set dirty when chunks are first allocated -- when the extent maps are added to the mapping tree. They're cleared when last removed -- when the extent maps are removed from the mapping tree. 
This matches the lifespan of the pending and pinned chunks list and allows us to do trims on unallocated space safely without pinning the transaction for what may be a lengthy operation. We can also use this io tree to mark which chunks have already been trimmed so we don't repeat the operation. Signed-off-by: Jeff Mahoney Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 ---- fs/btrfs/disk-io.c | 11 ------ fs/btrfs/extent-tree.c | 28 --------------- fs/btrfs/extent_map.c | 36 +++++++++++++++++++ fs/btrfs/free-space-cache.c | 4 --- fs/btrfs/transaction.c | 9 ----- fs/btrfs/transaction.h | 1 - fs/btrfs/volumes.c | 85 +++++++++++++-------------------------------- fs/btrfs/volumes.h | 2 ++ 9 files changed, 63 insertions(+), 119 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 19833b4af630..93270e20a8e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1149,12 +1149,6 @@ struct btrfs_fs_info { struct mutex unused_bg_unpin_mutex; struct mutex delete_unused_bgs_mutex; - /* - * Chunks that can't be freed yet (under a trim/discard operation) - * and will be latter freed. Protected by fs_info->chunk_mutex. 
- */ - struct list_head pinned_chunks; - /* Cached block sizes */ u32 nodesize; u32 sectorsize; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8c10702a3f83..0b2b75a7efbd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2774,8 +2774,6 @@ int open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->delayed_iputs_wait); - INIT_LIST_HEAD(&fs_info->pinned_chunks); - /* Usable values until the real ones are cached from the superblock */ fs_info->nodesize = 4096; fs_info->sectorsize = 4096; @@ -4050,15 +4048,6 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); - - while (!list_empty(&fs_info->pinned_chunks)) { - struct extent_map *em; - - em = list_first_entry(&fs_info->pinned_chunks, - struct extent_map, list); - list_del_init(&em->list); - free_extent_map(em); - } } int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c5f9e8359c6f..a9f504e7be33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10946,10 +10946,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, memcpy(&key, &block_group->key, sizeof(key)); mutex_lock(&fs_info->chunk_mutex); - if (!list_empty(&em->list)) { - /* We're in the transaction->pending_chunks list. */ - free_extent_map(em); - } spin_lock(&block_group->lock); block_group->removed = 1; /* @@ -10976,25 +10972,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * the transaction commit has completed. */ remove_em = (atomic_read(&block_group->trimming) == 0); - /* - * Make sure a trimmer task always sees the em in the pinned_chunks list - * if it sees block_group->removed == 1 (needs to lock block_group->lock - * before checking block_group->removed). 
- */ - if (!remove_em) { - /* - * Our em might be in trans->transaction->pending_chunks which - * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks), - * and so is the fs_info->pinned_chunks list. - * - * So at this point we must be holding the chunk_mutex to avoid - * any races with chunk allocation (more specifically at - * volumes.c:contains_pending_extent()), to ensure it always - * sees the em, either in the pending_chunks list or in the - * pinned_chunks list. - */ - list_move_tail(&em->list, &fs_info->pinned_chunks); - } spin_unlock(&block_group->lock); if (remove_em) { @@ -11002,11 +10979,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, em_tree = &fs_info->mapping_tree.map_tree; write_lock(&em_tree->lock); - /* - * The em might be in the pending_chunks list, so make sure the - * chunk mutex is locked, since remove_extent_mapping() will - * delete us from that list. - */ remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); /* once for the tree */ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 928f729c55ba..5a79a656dfa6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -4,6 +4,7 @@ #include #include #include "ctree.h" +#include "volumes.h" #include "extent_map.h" #include "compression.h" @@ -337,6 +338,37 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, try_merge_map(tree, em); } +static void extent_map_device_set_bits(struct extent_map *em, unsigned bits) +{ + struct map_lookup *map = em->map_lookup; + u64 stripe_size = em->orig_block_len; + int i; + + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_device *device = stripe->dev; + + set_extent_bits_nowait(&device->alloc_state, stripe->physical, + stripe->physical + stripe_size - 1, bits); + } +} + +static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits) +{ + struct map_lookup *map = em->map_lookup; + u64 stripe_size = 
em->orig_block_len; + int i; + + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_device *device = stripe->dev; + + __clear_extent_bit(&device->alloc_state, stripe->physical, + stripe->physical + stripe_size - 1, bits, + 0, 0, NULL, GFP_NOWAIT, NULL); + } +} + /** * add_extent_mapping - add new extent map to the extent tree * @tree: tree to insert new map in @@ -357,6 +389,8 @@ int add_extent_mapping(struct extent_map_tree *tree, goto out; setup_extent_mapping(tree, em, modified); + if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) + extent_map_device_set_bits(em, CHUNK_ALLOCATED); out: return ret; } @@ -438,6 +472,8 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) rb_erase_cached(&em->rb_node, &tree->map); if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) list_del_init(&em->list); + if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) + extent_map_device_clear_bits(em, CHUNK_ALLOCATED); RB_CLEAR_NODE(&em->rb_node); } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 74aa552f4793..207fb50dcc7a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3366,10 +3366,6 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) em = lookup_extent_mapping(em_tree, block_group->key.objectid, 1); BUG_ON(!em); /* logic error, can't happen */ - /* - * remove_extent_mapping() will delete us from the pinned_chunks - * list, which is protected by the chunk mutex. 
- */ remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); mutex_unlock(&fs_info->chunk_mutex); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b32769998bbb..e5404326fc55 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -50,14 +50,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) btrfs_err(transaction->fs_info, "pending csums is %llu", transaction->delayed_refs.pending_csums); - while (!list_empty(&transaction->pending_chunks)) { - struct extent_map *em; - - em = list_first_entry(&transaction->pending_chunks, - struct extent_map, list); - list_del_init(&em->list); - free_extent_map(em); - } /* * If any block groups are found in ->deleted_bgs then it's * because the transaction was aborted and a commit did not @@ -235,7 +227,6 @@ loop: spin_lock_init(&cur_trans->delayed_refs.lock); INIT_LIST_HEAD(&cur_trans->pending_snapshots); - INIT_LIST_HEAD(&cur_trans->pending_chunks); INIT_LIST_HEAD(&cur_trans->dev_update_list); INIT_LIST_HEAD(&cur_trans->switch_commits); INIT_LIST_HEAD(&cur_trans->dirty_bgs); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 2bd76f681520..4419a4a0294b 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -51,7 +51,6 @@ struct btrfs_transaction { wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; - struct list_head pending_chunks; struct list_head dev_update_list; struct list_head switch_commits; struct list_head dirty_bgs; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2bde9e9c188e..2e5e48d8dd2f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -336,6 +336,7 @@ void btrfs_free_device(struct btrfs_device *device) { WARN_ON(!list_empty(&device->post_commit_list)); rcu_string_free(device->name); + extent_io_tree_release(&device->alloc_state); bio_put(device->flush_bio); kfree(device); } @@ -412,6 +413,7 @@ static struct btrfs_device *__alloc_device(void) 
btrfs_device_data_ordered_init(dev); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL); return dev; } @@ -1499,58 +1501,30 @@ error_bdev_put: return device; } -static int contains_pending_extent(struct btrfs_transaction *transaction, - struct btrfs_device *device, - u64 *start, u64 len) +/* + * Try to find a chunk that intersects [start, start + len] range and when one + * such is found, record the end of it in *start + */ +#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) +static bool contains_pending_extent(struct btrfs_device *device, u64 *start, + u64 len) { - struct btrfs_fs_info *fs_info = device->fs_info; - struct extent_map *em; - struct list_head *search_list = &fs_info->pinned_chunks; - int ret = 0; - u64 physical_start = *start; + u64 physical_start, physical_end; - if (transaction) - search_list = &transaction->pending_chunks; -again: - list_for_each_entry(em, search_list, list) { - struct map_lookup *map; - int i; + lockdep_assert_held(&device->fs_info->chunk_mutex); - map = em->map_lookup; - for (i = 0; i < map->num_stripes; i++) { - u64 end; + if (!find_first_extent_bit(&device->alloc_state, *start, + &physical_start, &physical_end, + CHUNK_ALLOCATED, NULL)) { - if (map->stripes[i].dev != device) - continue; - if (map->stripes[i].physical >= physical_start + len || - map->stripes[i].physical + em->orig_block_len <= - physical_start) - continue; - /* - * Make sure that while processing the pinned list we do - * not override our *start with a lower value, because - * we can have pinned chunks that fall within this - * device hole and that have lower physical addresses - * than the pending chunks we processed before. 
If we - * do not take this special care we can end up getting - * 2 pending chunks that start at the same physical - * device offsets because the end offset of a pinned - * chunk can be equal to the start offset of some - * pending chunk. - */ - end = map->stripes[i].physical + em->orig_block_len; - if (end > *start) { - *start = end; - ret = 1; - } + if (in_range(physical_start, *start, len) || + in_range(*start, physical_start, + physical_end - physical_start)) { + *start = physical_end + 1; + return true; } } - if (search_list != &fs_info->pinned_chunks) { - search_list = &fs_info->pinned_chunks; - goto again; - } - - return ret; + return false; } @@ -1661,15 +1635,12 @@ again: * Have to check before we set max_hole_start, otherwise * we could end up sending back this offset anyway. */ - if (contains_pending_extent(transaction, device, - &search_start, + if (contains_pending_extent(device, &search_start, hole_size)) { - if (key.offset >= search_start) { + if (key.offset >= search_start) hole_size = key.offset - search_start; - } else { - WARN_ON_ONCE(1); + else hole_size = 0; - } } if (hole_size > max_hole_size) { @@ -1710,8 +1681,7 @@ next: if (search_end > search_start) { hole_size = search_end - search_start; - if (contains_pending_extent(transaction, device, &search_start, - hole_size)) { + if (contains_pending_extent(device, &search_start, hole_size)) { btrfs_release_path(path); goto again; } @@ -4756,7 +4726,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) * in-memory chunks are synced to disk so that the loop below sees them * and relocates them accordingly. 
*/ - if (contains_pending_extent(trans->transaction, device, &start, diff)) { + if (contains_pending_extent(device, &start, diff)) { mutex_unlock(&fs_info->chunk_mutex); ret = btrfs_commit_transaction(trans); if (ret) @@ -5189,9 +5159,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, free_extent_map(em); goto error; } - - list_add_tail(&em->list, &trans->transaction->pending_chunks); - refcount_inc(&em->refs); write_unlock(&em_tree->lock); ret = btrfs_make_block_group(trans, 0, type, start, chunk_size); @@ -5224,8 +5191,6 @@ error_del_extent: free_extent_map(em); /* One for the tree reference */ free_extent_map(em); - /* One for the pending_chunks list reference */ - free_extent_map(em); error: kfree(devices_info); return ret; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d3658a4e65db..6c466ac27c2e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -133,6 +133,8 @@ struct btrfs_device { /* Counter to record the change of device stats */ atomic_t dev_stats_ccnt; atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; + + struct extent_io_tree alloc_state; }; /* -- cgit v1.2.3-59-g8ed1b From 60dfdf25bd31b94d0ed8e0ea50964ff22cc36a87 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 27 Mar 2019 14:24:14 +0200 Subject: btrfs: Remove 'trans' argument from find_free_dev_extent(_start) Now that these functions no longer require a handle to transaction to inspect pending/pinned chunks the argument can be removed. At the same time also remove any surrounding code which acquired the handle. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 36 +++--------------------------------- fs/btrfs/volumes.c | 11 ++++------- fs/btrfs/volumes.h | 8 +++----- 3 files changed, 10 insertions(+), 45 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a9f504e7be33..8de4ddef6b1d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9915,12 +9915,10 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache) */ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) { - struct btrfs_root *root = fs_info->extent_root; struct btrfs_block_group_cache *block_group; struct btrfs_space_info *space_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; - struct btrfs_trans_handle *trans; u64 min_free; u64 dev_min = 1; u64 dev_nr = 0; @@ -10019,13 +10017,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) min_free = div64_u64(min_free, dev_min); } - /* We need to do this so that we can look at pending chunks */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - mutex_lock(&fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { u64 dev_offset; @@ -10036,7 +10027,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) */ if (device->total_bytes > device->bytes_used + min_free && !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { - ret = find_free_dev_extent(trans, device, min_free, + ret = find_free_dev_extent(device, min_free, &dev_offset, NULL); if (!ret) dev_nr++; @@ -10052,7 +10043,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) "no space to allocate a new chunk for block group %llu", block_group->key.objectid); mutex_unlock(&fs_info->chunk_mutex); - btrfs_end_transaction(trans); out: btrfs_put_block_group(block_group); return ret; @@ -11304,34 +11294,14 
@@ static int btrfs_trim_free_extents(struct btrfs_device *device, while (1) { struct btrfs_fs_info *fs_info = device->fs_info; - struct btrfs_transaction *trans; u64 bytes; ret = mutex_lock_interruptible(&fs_info->chunk_mutex); if (ret) break; - ret = down_read_killable(&fs_info->commit_root_sem); - if (ret) { - mutex_unlock(&fs_info->chunk_mutex); - break; - } - - spin_lock(&fs_info->trans_lock); - trans = fs_info->running_transaction; - if (trans) - refcount_inc(&trans->use_count); - spin_unlock(&fs_info->trans_lock); - - if (!trans) - up_read(&fs_info->commit_root_sem); - - ret = find_free_dev_extent_start(trans, device, range->minlen, - start, &start, &len); - if (trans) { - up_read(&fs_info->commit_root_sem); - btrfs_put_transaction(trans); - } + ret = find_free_dev_extent_start(device, range->minlen, start, + &start, &len); if (ret) { mutex_unlock(&fs_info->chunk_mutex); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2e5e48d8dd2f..d5e8ac6012ec 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1549,8 +1549,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, * But if we don't find suitable free space, it is used to store the size of * the max free space. 
*/ -int find_free_dev_extent_start(struct btrfs_transaction *transaction, - struct btrfs_device *device, u64 num_bytes, +int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes, u64 search_start, u64 *start, u64 *len) { struct btrfs_fs_info *fs_info = device->fs_info; @@ -1706,13 +1705,11 @@ out: return ret; } -int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, u64 num_bytes, +int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *len) { /* FIXME use last free of some kind */ - return find_free_dev_extent_start(trans->transaction, device, - num_bytes, 0, start, len); + return find_free_dev_extent_start(device, num_bytes, 0, start, len); } static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, @@ -5025,7 +5022,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (total_avail == 0) continue; - ret = find_free_dev_extent(trans, device, + ret = find_free_dev_extent(device, max_stripe_size * dev_stripes, &dev_offset, &max_avail); if (ret && ret != -ENOSPC) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6c466ac27c2e..986eaeba1a07 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -453,11 +453,9 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset); -int find_free_dev_extent_start(struct btrfs_transaction *transaction, - struct btrfs_device *device, u64 num_bytes, - u64 search_start, u64 *start, u64 *max_avail); -int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, u64 num_bytes, +int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes, + u64 search_start, u64 *start, u64 *max_avail); +int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); void 
btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, -- cgit v1.2.3-59-g8ed1b From e74e3993bcf6a1d119a2bbe7af2cc278a147f930 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 27 Mar 2019 14:24:15 +0200 Subject: btrfs: Factor out in_range macro This is used in more than one place so let's factor it out in ctree.h. No functional changes. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/extent-tree.c | 1 - fs/btrfs/volumes.c | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 93270e20a8e7..1a6c5ce0cdac 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3806,6 +3806,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) return signal_pending(current); } +#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) + /* Sanity test specific functions */ #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS void btrfs_test_inode_set_ops(struct inode *inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8de4ddef6b1d..7500728bcdd3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1905,7 +1905,6 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, u64 *discarded_bytes) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d5e8ac6012ec..7fabbbae3c62 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1505,7 +1505,6 @@ error_bdev_put: * Try to find a chunk that intersects [start, start + len] range and when one * such is found, record the end of it in *start */ -#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) static bool contains_pending_extent(struct btrfs_device *device, u64
*start, u64 len) { -- cgit v1.2.3-59-g8ed1b From 8811133d8a982d3cef5d25eef54a8dca9e8e6ded Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 27 Mar 2019 14:24:16 +0200 Subject: btrfs: Optimize unallocated chunks discard Currently unallocated chunks are always trimmed. For example 2 consecutive trims on large storage would trim freespace twice irrespective of whether the space was actually allocated or not between those trims. Optimise this behavior by exploiting the newly introduced alloc_state tree of btrfs_device. A new CHUNK_TRIMMED bit is used to mark those unallocated chunks which have been trimmed and have not been allocated afterwards. On chunk allocation the respective underlying devices' physical space will have its CHUNK_TRIMMED flag cleared. This avoids submitting discards for space which hasn't been changed since the last time discard was issued. This applies to the single mount period of the filesystem as the information is not stored permanently. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/extent_io.h | 8 ++++++- fs/btrfs/extent_map.c | 4 +++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7500728bcdd3..1ebac1982a9c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11249,6 +11249,54 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, return unpin_extent_range(fs_info, start, end, false); } +static bool should_skip_trim(struct btrfs_device *device, u64 *start, u64 *len) +{ + u64 trimmed_start = 0, trimmed_end = 0; + u64 end = *start + *len - 1; + + if (!find_first_extent_bit(&device->alloc_state, *start, &trimmed_start, + &trimmed_end, CHUNK_TRIMMED, NULL)) { + u64 trimmed_len = trimmed_end - trimmed_start + 1; + + if (*start < trimmed_start) { + if (in_range(end, trimmed_start, trimmed_len) || + end > trimmed_end) { + /* + * 
start|------|end + * ts|--|trimmed_len + * OR + * start|-----|end + * ts|-----|trimmed_len + */ + *len = trimmed_start - *start; + return false; + } else if (end < trimmed_start) { + /* + * start|------|end + * ts|--|trimmed_len + */ + return false; + } + } else if (in_range(*start, trimmed_start, trimmed_len)) { + if (in_range(end, trimmed_start, trimmed_len)) { + /* + * start|------|end + * ts|----------|trimmed_len + */ + return true; + } else { + /* + * start|-----------|end + * ts|----------|trimmed_len + */ + *start = trimmed_end + 1; + *len = end - *start + 1; + return false; + } + } + } + return false; +} /* * It used to be that old block groups would be left around forever. * Iterating over them would be enough to trim unused space. Since we @@ -11319,7 +11367,14 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start = max(range->start, start); len = min(range->len, len); - ret = btrfs_issue_discard(device->bdev, start, len, &bytes); + if (!should_skip_trim(device, &start, &len)) { + ret = btrfs_issue_discard(device->bdev, start, len, + &bytes); + if (!ret) + set_extent_bits(&device->alloc_state, start, + start + bytes - 1, + CHUNK_TRIMMED); + } mutex_unlock(&fs_info->chunk_mutex); if (ret) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6435c2818ec3..1680832d2c88 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -27,8 +27,14 @@ EXTENT_CLEAR_DATA_RESV) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING) -/* Redefined bits above which are used only in the device allocation tree */ +/* + * Redefined bits above which are used only in the device allocation tree, + * shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV + * / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit + * manipulation functions + */ #define CHUNK_ALLOCATED EXTENT_DIRTY +#define CHUNK_TRIMMED EXTENT_DEFRAG /* * flags for bio submission. 
The high bits indicate the compression diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 5a79a656dfa6..9558d79faf1e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -389,8 +389,10 @@ int add_extent_mapping(struct extent_map_tree *tree, goto out; setup_extent_mapping(tree, em, modified); - if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) + if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) { extent_map_device_set_bits(em, CHUNK_ALLOCATED); + extent_map_device_clear_bits(em, CHUNK_TRIMMED); + } out: return ret; } -- cgit v1.2.3-59-g8ed1b From 45bfcfc168f84f498d9825ec20ff3f4ee9208e04 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 27 Mar 2019 14:24:17 +0200 Subject: btrfs: Implement find_first_clear_extent_bit This function is very similar to find_first_extent_bit except that it locates the first contiguous span of space which does not have bits set. Its intended use is in the freespace trimming code. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 2 ++ 2 files changed, 75 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ff1f7b4ac02c..828708f6510c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1542,6 +1542,79 @@ out: return ret; } +/** + * find_first_clear_extent_bit - finds the first range that has @bits not set + * and that starts after @start + * + * @tree - the tree to search + * @start - the offset at/after which the found extent should start + * @start_ret - records the beginning of the range + * @end_ret - records the end of the range (inclusive) + * @bits - the set of bits which must be unset + * + * Since unallocated range is also considered one which doesn't have the bits + * set it's possible that @end_ret contains -1, this happens in case the range + * spans (last_range_end, end of device].
In this case it's up to the caller to + * trim @end_ret to the appropriate size. + */ +void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, unsigned bits) +{ + struct extent_state *state; + struct rb_node *node, *prev = NULL, *next; + + spin_lock(&tree->lock); + + /* Find first extent with bits cleared */ + while (1) { + node = __etree_search(tree, start, &next, &prev, NULL, NULL); + if (!node) { + node = next; + if (!node) { + /* + * We are past the last allocated chunk, + * set start at the end of the last extent. The + * device alloc tree should never be empty so + * prev is always set. + */ + ASSERT(prev); + state = rb_entry(prev, struct extent_state, rb_node); + *start_ret = state->end + 1; + *end_ret = -1; + goto out; + } + } + state = rb_entry(node, struct extent_state, rb_node); + if (in_range(start, state->start, state->end - state->start + 1) && + (state->state & bits)) { + start = state->end + 1; + } else { + *start_ret = start; + break; + } + } + + /* + * Find the longest stretch from start until an entry which has the + * bits set + */ + while (1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && !(state->state & bits)) { + *end_ret = state->end; + } else { + *end_ret = state->start - 1; + break; + } + + node = rb_next(node); + if (!node) + break; + } +out: + spin_unlock(&tree->lock); +} + /* * find a contiguous range of bytes in the file marked as delalloc, not * more than 'max_bytes'. 
start and end are used to return the range, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1680832d2c88..f7ca1516f70b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -403,6 +403,8 @@ static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start, int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, unsigned bits, struct extent_state **cached_state); +void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, unsigned bits); int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct page *page, struct writeback_control *wbc); -- cgit v1.2.3-59-g8ed1b From 929be17a9b49b10743c3db304f939d27da1abd2f Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 27 Mar 2019 14:24:18 +0200 Subject: btrfs: Switch btrfs_trim_free_extents to find_first_clear_extent_bit Instead of always calling the allocator to search for a free extent, that satisfies the input criteria, switch btrfs_trim_free_extents to using find_first_clear_extent_bit. With this change it's no longer necessary to read the device tree in order to figure out holes in the devices. Now the code always searches in-memory data structure to figure out the space range which contains the requested which should result in speed improvements. 
Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 91 ++++++++++++++++---------------------------------- 1 file changed, 28 insertions(+), 63 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1ebac1982a9c..4fc2fca3b740 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11249,54 +11249,6 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, return unpin_extent_range(fs_info, start, end, false); } -static bool should_skip_trim(struct btrfs_device *device, u64 *start, u64 *len) -{ - u64 trimmed_start = 0, trimmed_end = 0; - u64 end = *start + *len - 1; - - if (!find_first_extent_bit(&device->alloc_state, *start, &trimmed_start, - &trimmed_end, CHUNK_TRIMMED, NULL)) { - u64 trimmed_len = trimmed_end - trimmed_start + 1; - - if (*start < trimmed_start) { - if (in_range(end, trimmed_start, trimmed_len) || - end > trimmed_end) { - /* - * start|------|end - * ts|--|trimmed_len - * OR - * start|-----|end - * ts|-----|trimmed_len - */ - *len = trimmed_start - *start; - return false; - } else if (end < trimmed_start) { - /* - * start|------|end - * ts|--|trimmed_len - */ - return false; - } - } else if (in_range(*start, trimmed_start, trimmed_len)) { - if (in_range(end, trimmed_start, trimmed_len)) { - /* - * start|------|end - * ts|----------|trimmed_len - */ - return true; - } else { - /* - * start|-----------|end - * ts|----------|trimmed_len - */ - *start = trimmed_end + 1; - *len = end - *start + 1; - return false; - } - } - } - return false; -} /* * It used to be that old block groups would be left around forever. * Iterating over them would be enough to trim unused space. 
Since we @@ -11320,9 +11272,10 @@ static bool should_skip_trim(struct btrfs_device *device, u64 *start, u64 *len) static int btrfs_trim_free_extents(struct btrfs_device *device, struct fstrim_range *range, u64 *trimmed) { - u64 start = range->start, len = 0; + u64 start, len = 0, end = 0; int ret; + start = max_t(u64, range->start, SZ_1M); *trimmed = 0; /* Discard not supported = nothing to do. */ @@ -11347,34 +11300,46 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, if (ret) break; - ret = find_free_dev_extent_start(device, range->minlen, start, - &start, &len); + find_first_clear_extent_bit(&device->alloc_state, start, + &start, &end, + CHUNK_TRIMMED | CHUNK_ALLOCATED); + /* + * If find_first_clear_extent_bit find a range that spans the + * end of the device it will set end to -1, in this case it's up + * to the caller to trim the value to the size of the device. + */ + end = min(end, device->total_bytes - 1); + len = end - start + 1; - if (ret) { + /* We didn't find any extents */ + if (!len) { mutex_unlock(&fs_info->chunk_mutex); - if (ret == -ENOSPC) - ret = 0; + ret = 0; break; } + /* Keep going until we satisfy minlen or reach end of space */ + if (len < range->minlen) { + mutex_unlock(&fs_info->chunk_mutex); + start += len; + continue; + } + /* If we are out of the passed range break */ if (start > range->start + range->len - 1) { mutex_unlock(&fs_info->chunk_mutex); - ret = 0; break; } start = max(range->start, start); len = min(range->len, len); - if (!should_skip_trim(device, &start, &len)) { - ret = btrfs_issue_discard(device->bdev, start, len, - &bytes); - if (!ret) - set_extent_bits(&device->alloc_state, start, - start + bytes - 1, - CHUNK_TRIMMED); - } + ret = btrfs_issue_discard(device->bdev, start, len, + &bytes); + if (!ret) + set_extent_bits(&device->alloc_state, start, + start + bytes - 1, + CHUNK_TRIMMED); mutex_unlock(&fs_info->chunk_mutex); if (ret) -- cgit v1.2.3-59-g8ed1b From 1c4360ee056b724d3b0e4cb2d0fe4ff6c242b7ca Mon 
Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:23:29 +0100 Subject: btrfs: get fs_info from eb in btrfs_check_leaf_full We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/tree-checker.c | 3 +-- fs/btrfs/tree-checker.h | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0b2b75a7efbd..3c488fc493bd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -652,7 +652,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6828de4e976c..594f8a0bff61 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -963,8 +963,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) return 0; } -int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf) +int btrfs_check_leaf_full(struct extent_buffer *leaf) { return check_leaf(leaf, true); } diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 4df45e8a6659..0a0c25a2e77c 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -14,8 +14,7 @@ * Will check not only the item pointers, but also every possible member * in item data. */ -int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf); +int btrfs_check_leaf_full(struct extent_buffer *leaf); /* * Less strict leaf checker. 
-- cgit v1.2.3-59-g8ed1b From cfdaad5e5f425b0a42ff1df3893f137b90099f44 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:24:18 +0100 Subject: btrfs: get fs_info from eb in btrfs_check_leaf_relaxed We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/tree-checker.c | 3 +-- fs/btrfs/tree-checker.h | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3c488fc493bd..516812801081 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4101,7 +4101,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) * So here we should only check item pointers, not item data. */ if (btrfs_header_level(buf) == 0 && - btrfs_check_leaf_relaxed(fs_info, buf)) { + btrfs_check_leaf_relaxed(buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 594f8a0bff61..785318ead9af 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -968,8 +968,7 @@ int btrfs_check_leaf_full(struct extent_buffer *leaf) return check_leaf(leaf, true); } -int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf) +int btrfs_check_leaf_relaxed(struct extent_buffer *leaf) { return check_leaf(leaf, false); } diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 0a0c25a2e77c..57c121ab8898 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -20,8 +20,7 @@ int btrfs_check_leaf_full(struct extent_buffer *leaf); * Less strict leaf checker. * Will only check item pointers, not reading item data. 
*/ -int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf); +int btrfs_check_leaf_relaxed(struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node); int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, -- cgit v1.2.3-59-g8ed1b From 813fd1dcab2d030ce4445f3ecc143a492a62e8b8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:25:00 +0100 Subject: btrfs: get fs_info from eb in btrfs_check_node We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/tree-checker.c | 3 ++- fs/btrfs/tree-checker.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 516812801081..36c44ddadd2d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -657,7 +657,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, ret = -EIO; } - if (found_level > 0 && btrfs_check_node(fs_info, eb)) + if (found_level > 0 && btrfs_check_node(eb)) ret = -EIO; if (!ret) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 785318ead9af..3f047758ec24 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -973,8 +973,9 @@ int btrfs_check_leaf_relaxed(struct extent_buffer *leaf) return check_leaf(leaf, false); } -int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node) +int btrfs_check_node(struct extent_buffer *node) { + struct btrfs_fs_info *fs_info = node->fs_info; unsigned long nr = btrfs_header_nritems(node); struct btrfs_key key, next_key; int slot; diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 57c121ab8898..96cf044df737 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -21,7 +21,7 @@ int btrfs_check_leaf_full(struct extent_buffer *leaf); * Will only check item pointers, not reading item data. 
*/ int btrfs_check_leaf_relaxed(struct extent_buffer *leaf); -int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node); +int btrfs_check_node(struct extent_buffer *node); int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, -- cgit v1.2.3-59-g8ed1b From 6ec0896c4c810da6abf2130f13cbbe38a8955697 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:38:52 +0100 Subject: btrfs: get fs_info from eb in should_balance_chunk We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7fabbbae3c62..4576f0e69d33 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3561,10 +3561,10 @@ static int chunk_soft_convert_filter(u64 chunk_type, return 0; } -static int should_balance_chunk(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk *chunk, u64 chunk_offset) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_balance_control *bctl = fs_info->balance_ctl; struct btrfs_balance_args *bargs = NULL; u64 chunk_type = btrfs_chunk_type(leaf, chunk); @@ -3744,8 +3744,7 @@ again: spin_unlock(&fs_info->balance_lock); } - ret = should_balance_chunk(fs_info, leaf, chunk, - found_key.offset); + ret = should_balance_chunk(leaf, chunk, found_key.offset); btrfs_release_path(path); if (!ret) { -- cgit v1.2.3-59-g8ed1b From ddaf1d5aef2c34f0cf5b0d71a48f9d88c5a0e2dc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:40:48 +0100 Subject: btrfs: get fs_info from eb in btrfs_check_chunk_valid We can read fs_info from extent buffer and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 7 +++---- fs/btrfs/tree-checker.h | 3 +-- fs/btrfs/volumes.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 3f047758ec24..5c988b57b90e 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -498,10 +498,10 @@ static void chunk_err(const struct extent_buffer *leaf, * Return -EUCLEAN if anything is corrupted. * Return 0 if everything is OK. */ -int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +int btrfs_check_chunk_valid(struct extent_buffer *leaf, struct btrfs_chunk *chunk, u64 logical) { + struct btrfs_fs_info *fs_info = leaf->fs_info; u64 length; u64 stripe_len; u16 num_stripes; @@ -793,8 +793,7 @@ static int check_leaf_item(struct extent_buffer *leaf, break; case BTRFS_CHUNK_ITEM_KEY: chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); - ret = btrfs_check_chunk_valid(leaf->fs_info, leaf, chunk, - key->offset); + ret = btrfs_check_chunk_valid(leaf, chunk, key->offset); break; case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 96cf044df737..32fecc9dc1dd 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -23,8 +23,7 @@ int btrfs_check_leaf_full(struct extent_buffer *leaf); int btrfs_check_leaf_relaxed(struct extent_buffer *leaf); int btrfs_check_node(struct extent_buffer *node); -int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +int btrfs_check_chunk_valid(struct extent_buffer *leaf, struct btrfs_chunk *chunk, u64 logical); #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4576f0e69d33..9a550b268017 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6710,7 +6710,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, * as chunk item in tree block is already verified by tree-checker. 
*/ if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { - ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); + ret = btrfs_check_chunk_valid(leaf, chunk, logical); if (ret) return ret; } -- cgit v1.2.3-59-g8ed1b From 9690ac09877482a03a93b0b2147f15fc636a49f9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:43:07 +0100 Subject: btrfs: get fs_info from eb in read_one_chunk We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9a550b268017..2c04779a6d3c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6686,10 +6686,10 @@ static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) return div_u64(chunk_len, data_stripes); } -static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, - struct extent_buffer *leaf, +static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; struct extent_map *em; @@ -7069,7 +7069,7 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) if (cur_offset + len > array_size) goto out_short_read; - ret = read_one_chunk(fs_info, &key, sb, chunk); + ret = read_one_chunk(&key, sb, chunk); if (ret) break; } else { @@ -7217,7 +7217,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); - ret = read_one_chunk(fs_info, &found_key, leaf, chunk); + ret = read_one_chunk(&found_key, leaf, chunk); if (ret) goto error; } -- cgit v1.2.3-59-g8ed1b From 178507595c934463da440fef8a0341f9a867ba38 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:45:15 +0100 
Subject: btrfs: get fs_info from eb in read_one_dev We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2c04779a6d3c..c10e389b245f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6877,10 +6877,10 @@ out: return fs_devices; } -static int read_one_dev(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, +static int read_one_dev(struct extent_buffer *leaf, struct btrfs_dev_item *dev_item) { + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; u64 devid; @@ -7210,7 +7210,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) struct btrfs_dev_item *dev_item; dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); - ret = read_one_dev(fs_info, leaf, dev_item); + ret = read_one_dev(leaf, dev_item); if (ret) goto error; total_dev++; -- cgit v1.2.3-59-g8ed1b From 39ad317315887c2cb9a4347a93a8859326ddf136 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Tue, 26 Mar 2019 11:56:11 +0800 Subject: Btrfs: fix data bytes_may_use underflow with fallocate due to failed quota reserve When doing fallocate, we first add the range to the reserve_list and then reserve the quota. If quota reservation fails, we'll release all reserved parts of reserve_list. However, cur_offset is not updated to indicate that this range has already been inserted into the list. Therefore, the same range is freed twice: once in the list_for_each_entry loop, and once at the end of the function. This will result in a WARN_ON on bytes_may_use when we free the remaining space. At the end, under the 'out' label we have a call to: btrfs_free_reserved_data_space(inode, data_reserved, alloc_start, alloc_end - cur_offset); The start offset, third argument, should be cur_offset.
Everything from alloc_start to cur_offset was freed by the list_for_each_entry_safe loop. Fixes: 18513091af94 ("btrfs: update btrfs_space_info's bytes_may_use timely") Reviewed-by: Filipe Manana Signed-off-by: Robbie Ko Signed-off-by: David Sterba --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 15cc3b861346..c857a884a90f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3131,6 +3131,7 @@ static long btrfs_fallocate(struct file *file, int mode, ret = btrfs_qgroup_reserve_data(inode, &data_reserved, cur_offset, last_byte - cur_offset); if (ret < 0) { + cur_offset = last_byte; free_extent_map(em); break; } @@ -3180,7 +3181,7 @@ out: /* Let go of our reservation. */ if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE)) btrfs_free_reserved_data_space(inode, data_reserved, - alloc_start, alloc_end - cur_offset); + cur_offset, alloc_end - cur_offset); extent_changeset_free(data_reserved); return ret; } -- cgit v1.2.3-59-g8ed1b From ff2ac107fae2440b6877c615c0ac788d2a106ed7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 11:47:07 +0800 Subject: btrfs: tree-checker: Remove comprehensive root owner check Commit 1ba98d086fe3 ("Btrfs: detect corruption when non-root leaf has zero item") introduced a comprehensive root owner check. However, it is a pretty expensive tree search to locate the owner root, especially when it gets reused by the mandatory read and write time tree-checker. This patch removes that check and relies completely on the owner-based empty leaf check, which is much faster and still works fine for most cases. And since we skip the old root owner check, the write time tree check can now be merged with btrfs_check_leaf_full().
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 5c988b57b90e..b9dd2a6e133f 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -831,7 +831,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) */ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { u64 owner = btrfs_header_owner(leaf); - struct btrfs_root *check_root; /* These trees must never be empty */ if (owner == BTRFS_ROOT_TREE_OBJECTID || @@ -845,29 +844,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) owner); return -EUCLEAN; } - key.objectid = owner; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - check_root = btrfs_get_fs_root(fs_info, &key, false); - /* - * The only reason we also check NULL here is that during - * open_ctree() some roots has not yet been set up. - */ - if (!IS_ERR_OR_NULL(check_root)) { - struct extent_buffer *eb; - - eb = btrfs_root_node(check_root); - /* if leaf is the root, then it's fine */ - if (leaf != eb) { - generic_err(leaf, 0, - "invalid nritems, have %u should not be 0 for non-root leaf", - nritems); - free_extent_buffer(eb); - return -EUCLEAN; - } - free_extent_buffer(eb); - } return 0; } -- cgit v1.2.3-59-g8ed1b From 8d47a0d8f7947422dd359ac8e462687f81a7a137 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 11:47:08 +0800 Subject: btrfs: Do mandatory tree block check before submitting bio There are at least 2 reports about a memory bit flip sneaking into on-disk data. Currently we only have a relaxed check triggered at btrfs_mark_buffer_dirty() time, as it's not mandatory and only for CONFIG_BTRFS_FS_CHECK_INTEGRITY enabled build, it doesn't help users to detect such problem. This patch will address the hole by triggering comprehensive check on tree blocks before writing it back to disk. 
The design points are: - Timing of the check: Tree block write hook This timing is chosen to reduce the overhead. The comprehensive check should be as expensive as a checksum calculation. Doing full check at btrfs_mark_buffer_dirty() is too expensive for end user. - Loose empty leaf check Originally for an empty leaf, tree-checker will report error if it's not a tree root. The problem for such check at write time is: * False alert for tree root created in current transaction In that case, the commit root still needs to be written to disk. And since current root can differ from commit root, then it will cause false alert. This happens for log tree. * False alert for relocated tree block Relocated tree block can be written to disk due to memory pressure, in that case an empty csum tree root can be written to disk and cause false alert, since csum root node hasn't been updated. Previous patch of removing comprehensive empty leaf owner check has paved the way for this patch. The example error output will be something like: BTRFS critical (device dm-3): corrupt leaf: root=2 block=1350630375424 slot=68, bad key order, prev (10510212874240 169 0) current (1714119868416 169 0) BTRFS error (device dm-3): block=1350630375424 write time tree block corruption detected BTRFS: error (device dm-3) in btrfs_commit_transaction:2220: errno=-5 IO failure (Error while writing out transaction) BTRFS info (device dm-3): forced readonly BTRFS warning (device dm-3): Skipping commit of aborted transaction. 
BTRFS: error (device dm-3) in cleanup_transaction:1839: errno=-5 IO failure BTRFS info (device dm-3): delayed_refs has NO entry Reported-by: Leonard Lausen Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 36c44ddadd2d..6fc09d24a7f1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -514,6 +514,7 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) u8 result[BTRFS_CSUM_SIZE]; u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); struct extent_buffer *eb; + int ret; eb = (struct extent_buffer *)page->private; if (page != eb->pages[0]) @@ -535,7 +536,19 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) if (csum_tree_block(eb, result)) return -EINVAL; + if (btrfs_header_level(eb)) + ret = btrfs_check_node(eb); + else + ret = btrfs_check_leaf_full(eb); + + if (ret < 0) { + btrfs_err(fs_info, + "block=%llu write time tree block corruption detected", + eb->start); + return ret; + } write_extent_buffer(eb, result, 0, csum_size); + return 0; } -- cgit v1.2.3-59-g8ed1b From f9756261c28c0acdaac8a7a8894d3e25d334d5f3 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 10 Apr 2019 16:16:11 +0300 Subject: btrfs: Remove redundant inode argument from btrfs_add_ordered_sum Ordered csums are keyed off of a btrfs_ordered_extent, which already has a reference to the inode. This implies that an explicit inode argument is redundant. So remove it. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 4 ++-- fs/btrfs/ordered-data.c | 5 ++--- fs/btrfs/ordered-data.h | 3 +-- fs/btrfs/relocation.c | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 0867fca4b63d..a120d6ba3a28 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -469,7 +469,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, sums->len = this_sum_bytes; this_sum_bytes = 0; - btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_add_ordered_sum(ordered, sums); btrfs_put_ordered_extent(ordered); bytes_left = bio->bi_iter.bi_size - total_bytes; @@ -506,7 +506,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, } this_sum_bytes = 0; - btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_add_ordered_sum(ordered, sums); btrfs_put_ordered_extent(ordered); return 0; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 45e3cfd1198b..f6bb6039fa4c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -271,13 +271,12 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, * when an ordered extent is finished. If the list covers more than one * ordered extent, it is split across multiples. 
*/ -void btrfs_add_ordered_sum(struct inode *inode, - struct btrfs_ordered_extent *entry, +void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum) { struct btrfs_ordered_inode_tree *tree; - tree = &BTRFS_I(inode)->ordered_tree; + tree = &BTRFS_I(entry->inode)->ordered_tree; spin_lock_irq(&tree->lock); list_add_tail(&sum->list, &entry->list); spin_unlock_irq(&tree->lock); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index fb9a161f0215..4c5991c3de14 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -167,8 +167,7 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type, int compress_type); -void btrfs_add_ordered_sum(struct inode *inode, - struct btrfs_ordered_extent *entry, +void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 25fc6a4f3ecf..2246aa2e30e3 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4604,7 +4604,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); sums->bytenr = new_bytenr; - btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_add_ordered_sum(ordered, sums); } out: btrfs_put_ordered_extent(ordered); -- cgit v1.2.3-59-g8ed1b From 39db232daec79c8fec83b3447a8c9b7352c79fcc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:57:46 +0100 Subject: btrfs: get fs_info from trans in write_one_cache_group We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4fc2fca3b740..ef901d168579 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3315,10 +3315,10 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } static int write_one_cache_group(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_root *extent_root = fs_info->extent_root; unsigned long bi; @@ -3666,8 +3666,7 @@ again: } } if (!ret) { - ret = write_one_cache_group(trans, fs_info, - path, cache); + ret = write_one_cache_group(trans, path, cache); /* * Our block group might still be attached to the list * of new block groups in the transaction handle of some @@ -3817,8 +3816,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, } } if (!ret) { - ret = write_one_cache_group(trans, fs_info, - path, cache); + ret = write_one_cache_group(trans, path, cache); /* * One of the free space endio workers might have * created a new block group while updating a free space @@ -3835,8 +3833,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (ret == -ENOENT) { wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); - ret = write_one_cache_group(trans, fs_info, - path, cache); + ret = write_one_cache_group(trans, path, cache); } if (ret) btrfs_abort_transaction(trans, ret); -- cgit v1.2.3-59-g8ed1b From bbebb3e0babb68bbff240608aaa14229d2d5d1dc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 12:02:55 +0100 Subject: btrfs: get fs_info from trans in btrfs_setup_space_cache We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/transaction.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1a6c5ce0cdac..66f282340c62 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2769,8 +2769,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans); int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_fs_info *info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ef901d168579..ecb590a1e613 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3531,9 +3531,9 @@ out: return ret; } -int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group_cache *cache, *tmp; struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_path *path; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e5404326fc55..f233aeb019ec 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1104,7 +1104,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) if (ret) return ret; - ret = btrfs_setup_space_cache(trans, fs_info); + ret = btrfs_setup_space_cache(trans); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From 5742d15fa76adfc833642f9c24f7c31c9b1a1646 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 12:04:08 +0100 Subject: btrfs: get 
fs_info from trans in btrfs_write_dirty_block_groups We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/transaction.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 66f282340c62..cad183a17c1c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2767,8 +2767,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans); -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_setup_space_cache(struct btrfs_trans_handle *trans); int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ecb590a1e613..86e963676f57 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3736,9 +3736,9 @@ again: return ret; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group_cache *cache; struct btrfs_transaction *cur_trans = trans->transaction; int ret = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f233aeb019ec..cc326d964567 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1132,7 +1132,7 @@ again: } while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) { - ret = btrfs_write_dirty_block_groups(trans, fs_info); + ret = btrfs_write_dirty_block_groups(trans); if (ret) return ret; ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); -- cgit v1.2.3-59-g8ed1b From 
6b2794084345e55105f8b50ce33c81a9f2b6f7b9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 12:10:15 +0100 Subject: btrfs: get fs_info from trans in update_block_group We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 86e963676f57..3dcb53c32226 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6351,9 +6351,9 @@ void btrfs_delalloc_release_space(struct inode *inode, } static int update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, u64 bytenr, - u64 num_bytes, int alloc) + u64 bytenr, u64 num_bytes, int alloc) { + struct btrfs_fs_info *info = trans->fs_info; struct btrfs_block_group_cache *cache = NULL; u64 total = num_bytes; u64 old_val; @@ -7188,7 +7188,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, goto out; } - ret = update_block_group(trans, info, bytenr, num_bytes, 0); + ret = update_block_group(trans, bytenr, num_bytes, 0); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -8360,7 +8360,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1); + ret = update_block_group(trans, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", ins->objectid, ins->offset); @@ -8450,7 +8450,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = update_block_group(trans, fs_info, extent_key.objectid, + ret = update_block_group(trans, extent_key.objectid, fs_info->nodesize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", -- cgit v1.2.3-59-g8ed1b From 
9b7a2440ae51de73bf80e07b0465b9ed736397bd Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:20:49 +0100 Subject: btrfs: get fs_info from trans in btrfs_create_tree We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/disk-io.h | 1 - fs/btrfs/free-space-tree.c | 2 +- fs/btrfs/qgroup.c | 3 +-- fs/btrfs/volumes.c | 3 +-- 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6fc09d24a7f1..f5d0517a8563 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1266,9 +1266,9 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info) #endif struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 objectid) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct extent_buffer *leaf; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a32158f0c2fc..a0161aa1ea0b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -134,7 +134,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans, void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 objectid); int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)); diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 4859e09d2af0..771126bc67bc 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1150,7 +1150,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) return PTR_ERR(trans); set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); - free_space_root = btrfs_create_tree(trans, fs_info, + free_space_root = btrfs_create_tree(trans, 
BTRFS_FREE_SPACE_TREE_OBJECTID); if (IS_ERR(free_space_root)) { ret = PTR_ERR(free_space_root); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 95361ccaa488..2f708f2c4e67 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -918,8 +918,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) /* * initially create the quota tree */ - quota_root = btrfs_create_tree(trans, fs_info, - BTRFS_QUOTA_TREE_OBJECTID); + quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID); if (IS_ERR(quota_root)) { ret = PTR_ERR(quota_root); btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c10e389b245f..0a816279a803 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4620,8 +4620,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) return PTR_ERR(trans); - uuid_root = btrfs_create_tree(trans, fs_info, - BTRFS_UUID_TREE_OBJECTID); + uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID); if (IS_ERR(uuid_root)) { ret = PTR_ERR(uuid_root); btrfs_abort_transaction(trans, ret); -- cgit v1.2.3-59-g8ed1b From 4884b8e8ebf54e1654bddd9721b9b00e69d6a148 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:25:34 +0100 Subject: btrfs: get fs_info from trans in btrfs_need_log_full_commit We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 6 +++--- fs/btrfs/tree-log.h | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 79f75bec9f40..dc030f620be1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -139,7 +139,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, mutex_lock(&root->log_mutex); if (root->log_root) { - if (btrfs_need_log_full_commit(fs_info, trans)) { + if (btrfs_need_log_full_commit(trans)) { ret = -EAGAIN; goto out; } @@ -3077,7 +3077,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } /* bail out if we need to do a full commit */ - if (btrfs_need_log_full_commit(fs_info, trans)) { + if (btrfs_need_log_full_commit(trans)) { ret = -EAGAIN; mutex_unlock(&root->log_mutex); goto out; @@ -3184,7 +3184,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * now that we've moved on to the tree of log tree roots, * check the full commit flag again */ - if (btrfs_need_log_full_commit(fs_info, trans)) { + if (btrfs_need_log_full_commit(trans)) { blk_finish_plug(&plug); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 0fab84a8f670..bdf63d0c4128 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -36,10 +36,9 @@ static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info, WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid); } -static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans) +static inline int btrfs_need_log_full_commit(struct btrfs_trans_handle *trans) { - return READ_ONCE(fs_info->last_trans_log_full_commit) == + return READ_ONCE(trans->fs_info->last_trans_log_full_commit) == trans->transid; } -- cgit v1.2.3-59-g8ed1b From 907877664e2d858b9344e3a46ceeb2e74425e81f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:28:05 +0100 Subject: 
btrfs: get fs_info from trans in btrfs_set_log_full_commit We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/inode.c | 10 +++++----- fs/btrfs/tree-log.c | 19 +++++++++---------- fs/btrfs/tree-log.h | 5 ++--- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3dcb53c32226..174f4f0311da 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10640,7 +10640,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, struct btrfs_block_group_cache *cache; int ret; - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size); if (!cache) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index baa80d808806..b998e288366a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9454,7 +9454,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* Reference for the source. */ if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { /* force full log commit if subvolume involved. */ - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); } else { btrfs_pin_log_trans(root); root_log_pinned = true; @@ -9471,7 +9471,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* And now for the dest. */ if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { /* force full log commit if subvolume involved. 
*/ - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); } else { btrfs_pin_log_trans(dest); dest_log_pinned = true; @@ -9607,7 +9607,7 @@ out_fail: btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || (new_inode && btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); if (root_log_pinned) { btrfs_end_log_trans(root); @@ -9793,7 +9793,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, BTRFS_I(old_inode)->dir_index = 0ULL; if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { /* force full log commit if subvolume involved. */ - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); } else { btrfs_pin_log_trans(root); log_pinned = true; @@ -9914,7 +9914,7 @@ out_fail: btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || (new_inode && btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); btrfs_end_log_trans(root); log_pinned = false; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dc030f620be1..75a8e3c78fdb 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3096,7 +3096,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (ret) { blk_finish_plug(&plug); btrfs_abort_transaction(trans, ret); - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); mutex_unlock(&root->log_mutex); goto out; } @@ -3138,7 +3138,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, list_del_init(&root_log_ctx.list); blk_finish_plug(&plug); - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); if (ret != -ENOSPC) { btrfs_abort_transaction(trans, ret); @@ -3197,7 +3197,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW); blk_finish_plug(&plug); if (ret) { - btrfs_set_log_full_commit(fs_info, trans); + 
btrfs_set_log_full_commit(trans); btrfs_abort_transaction(trans, ret); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; @@ -3207,7 +3207,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_wait_tree_log_extents(log_root_tree, EXTENT_NEW | EXTENT_DIRTY); if (ret) { - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } @@ -3229,7 +3229,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ ret = write_all_supers(fs_info, 1); if (ret) { - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); btrfs_abort_transaction(trans, ret); goto out_wake_log_root; } @@ -3433,7 +3433,7 @@ fail: out_unlock: mutex_unlock(&dir->log_mutex); if (ret == -ENOSPC) { - btrfs_set_log_full_commit(root->fs_info, trans); + btrfs_set_log_full_commit(trans); ret = 0; } else if (ret < 0) btrfs_abort_transaction(trans, ret); @@ -3449,7 +3449,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, const char *name, int name_len, struct btrfs_inode *inode, u64 dirid) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log; u64 index; int ret; @@ -3467,7 +3466,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, dirid, &index); mutex_unlock(&inode->log_mutex); if (ret == -ENOSPC) { - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); ret = 0; } else if (ret < 0 && ret != -ENOENT) btrfs_abort_transaction(trans, ret); @@ -5453,7 +5452,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, * Make sure any commits to the log are forced to be full * commits. 
*/ - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); ret = true; } mutex_unlock(&inode->log_mutex); @@ -6006,7 +6005,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, end_trans: dput(old_parent); if (ret < 0) { - btrfs_set_log_full_commit(fs_info, trans); + btrfs_set_log_full_commit(trans); ret = 1; } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index bdf63d0c4128..132e43d29034 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -30,10 +30,9 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, INIT_LIST_HEAD(&ctx->list); } -static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans) +static inline void btrfs_set_log_full_commit(struct btrfs_trans_handle *trans) { - WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid); + WRITE_ONCE(trans->fs_info->last_trans_log_full_commit, trans->transid); } static inline int btrfs_need_log_full_commit(struct btrfs_trans_handle *trans) -- cgit v1.2.3-59-g8ed1b From 4ca75f1bd413ec0c550f925c009a7ba1b20e6003 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:42:57 +0100 Subject: btrfs: get fs_info from trans in create_free_space_inode We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 +-- fs/btrfs/free-space-cache.c | 9 ++++----- fs/btrfs/free-space-cache.h | 3 +-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 174f4f0311da..931f7d921363 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3411,8 +3411,7 @@ again: if (block_group->ro) goto out_free; - ret = create_free_space_inode(fs_info, trans, block_group, - path); + ret = create_free_space_inode(trans, block_group, path); if (ret) goto out_free; goto again; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 207fb50dcc7a..dc1151aebf3d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -185,20 +185,19 @@ static int __create_free_space_inode(struct btrfs_root *root, return 0; } -int create_free_space_inode(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int create_free_space_inode(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path) { int ret; u64 ino; - ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); + ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino); if (ret < 0) return ret; - return __create_free_space_inode(fs_info->tree_root, trans, path, ino, - block_group->key.objectid); + return __create_free_space_inode(trans->fs_info->tree_root, trans, path, + ino, block_group->key.objectid); } int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 15e30b93db0d..4f0d26ccfde6 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -41,8 +41,7 @@ struct btrfs_io_ctl; struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); -int create_free_space_inode(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int 
create_free_space_inode(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); -- cgit v1.2.3-59-g8ed1b From fe04153452a6305ddaab056f4fa412c142a6f955 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:51:56 +0100 Subject: btrfs: get fs_info from trans in btrfs_write_out_cache We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 6 ++---- fs/btrfs/free-space-cache.c | 4 ++-- fs/btrfs/free-space-cache.h | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 931f7d921363..7b65d5349b91 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3644,8 +3644,7 @@ again: if (cache->disk_cache_state == BTRFS_DC_SETUP) { cache->io_ctl.inode = NULL; - ret = btrfs_write_out_cache(fs_info, trans, - cache, path); + ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { num_started++; should_put = 0; @@ -3800,8 +3799,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { cache->io_ctl.inode = NULL; - ret = btrfs_write_out_cache(fs_info, trans, - cache, path); + ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { num_started++; should_put = 0; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index dc1151aebf3d..c3120df95c8a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1369,11 +1369,11 @@ out_unlock: goto out; } -int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_write_out_cache(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct inode 
*inode; int ret = 0; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 4f0d26ccfde6..b3a4fce7946a 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -55,8 +55,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); -int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_write_out_cache(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); struct inode *lookup_free_ino_inode(struct btrfs_root *root, -- cgit v1.2.3-59-g8ed1b From d30a668f1bf13893cca35bf64ae85ba8b1bc69ec Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:16:45 +0100 Subject: btrfs: get fs_info from trans in push_node_left We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0998e7fba98b..7d2223c56262 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -21,7 +21,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *ins_key, struct btrfs_path *path, int data_size, int extend); static int push_node_left(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, int empty); static int balance_node_right(struct btrfs_trans_handle *trans, @@ -1935,7 +1934,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left) { orig_slot += btrfs_header_nritems(left); - wret = push_node_left(trans, fs_info, left, mid, 1); + wret = push_node_left(trans, left, mid, 1); if (wret < 0) ret = wret; } @@ -1944,7 +1943,7 @@ static 
noinline int balance_level(struct btrfs_trans_handle *trans, * then try to empty the right most buffer into the middle */ if (right) { - wret = push_node_left(trans, fs_info, mid, right, 1); + wret = push_node_left(trans, mid, right, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { @@ -1986,7 +1985,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } if (wret == 1) { - wret = push_node_left(trans, fs_info, left, mid, 1); + wret = push_node_left(trans, left, mid, 1); if (wret < 0) ret = wret; } @@ -2097,8 +2096,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (ret) wret = 1; else { - wret = push_node_left(trans, fs_info, - left, mid, 0); + wret = push_node_left(trans, left, mid, 0); } } if (wret < 0) @@ -3211,10 +3209,10 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, int empty) { + struct btrfs_fs_info *fs_info = trans->fs_info; int push_items = 0; int src_nritems; int dst_nritems; -- cgit v1.2.3-59-g8ed1b From 55d32ed8d3e35e6d79d85aa6cbcb9e8dfb021c1a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:18:06 +0100 Subject: btrfs: get fs_info from trans in balance_node_right We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7d2223c56262..e76e69fdfa9e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -24,7 +24,6 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct extent_buffer *dst, struct extent_buffer *src, int empty); static int balance_node_right(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, @@ -1979,7 +1978,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_handle_fs_error(fs_info, ret, NULL); goto enospc; } - wret = balance_node_right(trans, fs_info, mid, left); + wret = balance_node_right(trans, mid, left); if (wret < 0) { ret = wret; goto enospc; @@ -2151,8 +2150,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (ret) wret = 1; else { - wret = balance_node_right(trans, fs_info, - right, mid); + wret = balance_node_right(trans, right, mid); } } if (wret < 0) @@ -3283,10 +3281,10 @@ static int push_node_left(struct btrfs_trans_handle *trans, * this will only push up to 1/2 the contents of the left node over */ static int balance_node_right(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src) { + struct btrfs_fs_info *fs_info = trans->fs_info; int push_items = 0; int max_push; int src_nritems; -- cgit v1.2.3-59-g8ed1b From 6ad3cf6df0963845a681d39792822255598d68e2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:32:45 +0100 Subject: btrfs: get fs_info from trans in insert_ptr We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e76e69fdfa9e..f779104ab199 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3407,7 +3407,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, * blocknr is the block the key points to. */ static void insert_ptr(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, int slot, int level) { @@ -3420,7 +3420,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, lower = path->nodes[level]; nritems = btrfs_header_nritems(lower); BUG_ON(slot > nritems); - BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info)); + BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info)); if (slot != nritems) { if (level) { ret = tree_mod_log_insert_move(lower, slot + 1, slot, @@ -3520,7 +3520,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(c); btrfs_mark_buffer_dirty(split); - insert_ptr(trans, fs_info, path, &disk_key, split->start, + insert_ptr(trans, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { @@ -4084,8 +4084,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); - insert_ptr(trans, fs_info, path, &disk_key, right->start, - path->slots[1] + 1, 1); + insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); @@ -4291,7 +4290,7 @@ again: if (split == 0) { if (mid <= slot) { btrfs_set_header_nritems(right, 0); - insert_ptr(trans, fs_info, path, &disk_key, + insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); @@ -4300,7 +4299,7 
@@ again: path->slots[1] += 1; } else { btrfs_set_header_nritems(right, 0); - insert_ptr(trans, fs_info, path, &disk_key, + insert_ptr(trans, path, &disk_key, right->start, path->slots[1], 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); -- cgit v1.2.3-59-g8ed1b From 94f94ad9725550bc33e5561b55fc4506228b1d7b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:42:33 +0100 Subject: btrfs: get fs_info from trans in copy_for_split We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f779104ab199..f52eb952597b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4044,12 +4044,12 @@ out: * available for the resulting leaf level of the path. */ static noinline void copy_for_split(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct extent_buffer *l, struct extent_buffer *right, int slot, int mid, int nritems) { + struct btrfs_fs_info *fs_info = trans->fs_info; int data_copy_size; int rt_data_off; int i; @@ -4316,7 +4316,7 @@ again: return ret; } - copy_for_split(trans, fs_info, path, l, right, slot, mid, nritems); + copy_for_split(trans, path, l, right, slot, mid, nritems); if (split == 2) { BUG_ON(num_doubles != 0); -- cgit v1.2.3-59-g8ed1b From 6f8e0fc77cd5534f6c3b1494adcc9dd8da8998a5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:29:13 +0100 Subject: btrfs: get fs_info from trans in init_first_rw_device We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0a816279a803..46b331c4f253 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -185,8 +185,7 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf) out_overflow:; } -static int init_first_rw_device(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); +static int init_first_rw_device(struct btrfs_trans_handle *trans); static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info); static void __btrfs_reset_dev_stats(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); @@ -2668,7 +2667,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (seeding_dev) { mutex_lock(&fs_info->chunk_mutex); - ret = init_first_rw_device(trans, fs_info); + ret = init_first_rw_device(trans); mutex_unlock(&fs_info->chunk_mutex); if (ret) { btrfs_abort_transaction(trans, ret); @@ -5304,9 +5303,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type) return __btrfs_alloc_chunk(trans, chunk_offset, type); } -static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; u64 chunk_offset; u64 sys_chunk_offset; u64 alloc_profile; -- cgit v1.2.3-59-g8ed1b From 5c466629e27a346e056f5d1d0fff13f562690bbb Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:36:39 +0100 Subject: btrfs: get fs_info from trans in btrfs_finish_sprout We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 46b331c4f253..c728f0f604a8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2465,9 +2465,9 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) /* * Store the expected generation for seed devices in device items. */ -static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->chunk_root; struct btrfs_path *path; struct extent_buffer *leaf; @@ -2684,7 +2684,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (seeding_dev) { char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; - ret = btrfs_finish_sprout(trans, fs_info); + ret = btrfs_finish_sprout(trans); if (ret) { btrfs_abort_transaction(trans, ret); goto error_sysfs; -- cgit v1.2.3-59-g8ed1b From 196c9d8de8389643318c22259fd5bcfccbc7fb91 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:50:38 +0100 Subject: btrfs: get fs_info from trans in btrfs_run_dev_stats We can read fs_info from the transaction and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 2 +- fs/btrfs/volumes.c | 4 ++-- fs/btrfs/volumes.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cc326d964567..1e3d8d9b0ed5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1094,7 +1094,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) if (ret) return ret; - ret = btrfs_run_dev_stats(trans, fs_info); + ret = btrfs_run_dev_stats(trans); if (ret) return ret; ret = btrfs_run_dev_replace(trans, fs_info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c728f0f604a8..5ee24876fac3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7404,9 +7404,9 @@ out: /* * called from commit_transaction. Writes all changed device stats to disk. */ -int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_run_dev_stats(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; int stats_cnt; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 986eaeba1a07..b3a89d767512 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -462,8 +462,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_get_dev_stats *stats); void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); -int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); +int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev); -- cgit v1.2.3-59-g8ed1b From 2b584c688bb53d482220712e2f5810a155ec1b74 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:51:44 +0100 
Subject: btrfs: get fs_info from trans in btrfs_run_dev_replace We can read fs_info from the transaction and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 4 ++-- fs/btrfs/dev-replace.h | 3 +-- fs/btrfs/transaction.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index dba43ada41d1..eadf859934fe 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -273,9 +273,9 @@ error: * called from commit_transaction. Writes changed device replace state to * disk. */ -int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_run_dev_replace(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_path *path; diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h index 4aa40bacc6cc..78c5d8f1adda 100644 --- a/fs/btrfs/dev-replace.h +++ b/fs/btrfs/dev-replace.h @@ -9,8 +9,7 @@ struct btrfs_ioctl_dev_replace_args; int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info); -int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); +int btrfs_run_dev_replace(struct btrfs_trans_handle *trans); int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args); void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1e3d8d9b0ed5..722ebdc02453 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1097,7 +1097,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) ret = btrfs_run_dev_stats(trans); if (ret) return ret; - ret = btrfs_run_dev_replace(trans, fs_info); + ret = btrfs_run_dev_replace(trans); if (ret) return ret; ret = btrfs_run_qgroups(trans); -- cgit v1.2.3-59-g8ed1b From 74f657d89c6734c260509338e88ad6d5f5a24e1d Mon Sep 
17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Apr 2019 09:29:19 +0100 Subject: Btrfs: remove no longer used member num_dirty_bgs from transaction The member num_dirty_bgs of struct btrfs_transaction is not used anymore, it is set and incremented but nothing reads its value anymore. Its last read use was removed by commit 64403612b73a94 ("btrfs: rework btrfs_check_space_for_delayed_refs"). So just remove that member. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 1 - fs/btrfs/transaction.c | 1 - fs/btrfs/transaction.h | 1 - 3 files changed, 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b65d5349b91..7e9ba2f76ac9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6431,7 +6431,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (list_empty(&cache->dirty_list)) { list_add_tail(&cache->dirty_list, &trans->transaction->dirty_bgs); - trans->transaction->num_dirty_bgs++; trans->delayed_ref_updates++; btrfs_get_block_group(cache); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 722ebdc02453..3f6811cdf803 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -233,7 +233,6 @@ loop: INIT_LIST_HEAD(&cur_trans->io_bgs); INIT_LIST_HEAD(&cur_trans->dropped_roots); mutex_init(&cur_trans->cache_write_mutex); - cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); INIT_LIST_HEAD(&cur_trans->deleted_bgs); spin_lock_init(&cur_trans->dropped_roots_lock); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4419a4a0294b..78c446c222b7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -80,7 +80,6 @@ struct btrfs_transaction { */ struct mutex cache_write_mutex; spinlock_t dirty_bgs_lock; - unsigned int num_dirty_bgs; /* Protected by spin lock fs_info->unused_bgs_lock. 
*/ struct list_head deleted_bgs; spinlock_t dropped_roots_lock; -- cgit v1.2.3-59-g8ed1b From 34e73cc930a8677426c9cbffdd3421e18f32e79f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 15 Apr 2019 21:15:24 +0800 Subject: btrfs: trace: Introduce trace events for sleepable tree lock There are two tree lock events which can sleep: - btrfs_tree_read_lock() - btrfs_tree_lock() Sometimes we may need to look into the concurrency picture of the fs. For that case, we need the execution time of above two functions and the owner of @eb. Here we introduce trace events for user space tools like bcc, to get the execution time of above two functions, and get detailed owner info where eBPF code can't. All the overhead is hidden behind the trace events, so if events are not enabled, there is no overhead. These trace events also output bytenr and generation, allowing them to be paired with unlock events to pin down deadlock. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/locking.c | 12 ++++++++++++ include/trace/events/btrfs.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 6df03ba36026..67b77f1d113e 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -158,6 +158,10 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb) */ void btrfs_tree_read_lock(struct extent_buffer *eb) { + u64 start_ns = 0; + + if (trace_btrfs_tree_read_lock_enabled()) + start_ns = ktime_get_ns(); again: BUG_ON(!atomic_read(&eb->blocking_writers) && current->pid == eb->lock_owner); @@ -174,6 +178,7 @@ again: BUG_ON(eb->lock_nested); eb->lock_nested = true; read_unlock(&eb->lock); + trace_btrfs_tree_read_lock(eb, start_ns); return; } if (atomic_read(&eb->blocking_writers)) { @@ -184,6 +189,7 @@ again: } btrfs_assert_tree_read_locks_get(eb); btrfs_assert_spinning_readers_get(eb); + trace_btrfs_tree_read_lock(eb, start_ns); } /* @@ -299,6 +305,11 @@ void 
btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) */ void btrfs_tree_lock(struct extent_buffer *eb) { + u64 start_ns = 0; + + if (trace_btrfs_tree_lock_enabled()) + start_ns = ktime_get_ns(); + WARN_ON(eb->lock_owner == current->pid); again: wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); @@ -312,6 +323,7 @@ again: btrfs_assert_spinning_writers_get(eb); btrfs_assert_tree_write_locks_get(eb); eb->lock_owner = current->pid; + trace_btrfs_tree_lock(eb, start_ns); } /* diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8b12753fee78..e27ed5afb958 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2005,6 +2005,50 @@ TRACE_EVENT(btrfs_convert_extent_bit, __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS)) ); +DECLARE_EVENT_CLASS(btrfs_sleep_tree_lock, + TP_PROTO(const struct extent_buffer *eb, u64 start_ns), + + TP_ARGS(eb, start_ns), + + TP_STRUCT__entry_btrfs( + __field( u64, block ) + __field( u64, generation ) + __field( u64, start_ns ) + __field( u64, end_ns ) + __field( u64, diff_ns ) + __field( u64, owner ) + __field( int, is_log_tree ) + ), + + TP_fast_assign_btrfs(eb->fs_info, + __entry->block = eb->start; + __entry->generation = btrfs_header_generation(eb); + __entry->start_ns = start_ns; + __entry->end_ns = ktime_get_ns(); + __entry->diff_ns = __entry->end_ns - start_ns; + __entry->owner = btrfs_header_owner(eb); + __entry->is_log_tree = (eb->log_index >= 0); + ), + + TP_printk_btrfs( +"block=%llu generation=%llu start_ns=%llu end_ns=%llu diff_ns=%llu owner=%llu is_log_tree=%d", + __entry->block, __entry->generation, + __entry->start_ns, __entry->end_ns, __entry->diff_ns, + __entry->owner, __entry->is_log_tree) +); + +DEFINE_EVENT(btrfs_sleep_tree_lock, btrfs_tree_read_lock, + TP_PROTO(const struct extent_buffer *eb, u64 start_ns), + + TP_ARGS(eb, start_ns) +); + +DEFINE_EVENT(btrfs_sleep_tree_lock, btrfs_tree_lock, + TP_PROTO(const struct extent_buffer *eb, u64 
start_ns), + + TP_ARGS(eb, start_ns) +); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ -- cgit v1.2.3-59-g8ed1b From 31aab402076f7743b70217cdaa00356e8f8ec530 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 15 Apr 2019 21:15:25 +0800 Subject: btrfs: trace: Introduce trace events for all btrfs tree locking events Unlike btrfs_tree_lock() and btrfs_tree_read_lock(), the remaining functions in locking.c will not sleep, thus doesn't make much sense to record their execution time. Those events are introduced mainly for user space tool to audit and detect lock leakage or dead lock. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/locking.c | 10 ++++++++++ include/trace/events/btrfs.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 67b77f1d113e..2f6c3c7851ed 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -87,6 +87,7 @@ static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { } void btrfs_set_lock_blocking_read(struct extent_buffer *eb) { + trace_btrfs_set_lock_blocking_read(eb); /* * No lock is required. The lock owner may change if we have a read * lock, but it won't change to or away from us. If we have the write @@ -102,6 +103,7 @@ void btrfs_set_lock_blocking_read(struct extent_buffer *eb) void btrfs_set_lock_blocking_write(struct extent_buffer *eb) { + trace_btrfs_set_lock_blocking_write(eb); /* * No lock is required. The lock owner may change if we have a read * lock, but it won't change to or away from us. If we have the write @@ -119,6 +121,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb) void btrfs_clear_lock_blocking_read(struct extent_buffer *eb) { + trace_btrfs_clear_lock_blocking_read(eb); /* * No lock is required. The lock owner may change if we have a read * lock, but it won't change to or away from us. 
If we have the write @@ -136,6 +139,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb) void btrfs_clear_lock_blocking_write(struct extent_buffer *eb) { + trace_btrfs_clear_lock_blocking_write(eb); /* * no lock is required. The lock owner may change if * we have a read lock, but it won't change to or away @@ -209,6 +213,7 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) } btrfs_assert_tree_read_locks_get(eb); btrfs_assert_spinning_readers_get(eb); + trace_btrfs_tree_read_lock_atomic(eb); return 1; } @@ -230,6 +235,7 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb) } btrfs_assert_tree_read_locks_get(eb); btrfs_assert_spinning_readers_get(eb); + trace_btrfs_try_tree_read_lock(eb); return 1; } @@ -252,6 +258,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) btrfs_assert_tree_write_locks_get(eb); btrfs_assert_spinning_writers_get(eb); eb->lock_owner = current->pid; + trace_btrfs_try_tree_write_lock(eb); return 1; } @@ -260,6 +267,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) */ void btrfs_tree_read_unlock(struct extent_buffer *eb) { + trace_btrfs_tree_read_unlock(eb); /* * if we're nested, we have the write lock. No new locking * is needed as long as we are the lock owner. @@ -281,6 +289,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb) */ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) { + trace_btrfs_tree_read_unlock_blocking(eb); /* * if we're nested, we have the write lock. No new locking * is needed as long as we are the lock owner. 
@@ -336,6 +345,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb) BUG_ON(blockers > 1); btrfs_assert_tree_locked(eb); + trace_btrfs_tree_unlock(eb); eb->lock_owner = 0; btrfs_assert_tree_write_locks_put(eb); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e27ed5afb958..fe4d268028ee 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2049,6 +2049,48 @@ DEFINE_EVENT(btrfs_sleep_tree_lock, btrfs_tree_lock, TP_ARGS(eb, start_ns) ); +DECLARE_EVENT_CLASS(btrfs_locking_events, + TP_PROTO(const struct extent_buffer *eb), + + TP_ARGS(eb), + + TP_STRUCT__entry_btrfs( + __field( u64, block ) + __field( u64, generation ) + __field( u64, owner ) + __field( int, is_log_tree ) + ), + + TP_fast_assign_btrfs(eb->fs_info, + __entry->block = eb->start; + __entry->generation = btrfs_header_generation(eb); + __entry->owner = btrfs_header_owner(eb); + __entry->is_log_tree = (eb->log_index >= 0); + ), + + TP_printk_btrfs("block=%llu generation=%llu owner=%llu is_log_tree=%d", + __entry->block, __entry->generation, + __entry->owner, __entry->is_log_tree) +); + +#define DEFINE_BTRFS_LOCK_EVENT(name) \ +DEFINE_EVENT(btrfs_locking_events, name, \ + TP_PROTO(const struct extent_buffer *eb), \ + \ + TP_ARGS(eb) \ +) + +DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_unlock); +DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock); +DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking); +DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read); +DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write); +DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_read); +DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_write); +DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock); +DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock); +DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ -- cgit v1.2.3-59-g8ed1b From cac237ae095f6d92bda05ccd6df349d20f1a4bfc Mon Sep 17 00:00:00 2001 From: Anand 
Jain Date: Fri, 12 Apr 2019 16:02:54 +0800 Subject: btrfs: rename btrfs_setxattr to btrfs_setxattr_trans Rename btrfs_setxattr() to btrfs_setxattr_trans(), so that do_setxattr() can be renamed to btrfs_setxattr(). Preparatory patch, no functional changes. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/acl.c | 2 +- fs/btrfs/props.c | 20 ++++++++++---------- fs/btrfs/xattr.c | 12 ++++++------ fs/btrfs/xattr.h | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 5810463dc6d2..d3b04c6abc61 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -93,7 +93,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_setxattr(trans, inode, name, value, size, 0); + ret = btrfs_setxattr_trans(trans, inode, name, value, size, 0); out: kfree(value); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 0d1c3485c098..61ced0ebb5ba 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -70,8 +70,8 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, return -EINVAL; if (value_len == 0) { - ret = btrfs_setxattr(trans, inode, handler->xattr_name, - NULL, 0, flags); + ret = btrfs_setxattr_trans(trans, inode, handler->xattr_name, + NULL, 0, flags); if (ret) return ret; @@ -84,14 +84,14 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, ret = handler->validate(value, value_len); if (ret) return ret; - ret = btrfs_setxattr(trans, inode, handler->xattr_name, - value, value_len, flags); + ret = btrfs_setxattr_trans(trans, inode, handler->xattr_name, + value, value_len, flags); if (ret) return ret; ret = handler->apply(inode, value, value_len); if (ret) { - btrfs_setxattr(trans, inode, handler->xattr_name, - NULL, 0, flags); + btrfs_setxattr_trans(trans, inode, handler->xattr_name, + NULL, 0, flags); return ret; } @@ -358,13 +358,13 @@ static int inherit_props(struct btrfs_trans_handle *trans, if 
(ret) return ret; - ret = btrfs_setxattr(trans, inode, h->xattr_name, value, - strlen(value), 0); + ret = btrfs_setxattr_trans(trans, inode, h->xattr_name, value, + strlen(value), 0); if (!ret) { ret = h->apply(inode, value, strlen(value)); if (ret) - btrfs_setxattr(trans, inode, h->xattr_name, - NULL, 0, 0); + btrfs_setxattr_trans(trans, inode, h->xattr_name, + NULL, 0, 0); else set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index fa820c56ba3e..38eb78aac0a7 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -220,9 +220,9 @@ out: /* * @value: "" makes the attribute to empty, NULL removes it */ -int btrfs_setxattr(struct btrfs_trans_handle *trans, - struct inode *inode, const char *name, - const void *value, size_t size, int flags) +int btrfs_setxattr_trans(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -370,7 +370,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler, size_t size, int flags) { name = xattr_full_name(handler, name); - return btrfs_setxattr(NULL, inode, name, buffer, size, flags); + return btrfs_setxattr_trans(NULL, inode, name, buffer, size, flags); } static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, @@ -441,8 +441,8 @@ static int btrfs_initxattrs(struct inode *inode, } strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); - err = btrfs_setxattr(trans, inode, name, xattr->value, - xattr->value_len, 0); + err = btrfs_setxattr_trans(trans, inode, name, xattr->value, + xattr->value_len, 0); kfree(name); if (err < 0) break; diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 471fcac6ff55..36d54a15cbfe 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -12,7 +12,7 @@ extern const struct xattr_handler *btrfs_xattr_handlers[]; int btrfs_getxattr(struct inode 
*inode, const char *name, void *buffer, size_t size); -int btrfs_setxattr(struct btrfs_trans_handle *trans, +int btrfs_setxattr_trans(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const void *value, size_t size, int flags); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); -- cgit v1.2.3-59-g8ed1b From 2d74fa3efcffe456d16833af118ffe0cb0ee6785 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Apr 2019 16:02:55 +0800 Subject: btrfs: rename do_setxattr to btrfs_setxattr When trans is not NULL btrfs_setxattr() calls do_setxattr() directly with a check for readonly root. Rename do_setxattr() to btrfs_setxattr() in preparation to call do_setxattr() directly instead. Preparatory patch, no functional changes. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 38eb78aac0a7..fd1469ef55d6 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -76,9 +76,9 @@ out: return ret; } -static int do_setxattr(struct btrfs_trans_handle *trans, - struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int btrfs_setxattr(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags) { struct btrfs_dir_item *di = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -231,13 +231,13 @@ int btrfs_setxattr_trans(struct btrfs_trans_handle *trans, return -EROFS; if (trans) - return do_setxattr(trans, inode, name, value, size, flags); + return btrfs_setxattr(trans, inode, name, value, size, flags); trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = do_setxattr(trans, inode, name, value, size, flags); + ret = btrfs_setxattr(trans, inode, name, value, size, flags); if (ret) goto out; -- cgit v1.2.3-59-g8ed1b From 
3e125a74fbc1938afd699ba9ba7f33801768bd0e Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Apr 2019 16:02:56 +0800 Subject: btrfs: export btrfs_setxattr Preparatory patch, as we are going to split the calls with and without transaction to use the respective btrfs_setxattr() and btrfs_setxattr_trans() functions. Export btrfs_setxattr() for calls outside of xattr.c. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 5 ++--- fs/btrfs/xattr.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index fd1469ef55d6..b2b68676ec52 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -76,9 +76,8 @@ out: return ret; } -static int btrfs_setxattr(struct btrfs_trans_handle *trans, - struct inode *inode, const char *name, - const void *value, size_t size, int flags) +int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { struct btrfs_dir_item *di = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 36d54a15cbfe..a95834cc3c04 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -12,6 +12,8 @@ extern const struct xattr_handler *btrfs_xattr_handlers[]; int btrfs_getxattr(struct inode *inode, const char *name, void *buffer, size_t size); +int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, + const char *name, const void *value, size_t size, int flags); int btrfs_setxattr_trans(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const void *value, size_t size, int flags); -- cgit v1.2.3-59-g8ed1b From 353c2ea735e4b54fb6250861e67b51b1bcb47198 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Apr 2019 16:02:57 +0800 Subject: btrfs: remove redundant readonly root check in btrfs_setxattr_trans btrfs_setxattr_trans() is called by 5 functions as below and all of them do updates. 
None of them would be run on a read-only root. So it's ok to remove the readonly root check here as it's a high-level condition. 1. __btrfs_set_acl() btrfs_init_acl() btrfs_init_inode_security() 2. __btrfs_set_acl() btrfs_set_acl() 3. btrfs_set_prop() btrfs_set_prop_trans() / \ btrfs_ioctl_setflags() btrfs_xattr_handler_set_prop() 4. btrfs_xattr_handler_set() 5. btrfs_initxattrs() btrfs_xattr_security_init() btrfs_init_inode_security() Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index b2b68676ec52..4c447b1f32e5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -226,9 +226,6 @@ int btrfs_setxattr_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - if (btrfs_root_readonly(root)) - return -EROFS; - if (trans) return btrfs_setxattr(trans, inode, name, value, size, flags); -- cgit v1.2.3-59-g8ed1b From 04e6863b19c72279bcbeffa26d85d649ab9c8205 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Apr 2019 16:02:58 +0800 Subject: btrfs: split btrfs_setxattr calls regarding transaction When the caller has already created the transaction handle, btrfs_setxattr() will use it. Also adds assert in btrfs_setxattr(). 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/acl.c | 6 +++++- fs/btrfs/props.c | 34 ++++++++++++++++++++++++---------- fs/btrfs/xattr.c | 6 ++++-- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index d3b04c6abc61..7fe6551bc59b 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -93,7 +93,11 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_setxattr_trans(trans, inode, name, value, size, 0); + if (trans) + ret = btrfs_setxattr(trans, inode, name, value, size, 0); + else + ret = btrfs_setxattr_trans(NULL, inode, name, value, size, 0); + out: kfree(value); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 61ced0ebb5ba..a73c1bdc7b05 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -70,8 +70,13 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, return -EINVAL; if (value_len == 0) { - ret = btrfs_setxattr_trans(trans, inode, handler->xattr_name, - NULL, 0, flags); + if (trans) + ret = btrfs_setxattr(trans, inode, handler->xattr_name, + NULL, 0, flags); + else + ret = btrfs_setxattr_trans(NULL, inode, + handler->xattr_name, NULL, 0, + flags); if (ret) return ret; @@ -84,14 +89,23 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, ret = handler->validate(value, value_len); if (ret) return ret; - ret = btrfs_setxattr_trans(trans, inode, handler->xattr_name, - value, value_len, flags); + if (trans) + ret = btrfs_setxattr(trans, inode, handler->xattr_name, value, + value_len, flags); + else + ret = btrfs_setxattr_trans(NULL, inode, handler->xattr_name, + value, value_len, flags); + if (ret) return ret; ret = handler->apply(inode, value, value_len); if (ret) { - btrfs_setxattr_trans(trans, inode, handler->xattr_name, - NULL, 0, flags); + if (trans) + btrfs_setxattr(trans, inode, handler->xattr_name, NULL, + 0, flags); + else + btrfs_setxattr_trans(NULL, inode, 
handler->xattr_name, + NULL, 0, flags); return ret; } @@ -358,13 +372,13 @@ static int inherit_props(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = btrfs_setxattr_trans(trans, inode, h->xattr_name, value, - strlen(value), 0); + ret = btrfs_setxattr(trans, inode, h->xattr_name, value, + strlen(value), 0); if (!ret) { ret = h->apply(inode, value, strlen(value)); if (ret) - btrfs_setxattr_trans(trans, inode, h->xattr_name, - NULL, 0, 0); + btrfs_setxattr(trans, inode, h->xattr_name, + NULL, 0, 0); else set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 4c447b1f32e5..623d508f21a6 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -86,6 +86,8 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, size_t name_len = strlen(name); int ret = 0; + ASSERT(trans); + if (name_len + size > BTRFS_MAX_XATTR_SIZE(root->fs_info)) return -ENOSPC; @@ -437,8 +439,8 @@ static int btrfs_initxattrs(struct inode *inode, } strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); - err = btrfs_setxattr_trans(trans, inode, name, xattr->value, - xattr->value_len, 0); + err = btrfs_setxattr(trans, inode, name, xattr->value, + xattr->value_len, 0); kfree(name); if (err < 0) break; -- cgit v1.2.3-59-g8ed1b From e3de9b159a927fc1fd65e4d090b981b781c58926 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 12 Apr 2019 16:02:59 +0800 Subject: btrfs: cleanup btrfs_setxattr_trans and drop transaction parameter Previous patch made sure that btrfs_setxattr_trans() is called only when transaction is NULL. Clean up btrfs_setxattr_trans() and drop the parameter. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/acl.c | 2 +- fs/btrfs/props.c | 13 ++++++------- fs/btrfs/xattr.c | 9 +++------ fs/btrfs/xattr.h | 5 ++--- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 7fe6551bc59b..a0af1b952c4d 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -96,7 +96,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, if (trans) ret = btrfs_setxattr(trans, inode, name, value, size, 0); else - ret = btrfs_setxattr_trans(NULL, inode, name, value, size, 0); + ret = btrfs_setxattr_trans(inode, name, value, size, 0); out: kfree(value); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index a73c1bdc7b05..44b7bf647ab3 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -74,9 +74,8 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, ret = btrfs_setxattr(trans, inode, handler->xattr_name, NULL, 0, flags); else - ret = btrfs_setxattr_trans(NULL, inode, - handler->xattr_name, NULL, 0, - flags); + ret = btrfs_setxattr_trans(inode, handler->xattr_name, + NULL, 0, flags); if (ret) return ret; @@ -93,8 +92,8 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, ret = btrfs_setxattr(trans, inode, handler->xattr_name, value, value_len, flags); else - ret = btrfs_setxattr_trans(NULL, inode, handler->xattr_name, - value, value_len, flags); + ret = btrfs_setxattr_trans(inode, handler->xattr_name, value, + value_len, flags); if (ret) return ret; @@ -104,8 +103,8 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, btrfs_setxattr(trans, inode, handler->xattr_name, NULL, 0, flags); else - btrfs_setxattr_trans(NULL, inode, handler->xattr_name, - NULL, 0, flags); + btrfs_setxattr_trans(inode, handler->xattr_name, NULL, + 0, flags); return ret; } diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 623d508f21a6..10da873d11f5 100644 --- a/fs/btrfs/xattr.c +++ 
b/fs/btrfs/xattr.c @@ -221,16 +221,13 @@ out: /* * @value: "" makes the attribute to empty, NULL removes it */ -int btrfs_setxattr_trans(struct btrfs_trans_handle *trans, - struct inode *inode, const char *name, +int btrfs_setxattr_trans(struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; int ret; - if (trans) - return btrfs_setxattr(trans, inode, name, value, size, flags); - trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -368,7 +365,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler, size_t size, int flags) { name = xattr_full_name(handler, name); - return btrfs_setxattr_trans(NULL, inode, name, buffer, size, flags); + return btrfs_setxattr_trans(inode, name, buffer, size, flags); } static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index a95834cc3c04..1cd3fc0a8f17 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -14,9 +14,8 @@ int btrfs_getxattr(struct inode *inode, const char *name, void *buffer, size_t size); int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const void *value, size_t size, int flags); -int btrfs_setxattr_trans(struct btrfs_trans_handle *trans, - struct inode *inode, const char *name, - const void *value, size_t size, int flags); +int btrfs_setxattr_trans(struct inode *inode, const char *name, + const void *value, size_t size, int flags); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, -- cgit v1.2.3-59-g8ed1b From 32b593bfcb58638f40a72fd5c6db50a21616a54e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Apr 2019 10:28:47 +0100 Subject: Btrfs: remove no longer used function to run delayed refs asynchronously It used to be called from only two 
places (truncate path and releasing a transaction handle), but commits 28bad2125767c5 ("btrfs: fix truncate throttling") and db2462a6ad3dc4 ("btrfs: don't run delayed refs in the end transaction logic") removed their calls to this function, so it's not used anymore. Just remove it and all its helpers. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 -- fs/btrfs/extent-tree.c | 85 -------------------------------------------------- 2 files changed, 87 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cad183a17c1c..69a60d2217e5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2701,8 +2701,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count); -int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, - unsigned long count, u64 transid, int wait); void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e9ba2f76ac9..2f32d996c442 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2876,91 +2876,6 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans) return btrfs_check_space_for_delayed_refs(trans->fs_info); } -struct async_delayed_refs { - struct btrfs_root *root; - u64 transid; - int count; - int error; - int sync; - struct completion wait; - struct btrfs_work work; -}; - -static inline struct async_delayed_refs * -to_async_delayed_refs(struct btrfs_work *work) -{ - return container_of(work, struct async_delayed_refs, work); -} - -static void delayed_ref_async_start(struct btrfs_work *work) -{ - struct async_delayed_refs *async = to_async_delayed_refs(work); - struct btrfs_trans_handle *trans; - struct btrfs_fs_info 
*fs_info = async->root->fs_info; - int ret; - - /* if the commit is already started, we don't need to wait here */ - if (btrfs_transaction_blocked(fs_info)) - goto done; - - trans = btrfs_join_transaction(async->root); - if (IS_ERR(trans)) { - async->error = PTR_ERR(trans); - goto done; - } - - /* Don't bother flushing if we got into a different transaction */ - if (trans->transid > async->transid) - goto end; - - ret = btrfs_run_delayed_refs(trans, async->count); - if (ret) - async->error = ret; -end: - ret = btrfs_end_transaction(trans); - if (ret && !async->error) - async->error = ret; -done: - if (async->sync) - complete(&async->wait); - else - kfree(async); -} - -int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, - unsigned long count, u64 transid, int wait) -{ - struct async_delayed_refs *async; - int ret; - - async = kmalloc(sizeof(*async), GFP_NOFS); - if (!async) - return -ENOMEM; - - async->root = fs_info->tree_root; - async->count = count; - async->error = 0; - async->transid = transid; - if (wait) - async->sync = 1; - else - async->sync = 0; - init_completion(&async->wait); - - btrfs_init_work(&async->work, btrfs_extent_refs_helper, - delayed_ref_async_start, NULL, NULL); - - btrfs_queue_work(fs_info->extent_workers, &async->work); - - if (wait) { - wait_for_completion(&async->wait); - ret = async->error; - kfree(async); - return ret; - } - return 0; -} - /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. count can be -- cgit v1.2.3-59-g8ed1b From f87b7eb821e30d2db8ac62957293982cf91e37c2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 12:01:07 +0100 Subject: btrfs: get fs_info from block group in next_block_group We can read fs_info from the block group cache structure and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2f32d996c442..7a0f3e151999 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3256,10 +3256,10 @@ fail: } -static struct btrfs_block_group_cache * -next_block_group(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *cache) +static struct btrfs_block_group_cache *next_block_group( + struct btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = cache->fs_info; struct rb_node *node; spin_lock(&fs_info->block_group_cache_lock); @@ -10050,7 +10050,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) if (block_group->iref) break; spin_unlock(&block_group->lock); - block_group = next_block_group(info, block_group); + block_group = next_block_group(block_group); } if (!block_group) { if (last == 0) @@ -11296,7 +11296,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) int ret = 0; cache = btrfs_lookup_first_block_group(fs_info, range->start); - for (; cache; cache = next_block_group(fs_info, cache)) { + for (; cache; cache = next_block_group(cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); break; -- cgit v1.2.3-59-g8ed1b From fdf08605b95d36c04e90915cc0eb729df324c4bc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 12:12:32 +0100 Subject: btrfs: get fs_info from block group in pin_down_extent We can read fs_info from the block group cache structure and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7a0f3e151999..a8b0434641e5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6392,10 +6392,11 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start) return bytenr; } -static int pin_down_extent(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *cache, +static int pin_down_extent(struct btrfs_block_group_cache *cache, u64 bytenr, u64 num_bytes, int reserved) { + struct btrfs_fs_info *fs_info = cache->fs_info; + spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); cache->pinned += num_bytes; @@ -6427,7 +6428,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info, cache = btrfs_lookup_block_group(fs_info, bytenr); BUG_ON(!cache); /* Logic error */ - pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved); + pin_down_extent(cache, bytenr, num_bytes, reserved); btrfs_put_block_group(cache); return 0; @@ -6454,7 +6455,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, */ cache_block_group(cache, 1); - pin_down_extent(fs_info, cache, bytenr, num_bytes, 0); + pin_down_extent(cache, bytenr, num_bytes, 0); /* remove us from the free space cache (if we're there at all) */ ret = btrfs_remove_free_space(cache, bytenr, num_bytes); @@ -7206,8 +7207,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(fs_info, buf->start); if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { - pin_down_extent(fs_info, cache, buf->start, - buf->len, 1); + pin_down_extent(cache, buf->start, buf->len, 1); btrfs_put_block_group(cache); goto out; } @@ -8182,7 +8182,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, } if (pin) - pin_down_extent(fs_info, cache, start, len, 1); + pin_down_extent(cache, start, len, 1); else { if 
(btrfs_test_opt(fs_info, DISCARD)) ret = btrfs_discard_extent(fs_info, start, len, NULL); -- cgit v1.2.3-59-g8ed1b From 7949f3392ed65d19f0f6726e9e32445aa8b707dc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:40:19 +0100 Subject: btrfs: get fs_info from block group in lookup_free_space_inode We can read fs_info from the block group cache structure and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/free-space-cache.c | 11 ++++++----- fs/btrfs/free-space-cache.h | 6 +++--- fs/btrfs/relocation.c | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a8b0434641e5..e881c20b975a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3312,7 +3312,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, if (trans->aborted) return 0; again: - inode = lookup_free_space_inode(fs_info, block_group, path); + inode = lookup_free_space_inode(block_group, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { ret = PTR_ERR(inode); btrfs_release_path(path); @@ -10699,7 +10699,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * get the inode first so any iput calls done for the io_list * aren't the final iput (no unlinks allowed now) */ - inode = lookup_free_space_inode(fs_info, block_group, path); + inode = lookup_free_space_inode(block_group, path); mutex_lock(&trans->transaction->cache_write_mutex); /* diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c3120df95c8a..f30c1bc1dced 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -88,10 +88,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, return inode; } -struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache - *block_group, struct btrfs_path *path) +struct inode *lookup_free_space_inode( + struct 
btrfs_block_group_cache *block_group, + struct btrfs_path *path) { + struct btrfs_fs_info *fs_info = block_group->fs_info; struct inode *inode = NULL; u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; @@ -857,7 +858,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, * once created get their ->cached field set to BTRFS_CACHE_FINISHED so * we will never try to read their inode item while the fs is mounted. */ - inode = lookup_free_space_inode(fs_info, block_group, path); + inode = lookup_free_space_inode(block_group, path); if (IS_ERR(inode)) { btrfs_free_path(path); return 0; @@ -1385,7 +1386,7 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans, } spin_unlock(&block_group->lock); - inode = lookup_free_space_inode(fs_info, block_group, path); + inode = lookup_free_space_inode(block_group, path); if (IS_ERR(inode)) return 0; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index b3a4fce7946a..ab387c219bb0 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -38,9 +38,9 @@ struct btrfs_free_space_op { struct btrfs_io_ctl; -struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache - *block_group, struct btrfs_path *path); +struct inode *lookup_free_space_inode( + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path); int create_free_space_inode(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2246aa2e30e3..9babbc74d145 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4299,7 +4299,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) goto out; } - inode = lookup_free_space_inode(fs_info, rc->block_group, path); + inode = lookup_free_space_inode(rc->block_group, path); btrfs_free_path(path); if (!IS_ERR(inode)) -- cgit v1.2.3-59-g8ed1b From 
bb6cb1c5b92bc67082acf4dd0f037d8d3d0d0c61 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:47:15 +0100 Subject: btrfs: get fs_info from block group in load_free_space_cache We can read fs_info from the block group cache structure and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/free-space-cache.c | 4 ++-- fs/btrfs/free-space-cache.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e881c20b975a..370698477627 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -643,7 +643,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, if (btrfs_test_opt(fs_info, SPACE_CACHE)) { mutex_lock(&caching_ctl->mutex); - ret = load_free_space_cache(fs_info, cache); + ret = load_free_space_cache(cache); spin_lock(&cache->lock); if (ret == 1) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f30c1bc1dced..98ebe4b28f29 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -812,9 +812,9 @@ free_cache: goto out; } -int load_free_space_cache(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group) +int load_free_space_cache(struct btrfs_block_group_cache *block_group) { + struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct inode *inode; struct btrfs_path *path; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index ab387c219bb0..9067841a8528 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -50,8 +50,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct inode *inode); -int load_free_space_cache(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group); 
+int load_free_space_cache(struct btrfs_block_group_cache *block_group); int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); -- cgit v1.2.3-59-g8ed1b From 6701bdb39ceb48d9b155c2b7f989c250140d6e45 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:49:09 +0100 Subject: btrfs: get fs_info from block group in write_pinned_extent_entries We can read fs_info from the block group cache structure and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 98ebe4b28f29..33c3e6f1ae37 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1039,8 +1039,7 @@ fail: return -1; } -static noinline_for_stack int -write_pinned_extent_entries(struct btrfs_fs_info *fs_info, +static noinline_for_stack int write_pinned_extent_entries( struct btrfs_block_group_cache *block_group, struct btrfs_io_ctl *io_ctl, int *entries) @@ -1059,7 +1058,7 @@ write_pinned_extent_entries(struct btrfs_fs_info *fs_info, * We shouldn't have switched the pinned extents yet so this is the * right one */ - unpin = fs_info->pinned_extents; + unpin = block_group->fs_info->pinned_extents; start = block_group->key.objectid; @@ -1235,7 +1234,6 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_io_ctl *io_ctl, struct btrfs_trans_handle *trans) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_state *cached_state = NULL; LIST_HEAD(bitmap_list); int entries = 0; @@ -1293,8 +1291,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, * If this changes while we are working we'll get added back to * the dirty list and redo it. 
No locking needed */ - ret = write_pinned_extent_entries(fs_info, block_group, - io_ctl, &entries); + ret = write_pinned_extent_entries(block_group, io_ctl, &entries); if (ret) goto out_nospc_locked; -- cgit v1.2.3-59-g8ed1b From 2ceeae2e4c4c3be9d52e84909e3882302002bb9c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 13:53:49 +0100 Subject: btrfs: get fs_info from block group in btrfs_find_space_cluster We can read fs_info from the block group cache structure and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 6 ++---- fs/btrfs/free-space-cache.c | 4 ++-- fs/btrfs/free-space-cache.h | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 370698477627..21c9d4a482e0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7470,7 +7470,6 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg, struct find_free_extent_ctl *ffe_ctl, struct btrfs_block_group_cache **cluster_bg_ret) { - struct btrfs_fs_info *fs_info = bg->fs_info; struct btrfs_block_group_cache *cluster_bg; u64 aligned_cluster; u64 offset; @@ -7530,9 +7529,8 @@ refill_cluster: aligned_cluster = max_t(u64, ffe_ctl->empty_cluster + ffe_ctl->empty_size, bg->full_stripe_len); - ret = btrfs_find_space_cluster(fs_info, bg, last_ptr, - ffe_ctl->search_start, ffe_ctl->num_bytes, - aligned_cluster); + ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start, + ffe_ctl->num_bytes, aligned_cluster); if (ret == 0) { /* Now pull our allocation out of this cluster */ offset = btrfs_alloc_from_cluster(bg, last_ptr, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 33c3e6f1ae37..f74dc259307b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3037,11 +3037,11 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, * returns zero and sets up cluster if things worked out, otherwise * it 
returns -enospc */ -int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group, +int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size) { + struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry, *tmp; LIST_HEAD(bitmaps); diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 9067841a8528..8760acb55ffd 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -92,8 +92,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, u64 bytes); -int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group, +int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size); void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); -- cgit v1.2.3-59-g8ed1b From 2ccf545e0db9398d0459915a94cb995c9fd5c570 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:11:21 +0100 Subject: btrfs: get fs_info from block group in search_free_space_info We can read fs_info from the block group cache structure and can drop it from the parameters. Though the transaction is also available, it's not guaranteed to be non-NULL.
Signed-off-by: David Sterba --- fs/btrfs/free-space-tree.c | 20 ++++++++------------ fs/btrfs/free-space-tree.h | 1 - fs/btrfs/tests/free-space-tree-tests.c | 4 ++-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 771126bc67bc..f5dc115ebba0 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -76,10 +76,11 @@ out: EXPORT_FOR_TESTS struct btrfs_free_space_info *search_free_space_info( - struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path, int cow) { + struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_root *root = fs_info->free_space_root; struct btrfs_key key; int ret; @@ -253,7 +254,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, btrfs_release_path(path); } - info = search_free_space_info(trans, fs_info, block_group, path, 1); + info = search_free_space_info(trans, block_group, path, 1); if (IS_ERR(info)) { ret = PTR_ERR(info); goto out; @@ -398,7 +399,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, btrfs_release_path(path); } - info = search_free_space_info(trans, fs_info, block_group, path, 1); + info = search_free_space_info(trans, block_group, path, 1); if (IS_ERR(info)) { ret = PTR_ERR(info); goto out; @@ -463,8 +464,7 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans, if (new_extents == 0) return 0; - info = search_free_space_info(trans, trans->fs_info, block_group, path, - 1); + info = search_free_space_info(trans, block_group, path, 1); if (IS_ERR(info)) { ret = PTR_ERR(info); goto out; @@ -793,8 +793,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans, return ret; } - info = search_free_space_info(NULL, trans->fs_info, block_group, path, - 0); + info = search_free_space_info(NULL, block_group, path, 0); if (IS_ERR(info)) return 
PTR_ERR(info); flags = btrfs_free_space_flags(path->nodes[0], info); @@ -977,7 +976,6 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path, u64 start, u64 size) { - struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_free_space_info *info; u32 flags; int ret; @@ -988,7 +986,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans, return ret; } - info = search_free_space_info(NULL, fs_info, block_group, path, 0); + info = search_free_space_info(NULL, block_group, path, 0); if (IS_ERR(info)) return PTR_ERR(info); flags = btrfs_free_space_flags(path->nodes[0], info); @@ -1534,14 +1532,12 @@ out: int load_free_space_tree(struct btrfs_caching_control *caching_ctl) { struct btrfs_block_group_cache *block_group; - struct btrfs_fs_info *fs_info; struct btrfs_free_space_info *info; struct btrfs_path *path; u32 extent_count, flags; int ret; block_group = caching_ctl->block_group; - fs_info = block_group->fs_info; path = btrfs_alloc_path(); if (!path) @@ -1555,7 +1551,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl) path->search_commit_root = 1; path->reada = READA_FORWARD; - info = search_free_space_info(NULL, fs_info, block_group, path, 0); + info = search_free_space_info(NULL, block_group, path, 0); if (IS_ERR(info)) { ret = PTR_ERR(info); goto out; diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h index 3133651d7d70..22b7602bde25 100644 --- a/fs/btrfs/free-space-tree.h +++ b/fs/btrfs/free-space-tree.h @@ -30,7 +30,6 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_free_space_info * search_free_space_info(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path, int cow); int __add_to_free_space_tree(struct btrfs_trans_handle *trans, diff --git 
a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 09c27628e305..9b26ada1873b 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -30,7 +30,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans, unsigned int i; int ret; - info = search_free_space_info(trans, fs_info, cache, path, 0); + info = search_free_space_info(trans, cache, path, 0); if (IS_ERR(info)) { test_err("could not find free space info"); ret = PTR_ERR(info); @@ -115,7 +115,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans, u32 flags; int ret; - info = search_free_space_info(trans, fs_info, cache, path, 0); + info = search_free_space_info(trans, cache, path, 0); if (IS_ERR(info)) { test_err("could not find free space info"); btrfs_release_path(path); -- cgit v1.2.3-59-g8ed1b From a9355a0ef32f109b08bb0ff51d8ad5bd173cf21b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 10 Apr 2019 17:24:38 +0300 Subject: btrfs: Define submit_bio_hook's type directly There is no need to use a typedef to define the type of the function and then use that to define the respective member in extent_io_ops. Define struct's member directly. No functional changes. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f7ca1516f70b..79bd20cf4226 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -95,9 +95,6 @@ struct btrfs_inode; struct btrfs_io_bio; struct io_failure_record; -typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset); typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, struct bio *bio, u64 bio_offset); @@ -107,7 +104,9 @@ struct extent_io_ops { * The following callbacks must be always defined, the function * pointer will be called unconditionally. */ - extent_submit_bio_hook_t *submit_bio_hook; + blk_status_t (*submit_bio_hook)(void *private_data, struct bio *bio, + int mirror_num, unsigned long bio_flags, + u64 bio_offset); int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror); -- cgit v1.2.3-59-g8ed1b From a56b1c7bc83c2c5439e4a5d44f35cea36fbe2c9d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 10 Apr 2019 17:24:39 +0300 Subject: btrfs: Change submit_bio_hook to taking an inode directly The only possible 'private_data' that is passed to this function is actually an inode. Make that explicit by changing the signature of the call back. No functional changes. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f5d0517a8563..d41511a05f2e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -885,11 +885,10 @@ static int check_async_write(struct btrfs_inode *bi) return 1; } -static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, +static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { - struct inode *inode = private_data; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int async = check_async_write(BTRFS_I(inode)); blk_status_t ret; @@ -915,7 +914,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, * checksumming can happen in parallel across all CPUs */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, - bio_offset, private_data, + bio_offset, inode, btree_submit_bio_start); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 79bd20cf4226..abafb48947ef 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -104,7 +104,7 @@ struct extent_io_ops { * The following callbacks must be always defined, the function * pointer will be called unconditionally. 
*/ - blk_status_t (*submit_bio_hook)(void *private_data, struct bio *bio, + blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset); int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b998e288366a..130b1c8f546a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1953,11 +1953,10 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio, * * c-3) otherwise: async submit */ -static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, +static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { - struct inode *inode = private_data; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; -- cgit v1.2.3-59-g8ed1b From c2ccfbc62e9f00979fc1a82ab93ff0a4ddd8944a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 10 Apr 2019 17:24:40 +0300 Subject: btrfs: Remove 'tree' argument from read_extent_buffer_pages This function always uses the btree inode's io_tree. Stop taking the tree as a function argument and instead access it internally from read_extent_buffer_pages. No functional changes. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 12 +++--------- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/extent_io.h | 3 +-- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d41511a05f2e..62983ff0c3e1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -465,8 +465,7 @@ static int btree_read_extent_buffer_pages(struct extent_buffer *eb, io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); - ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, - mirror_num); + ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num); if (!ret) { if (verify_parent_transid(io_tree, eb, parent_transid, 0)) @@ -1034,15 +1033,13 @@ static const struct address_space_operations btree_aops = { void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) { struct extent_buffer *buf = NULL; - struct inode *btree_inode = fs_info->btree_inode; int ret; buf = btrfs_find_create_tree_block(fs_info, bytenr); if (IS_ERR(buf)) return; - ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, - WAIT_NONE, 0); + ret = read_extent_buffer_pages(buf, WAIT_NONE, 0); if (ret < 0) free_extent_buffer_stale(buf); else @@ -1053,8 +1050,6 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, int mirror_num, struct extent_buffer **eb) { struct extent_buffer *buf = NULL; - struct inode *btree_inode = fs_info->btree_inode; - struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; int ret; buf = btrfs_find_create_tree_block(fs_info, bytenr); @@ -1063,8 +1058,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); - ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, - mirror_num); + ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num); if (ret) { 
free_extent_buffer_stale(buf); return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 828708f6510c..048d6c6fe7b9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5330,8 +5330,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb) } } -int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, int wait, int mirror_num) +int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) { int i; struct page *page; @@ -5343,6 +5342,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, unsigned long num_reads = 0; struct bio *bio = NULL; unsigned long bio_flags = 0; + struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree; if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index abafb48947ef..37240e03c4e3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -433,8 +433,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb); #define WAIT_NONE 0 #define WAIT_COMPLETE 1 #define WAIT_PAGE_LOCK 2 -int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, int wait, +int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num); void wait_on_extent_buffer_writeback(struct extent_buffer *eb); -- cgit v1.2.3-59-g8ed1b From e7681167c36f451b1645211116ef287d9c47b07a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 10 Apr 2019 17:24:41 +0300 Subject: btrfs: Pass 0 for bio_offset to btrfs_wq_submit_bio Buffered writeback always calls btrfs_csum_one_bio with the last 2 arguments being 0 irrespective of what the bio_offset has been passed to btrfs_submit_bio_start. Make this apparent by explicitly passing 0 for bio_offset when calling btrfs_wq_submit_bio from btrfs_submit_bio_hook. This will allow for further simplifications down the line. No functional changes. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 130b1c8f546a..1e7e8421e524 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1991,8 +1991,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, goto mapit; /* we're doing a write, do the async checksumming */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, - bio_offset, inode, - btrfs_submit_bio_start); + 0, inode, btrfs_submit_bio_start); goto out; } else if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, 0, 0); -- cgit v1.2.3-59-g8ed1b From e68f2ee72181697d574e3049426b9d4fe5b5ec4d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 10 Apr 2019 17:24:42 +0300 Subject: btrfs: Always pass 0 bio_offset for btree_submit_bio_start The btree submit hook queues the async csum and forwards the bio_offset parameter passed to btree_submit_bio_hook. This is redundant since btree_submit_bio_start calls btree_csum_one_bio which doesn't use the offset at all. No functional changes. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62983ff0c3e1..adc492848619 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -913,8 +913,7 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, * checksumming can happen in parallel across all CPUs */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, - bio_offset, inode, - btree_submit_bio_start); + 0, inode, btree_submit_bio_start); } if (ret) -- cgit v1.2.3-59-g8ed1b From 50489a5734ec77e0a0613143512de09e2229f852 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 10 Apr 2019 19:46:04 +0300 Subject: btrfs: Remove bio_offset argument from submit_bio_hook None of the implementers of the submit_bio_hook use the bio_offset parameter, simply remove it. No functional changes. Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/extent_io.c | 10 ++-------- fs/btrfs/extent_io.h | 3 +-- fs/btrfs/inode.c | 5 +++-- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index adc492848619..eb26b3239827 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -885,8 +885,8 @@ static int check_async_write(struct btrfs_inode *bi) } static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) + int mirror_num, + unsigned long bio_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int async = check_async_write(BTRFS_I(inode)); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 048d6c6fe7b9..9aa79ad794c9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -149,19 +149,13 @@ static int __must_check 
submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_flags) { blk_status_t ret = 0; - struct bio_vec *bvec = bio_last_bvec_all(bio); - struct bio_vec bv; struct extent_io_tree *tree = bio->bi_private; - u64 start; - - mp_bvec_last_segment(bvec, &bv); - start = page_offset(bv.bv_page) + bv.bv_offset; bio->bi_private = NULL; if (tree->ops) ret = tree->ops->submit_bio_hook(tree->private_data, bio, - mirror_num, bio_flags, start); + mirror_num, bio_flags); else btrfsic_submit_bio(bio); @@ -2546,7 +2540,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, read_mode, failrec->this_mirror, failrec->in_validation); status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror, - failrec->bio_flags, 0); + failrec->bio_flags); if (status) { free_io_failure(failure_tree, tree, failrec); bio_put(bio); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 37240e03c4e3..aa18a16a6ed7 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -105,8 +105,7 @@ struct extent_io_ops { * pointer will be called unconditionally. 
*/ blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset); + int mirror_num, unsigned long bio_flags); int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1e7e8421e524..837fd573991a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1954,8 +1954,9 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio, * c-3) otherwise: async submit */ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) + int mirror_num, + unsigned long bio_flags) + { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; -- cgit v1.2.3-59-g8ed1b From f72f0010b202f8ccf8fa671f9ef61e4c980fba3c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:39:45 +0100 Subject: btrfs: get fs_info from eb in __push_leaf_right We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f52eb952597b..727a7b656b44 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3589,13 +3589,13 @@ noinline int btrfs_leaf_free_space(struct extent_buffer *leaf) * min slot controls the lowest index we're willing to push to the * right. 
We'll push up to and including min_slot, but no lower */ -static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, +static noinline int __push_leaf_right(struct btrfs_path *path, int data_size, int empty, struct extent_buffer *right, int free_space, u32 left_nritems, u32 min_slot) { + struct btrfs_fs_info *fs_info = right->fs_info; struct extent_buffer *left = path->nodes[0]; struct extent_buffer *upper = path->nodes[1]; struct btrfs_map_token token; @@ -3743,7 +3743,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root int min_data_size, int data_size, int empty, u32 min_slot) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *left = path->nodes[0]; struct extent_buffer *right; struct extent_buffer *upper; @@ -3804,7 +3803,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } - return __push_leaf_right(fs_info, path, min_data_size, empty, + return __push_leaf_right(path, min_data_size, empty, right, free_space, left_nritems, min_slot); out_unlock: btrfs_tree_unlock(right); -- cgit v1.2.3-59-g8ed1b From 8087c193450b29c80c8e8b62050de9d0999e20cf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:40:41 +0100 Subject: btrfs: get fs_info from eb in __push_leaf_left We can read fs_info from extent buffer and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 727a7b656b44..e26131d14cc4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3819,12 +3819,12 @@ out_unlock: * item at 'max_slot' won't be touched. 
Use (u32)-1 to make us do all the * items */ -static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, int data_size, +static noinline int __push_leaf_left(struct btrfs_path *path, int data_size, int empty, struct extent_buffer *left, int free_space, u32 right_nritems, u32 max_slot) { + struct btrfs_fs_info *fs_info = left->fs_info; struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; int i; @@ -3976,7 +3976,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int min_data_size, int data_size, int empty, u32 max_slot) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *right = path->nodes[0]; struct extent_buffer *left; int slot; @@ -4029,7 +4028,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - return __push_leaf_left(fs_info, path, min_data_size, + return __push_leaf_left(path, min_data_size, empty, left, free_space, right_nritems, max_slot); out: -- cgit v1.2.3-59-g8ed1b From f331a9525f96c8f24e33b368521dfdeed3cb6f9d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:31:53 +0100 Subject: btrfs: get fs_info from device in btrfs_rm_dev_item We can read fs_info from the device and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5ee24876fac3..dcbfd027fdf6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1941,10 +1941,9 @@ static void update_dev_time(const char *path_name) filp_close(filp, NULL); } -static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info, - struct btrfs_device *device) +static int btrfs_rm_dev_item(struct btrfs_device *device) { - struct btrfs_root *root = fs_info->chunk_root; + struct btrfs_root *root = device->fs_info->chunk_root; int ret; struct btrfs_path *path; struct btrfs_key key; @@ -2145,7 +2144,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, * counter although write_all_supers() is not locked out. This * could give a filesystem state which requires a degraded mount. */ - ret = btrfs_rm_dev_item(fs_info, device); + ret = btrfs_rm_dev_item(device); if (ret) goto error_undo; -- cgit v1.2.3-59-g8ed1b From 163e97ee0d0f8194ef66f10b0bf0851b7f6b55dd Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:32:55 +0100 Subject: btrfs: get fs_info from device in btrfs_scrub_cancel_dev We can read fs_info from the device and can drop it from the parameters. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/scrub.c | 4 ++-- fs/btrfs/volumes.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 69a60d2217e5..05731e4ca358 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3752,8 +3752,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, void btrfs_scrub_pause(struct btrfs_fs_info *fs_info); void btrfs_scrub_continue(struct btrfs_fs_info *fs_info); int btrfs_scrub_cancel(struct btrfs_fs_info *info); -int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, - struct btrfs_device *dev); +int btrfs_scrub_cancel_dev(struct btrfs_device *dev); int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, struct btrfs_scrub_progress *progress); static inline void btrfs_init_full_stripe_locks_tree( diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ed471ffbf115..f7b29f9db5e2 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3999,9 +3999,9 @@ int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, - struct btrfs_device *dev) +int btrfs_scrub_cancel_dev(struct btrfs_device *dev) { + struct btrfs_fs_info *fs_info = dev->fs_info; struct scrub_ctx *sctx; mutex_lock(&fs_info->scrub_lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dcbfd027fdf6..c5bdd38179b6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2149,7 +2149,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, goto error_undo; clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - btrfs_scrub_cancel_dev(fs_info, device); + btrfs_scrub_cancel_dev(device); /* * the device list mutex makes sure that we don't change -- cgit v1.2.3-59-g8ed1b From 65237ee3b6b3c529548438054a819f63fb50757d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 16:34:54 +0100 Subject: btrfs: get fs_info from device in 
btrfs_rm_dev_replace_free_srcdev We can read fs_info from the device and can drop it from the parameters. Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/volumes.c | 4 ++-- fs/btrfs/volumes.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index eadf859934fe..55c15f31d00d 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -696,7 +696,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, /* replace the sysfs entry */ btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device); - btrfs_rm_dev_replace_free_srcdev(fs_info, src_device); + btrfs_rm_dev_replace_free_srcdev(src_device); /* write back the superblocks */ trans = btrfs_start_transaction(root, 0); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c5bdd38179b6..1c2a6e4b39da 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2258,9 +2258,9 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) fs_devices->open_devices--; } -void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *srcdev) +void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) { + struct btrfs_fs_info *fs_info = srcdev->fs_info; struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index b3a89d767512..b8a0e8d0672d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -464,8 +464,7 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); -void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *srcdev); +void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev); void 
btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev); void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, -- cgit v1.2.3-59-g8ed1b From bfc61c36260ca990937539cd648ede3cd749bc10 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Apr 2019 11:30:30 +0100 Subject: Btrfs: do not start a transaction at iterate_extent_inodes() When finding out which inodes have references on a particular extent, done by backref.c:iterate_extent_inodes(), from the BTRFS_IOC_LOGICAL_INO (both v1 and v2) ioctl and from scrub we use the transaction join API to grab a reference on the currently running transaction, since in order to give accurate results we need to inspect the delayed references of the currently running transaction. However, if there is currently no running transaction, the join operation will create a new transaction. This is inefficient as the transaction will eventually be committed, doing unnecessary IO and introducing a potential point of failure that will lead to a transaction abort due to -ENOSPC, as recently reported [1]. That's because the join creates the transaction but does not reserve any space, so when attempting to update the root item of the root passed to btrfs_join_transaction(), during the transaction commit, we can end up failing with -ENOSPC. Users of a join operation are supposed to actually do some filesystem changes and reserve space by some means, which is not the case of iterate_extent_inodes(), it is a read-only operation for all contexts from which it is called. The reported [1] -ENOSPC failure stack trace is the following: heisenberg kernel: ------------[ cut here ]------------ heisenberg kernel: BTRFS: Transaction aborted (error -28) heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] (...)
heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] (...) heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 heisenberg kernel: Call Trace: heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] heisenberg kernel: ? _cond_resched+0x15/0x30 heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] heisenberg kernel: ? start_transaction+0x9d/0x3f0 [btrfs] heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] heisenberg kernel: kthread+0x112/0x130 heisenberg kernel: ? kthread_bind+0x30/0x30 heisenberg kernel: ret_from_fork+0x35/0x40 heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- So fix that by using the attach API, which does not create a transaction when there is currently no running transaction. 
[1] https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ Reported-by: Zygo Blaxell CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2602072ed906..849b8c767efb 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1912,13 +1912,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, extent_item_objectid); if (!search_commit_root) { - trans = btrfs_join_transaction(fs_info->extent_root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + trans = btrfs_attach_transaction(fs_info->extent_root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT && + PTR_ERR(trans) != -EROFS) + return PTR_ERR(trans); + trans = NULL; + } + } + + if (trans) btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); - } else { + else down_read(&fs_info->commit_root_sem); - } ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, tree_mod_seq_elem.seq, &refs, @@ -1951,7 +1957,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, free_leaf_list(refs); out: - if (!search_commit_root) { + if (trans) { btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); btrfs_end_transaction(trans); } else { -- cgit v1.2.3-59-g8ed1b From b28b1f0ce44c1b9ebc1c43e3eba18c1f1f5d9cec Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:29 +0800 Subject: btrfs: delayed-ref: Introduce better documented delayed ref structures Current delayed ref interface has several problems: - Longer and longer parameter lists bytenr num_bytes parent ---------- so far so good ref_root owner offset ---------- I don't feel good now - Different interpretation of the same parameter Above @owner for data ref is inode number (u64), while for tree ref, it's level (int). They are even in different size range. 
For level we only need 0 ~ 8, while for ino it's BTRFS_FIRST_FREE_OBJECTID ~ BTRFS_LAST_FREE_OBJECTID. And @offset doesn't even make sense for tree ref. Such parameter reuse may look clever as a hidden union, but it destroys code readability. To solve both problems, we introduce a new structure, btrfs_ref: - Structure instead of long parameter list This makes later expansion easier, and is better documented. - Use btrfs_ref::type to distinguish data and tree ref - Use proper union to store data/tree ref specific structures. - Use separate functions to fill data/tree ref data, with a common generic function to fill common bytenr/num_bytes members. All parameters will find their place in btrfs_ref, and an extra member, @real_root, inspired by ref-verify code, is newly introduced for later qgroup code, to record which tree is triggered by this extent modification. This patch doesn't touch any code, but provides the basis for further refactoring. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.h | 109 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 70606da440aa..d05bb6e2e1a6 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -176,6 +176,83 @@ struct btrfs_delayed_ref_root { u64 qgroup_to_skip; }; +enum btrfs_ref_type { + BTRFS_REF_NOT_SET, + BTRFS_REF_DATA, + BTRFS_REF_METADATA, + BTRFS_REF_LAST, +}; + +struct btrfs_data_ref { + /* For EXTENT_DATA_REF */ + + /* Root which refers to this data extent */ + u64 ref_root; + + /* Inode which refers to this data extent */ + u64 ino; + + /* + * file_offset - extent_offset + * + * file_offset is the key.offset of the EXTENT_DATA key. + * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
+ */ + u64 offset; +}; + +struct btrfs_tree_ref { + /* + * Level of this tree block + * + * Shared for skinny (TREE_BLOCK_REF) and normal tree ref. + */ + int level; + + /* + * Root which refers to this tree block. + * + * For TREE_BLOCK_REF (skinny metadata, either inline or keyed) + */ + u64 root; + + /* For non-skinny metadata, no special member needed */ +}; + +struct btrfs_ref { + enum btrfs_ref_type type; + int action; + + /* + * Whether this extent should go through qgroup record. + * + * Normally false, but for certain cases like delayed subtree scan, + * setting this flag can hugely reduce qgroup overhead. + */ + bool skip_qgroup; + + /* + * Optional. For which root is this modification. + * Mostly used for qgroup optimization. + * + * When unset, data/tree ref init code will populate it. + * In certain cases, we're modifying reference for a different root. + * E.g. COW fs tree blocks for balance. + * In that case, tree_ref::root will be fs tree, but we're doing this + * for reloc tree, then we should set @real_root to reloc tree. 
+ */ + u64 real_root; + u64 bytenr; + u64 len; + + /* Bytenr of the parent tree block */ + u64 parent; + union { + struct btrfs_data_ref data_ref; + struct btrfs_tree_ref tree_ref; + }; +}; + extern struct kmem_cache *btrfs_delayed_ref_head_cachep; extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; extern struct kmem_cache *btrfs_delayed_data_ref_cachep; @@ -184,6 +261,38 @@ extern struct kmem_cache *btrfs_delayed_extent_op_cachep; int __init btrfs_delayed_ref_init(void); void __cold btrfs_delayed_ref_exit(void); +static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, + int action, u64 bytenr, u64 len, u64 parent) +{ + generic_ref->action = action; + generic_ref->bytenr = bytenr; + generic_ref->len = len; + generic_ref->parent = parent; +} + +static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, + int level, u64 root) +{ + /* If @real_root not set, use @root as fallback */ + if (!generic_ref->real_root) + generic_ref->real_root = root; + generic_ref->tree_ref.level = level; + generic_ref->tree_ref.root = root; + generic_ref->type = BTRFS_REF_METADATA; +} + +static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, + u64 ref_root, u64 ino, u64 offset) +{ + /* If @real_root not set, use @root as fallback */ + if (!generic_ref->real_root) + generic_ref->real_root = ref_root; + generic_ref->data_ref.ref_root = ref_root; + generic_ref->data_ref.ino = ino; + generic_ref->data_ref.offset = offset; + generic_ref->type = BTRFS_REF_DATA; +} + static inline struct btrfs_delayed_extent_op * btrfs_alloc_delayed_extent_op(void) { -- cgit v1.2.3-59-g8ed1b From dd28b6a5aad306c417477db700ed0848c923a31c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:30 +0800 Subject: btrfs: extent-tree: Open-code process_func in __btrfs_mod_ref The process_func function pointer is local to __btrfs_mod_ref() and points to either btrfs_inc_extent_ref() or btrfs_free_extent(). 
Open code it to make later delayed ref refactor easier, so we can refactor btrfs_inc_extent_ref() and btrfs_free_extent() in different patches. Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 21c9d4a482e0..cd33b89418dd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3157,10 +3157,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, int i; int level; int ret = 0; - int (*process_func)(struct btrfs_trans_handle *, - struct btrfs_root *, - u64, u64, u64, u64, u64, u64); - if (btrfs_is_testing(fs_info)) return 0; @@ -3172,11 +3168,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0) return 0; - if (inc) - process_func = btrfs_inc_extent_ref; - else - process_func = btrfs_free_extent; - if (full_backref) parent = buf->start; else @@ -3198,16 +3189,27 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); key.offset -= btrfs_file_extent_offset(buf, fi); - ret = process_func(trans, root, bytenr, num_bytes, - parent, ref_root, key.objectid, - key.offset); + if (inc) + ret = btrfs_inc_extent_ref(trans, root, bytenr, + num_bytes, parent, ref_root, + key.objectid, key.offset); + else + ret = btrfs_free_extent(trans, root, bytenr, + num_bytes, parent, ref_root, + key.objectid, key.offset); if (ret) goto fail; } else { bytenr = btrfs_node_blockptr(buf, i); num_bytes = fs_info->nodesize; - ret = process_func(trans, root, bytenr, num_bytes, - parent, ref_root, level - 1, 0); + if (inc) + ret = btrfs_inc_extent_ref(trans, root, bytenr, + num_bytes, parent, ref_root, + level - 1, 0); + else + ret = btrfs_free_extent(trans, root, bytenr, + 
num_bytes, parent, ref_root, + level - 1, 0); if (ret) goto fail; } -- cgit v1.2.3-59-g8ed1b From ed4f255b9bacb774c99ded17647f138c3f61546d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:31 +0800 Subject: btrfs: delayed-ref: Use btrfs_ref to refactor btrfs_add_delayed_tree_ref() btrfs_add_delayed_tree_ref() has a longer and longer parameter list, and some callers like btrfs_inc_extent_ref() are using @owner as level for delayed tree ref. Instead of making the parameter list longer, use btrfs_ref to refactor it, so each parameter assignment should be self-explaining without dirty level/owner trick, and provides the basis for later refactoring. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 24 +++++++++++++++++------- fs/btrfs/delayed-ref.h | 3 +-- fs/btrfs/extent-tree.c | 44 ++++++++++++++++++++++++++------------------ 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 7d2a413df90d..a9c2f469caa9 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -735,8 +735,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info, * transaction commits. 
*/ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, int level, int action, + struct btrfs_ref *generic_ref, struct btrfs_delayed_extent_op *extent_op, int *old_ref_mod, int *new_ref_mod) { @@ -746,10 +745,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; int qrecord_inserted; - bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID); + bool is_system; + int action = generic_ref->action; + int level = generic_ref->tree_ref.level; int ret; + u64 bytenr = generic_ref->bytenr; + u64 num_bytes = generic_ref->len; + u64 parent = generic_ref->parent; u8 ref_type; + is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID); + + ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action); BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); if (!ref) @@ -762,7 +769,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, } if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && - is_fstree(ref_root)) { + is_fstree(generic_ref->real_root) && + is_fstree(generic_ref->tree_ref.root) && + !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); @@ -777,13 +786,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, ref_type = BTRFS_TREE_BLOCK_REF_KEY; init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, - ref_root, action, ref_type); - ref->root = ref_root; + generic_ref->tree_ref.root, action, ref_type); + ref->root = generic_ref->tree_ref.root; ref->parent = parent; ref->level = level; init_delayed_ref_head(head_ref, record, bytenr, num_bytes, - ref_root, 0, action, false, is_system); + generic_ref->tree_ref.root, 0, action, false, + is_system); head_ref->extent_op = extent_op; delayed_refs = 
&trans->transaction->delayed_refs; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index d05bb6e2e1a6..515d1a3d8080 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -333,8 +333,7 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea } int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, int level, int action, + struct btrfs_ref *generic_ref, struct btrfs_delayed_extent_op *extent_op, int *old_ref_mod, int *new_ref_mod); int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd33b89418dd..88deec012386 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2047,6 +2047,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset) { struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_ref generic_ref = { 0 }; int old_ref_mod, new_ref_mod; int ret; @@ -2056,12 +2057,13 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid, owner, offset, BTRFS_ADD_DELAYED_REF); + btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_REF, bytenr, + num_bytes, parent); + generic_ref.real_root = root->root_key.objectid; if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_add_delayed_tree_ref(trans, bytenr, - num_bytes, parent, - root_objectid, (int)owner, - BTRFS_ADD_DELAYED_REF, NULL, - &old_ref_mod, &new_ref_mod); + btrfs_init_tree_ref(&generic_ref, (int)owner, root_objectid); + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, + NULL, &old_ref_mod, &new_ref_mod); } else { ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, parent, @@ -7176,9 +7178,15 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, u64 parent, int last_ref) { struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_ref generic_ref = { 0 }; 
int pin = 1; int ret; + btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, + buf->start, buf->len, parent); + btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), + root->root_key.objectid); + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { int old_ref_mod, new_ref_mod; @@ -7186,11 +7194,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, root->root_key.objectid, btrfs_header_level(buf), 0, BTRFS_DROP_DELAYED_REF); - ret = btrfs_add_delayed_tree_ref(trans, buf->start, - buf->len, parent, - root->root_key.objectid, - btrfs_header_level(buf), - BTRFS_DROP_DELAYED_REF, NULL, + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL, &old_ref_mod, &new_ref_mod); BUG_ON(ret); /* -ENOMEM */ pin = old_ref_mod >= 0 && new_ref_mod < 0; @@ -7242,6 +7246,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 owner, u64 offset) { struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_ref generic_ref = { 0 }; int old_ref_mod, new_ref_mod; int ret; @@ -7253,6 +7258,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, root_objectid, owner, offset, BTRFS_DROP_DELAYED_REF); + btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, bytenr, + num_bytes, parent); + generic_ref.real_root = root->root_key.objectid; /* * tree log blocks never actually go into the extent allocation * tree, just update pinning info and exit early. 
@@ -7264,10 +7272,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, old_ref_mod = new_ref_mod = 0; ret = 0; } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_add_delayed_tree_ref(trans, bytenr, - num_bytes, parent, - root_objectid, (int)owner, - BTRFS_DROP_DELAYED_REF, NULL, + btrfs_init_tree_ref(&generic_ref, (int)owner, root_objectid); + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL, &old_ref_mod, &new_ref_mod); } else { ret = btrfs_add_delayed_data_ref(trans, bytenr, @@ -8583,6 +8589,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, struct btrfs_block_rsv *block_rsv; struct extent_buffer *buf; struct btrfs_delayed_extent_op *extent_op; + struct btrfs_ref generic_ref = { 0 }; u64 flags = 0; int ret; u32 blocksize = fs_info->nodesize; @@ -8640,10 +8647,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, root_objectid, level, 0, BTRFS_ADD_DELAYED_EXTENT); - ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, - ins.offset, parent, - root_objectid, level, - BTRFS_ADD_DELAYED_EXTENT, + btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, + ins.objectid, ins.offset, parent); + generic_ref.real_root = root->root_key.objectid; + btrfs_init_tree_ref(&generic_ref, level, root_objectid); + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op, NULL, NULL); if (ret) goto out_free_delayed; -- cgit v1.2.3-59-g8ed1b From 76675593b69f2fcd57e24d9dd2a9b278f0130d0b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:32 +0800 Subject: btrfs: delayed-ref: Use btrfs_ref to refactor btrfs_add_delayed_data_ref() Just like btrfs_add_delayed_tree_ref(), use btrfs_ref to refactor btrfs_add_delayed_data_ref(). 
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 19 ++++++++++++++----- fs/btrfs/delayed-ref.h | 7 +++---- fs/btrfs/extent-tree.c | 23 ++++++++++------------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index a9c2f469caa9..5c1f2462f757 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -832,10 +832,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. */ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, - u64 parent, u64 ref_root, - u64 owner, u64 offset, u64 reserved, int action, - int *old_ref_mod, int *new_ref_mod) + struct btrfs_ref *generic_ref, + u64 reserved, int *old_ref_mod, + int *new_ref_mod) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_data_ref *ref; @@ -843,9 +842,17 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; int qrecord_inserted; + int action = generic_ref->action; int ret; + u64 bytenr = generic_ref->bytenr; + u64 num_bytes = generic_ref->len; + u64 parent = generic_ref->parent; + u64 ref_root = generic_ref->data_ref.ref_root; + u64 owner = generic_ref->data_ref.ino; + u64 offset = generic_ref->data_ref.offset; u8 ref_type; + ASSERT(generic_ref->type == BTRFS_REF_DATA && action); ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); if (!ref) return -ENOMEM; @@ -869,7 +876,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, } if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && - is_fstree(ref_root)) { + is_fstree(ref_root) && + is_fstree(generic_ref->real_root) && + !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); diff --git 
a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 515d1a3d8080..7cbb52adc553 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -337,10 +337,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_extent_op *extent_op, int *old_ref_mod, int *new_ref_mod); int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, - u64 parent, u64 ref_root, - u64 owner, u64 offset, u64 reserved, int action, - int *old_ref_mod, int *new_ref_mod); + struct btrfs_ref *generic_ref, + u64 reserved, int *old_ref_mod, + int *new_ref_mod); int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 88deec012386..cbddd5ab7d65 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2065,10 +2065,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL, &old_ref_mod, &new_ref_mod); } else { - ret = btrfs_add_delayed_data_ref(trans, bytenr, - num_bytes, parent, - root_objectid, owner, offset, - 0, BTRFS_ADD_DELAYED_REF, + btrfs_init_data_ref(&generic_ref, root_objectid, owner, offset); + ret = btrfs_add_delayed_data_ref(trans, &generic_ref, 0, &old_ref_mod, &new_ref_mod); } @@ -7276,10 +7274,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL, &old_ref_mod, &new_ref_mod); } else { - ret = btrfs_add_delayed_data_ref(trans, bytenr, - num_bytes, parent, - root_objectid, owner, offset, - 0, BTRFS_DROP_DELAYED_REF, + btrfs_init_data_ref(&generic_ref, root_objectid, owner, offset); + ret = btrfs_add_delayed_data_ref(trans, &generic_ref, 0, &old_ref_mod, &new_ref_mod); } @@ -8385,6 +8381,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, u64 offset, u64 ram_bytes, struct btrfs_key *ins) { + struct btrfs_ref 
generic_ref = { 0 }; int ret; BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); @@ -8393,11 +8390,11 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, root->root_key.objectid, owner, offset, BTRFS_ADD_DELAYED_EXTENT); - ret = btrfs_add_delayed_data_ref(trans, ins->objectid, - ins->offset, 0, - root->root_key.objectid, owner, - offset, ram_bytes, - BTRFS_ADD_DELAYED_EXTENT, NULL, NULL); + btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, + ins->objectid, ins->offset, 0); + btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset); + ret = btrfs_add_delayed_data_ref(trans, &generic_ref, + ram_bytes, NULL, NULL); return ret; } -- cgit v1.2.3-59-g8ed1b From 8a5040f7d9fddc532bdec698fb0ff0bebe33d4ed Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:33 +0800 Subject: btrfs: ref-verify: Use btrfs_ref to refactor btrfs_ref_tree_mod() It's a perfect match for btrfs_ref_tree_mod() to use btrfs_ref, as btrfs_ref describes a metadata/data reference update comprehensively. Now we have one less function using the confusing owner/level trick.
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 27 ++++++++----------------- fs/btrfs/ref-verify.c | 53 ++++++++++++++++++++++++++++---------------------- fs/btrfs/ref-verify.h | 10 ++++------ 3 files changed, 42 insertions(+), 48 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cbddd5ab7d65..e7444a181793 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2054,9 +2054,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && root_objectid == BTRFS_TREE_LOG_OBJECTID); - btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid, - owner, offset, BTRFS_ADD_DELAYED_REF); - btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_REF, bytenr, num_bytes, parent); generic_ref.real_root = root->root_key.objectid; @@ -2070,6 +2067,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, &old_ref_mod, &new_ref_mod); } + btrfs_ref_tree_mod(fs_info, &generic_ref); + if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) { bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; @@ -7188,10 +7187,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { int old_ref_mod, new_ref_mod; - btrfs_ref_tree_mod(root, buf->start, buf->len, parent, - root->root_key.objectid, - btrfs_header_level(buf), 0, - BTRFS_DROP_DELAYED_REF); + btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL, &old_ref_mod, &new_ref_mod); BUG_ON(ret); /* -ENOMEM */ @@ -7251,11 +7247,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, if (btrfs_is_testing(fs_info)) return 0; - if (root_objectid != BTRFS_TREE_LOG_OBJECTID) - btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, - root_objectid, owner, offset, - BTRFS_DROP_DELAYED_REF); - btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, bytenr, num_bytes, parent); 
generic_ref.real_root = root->root_key.objectid; @@ -7279,6 +7270,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, &old_ref_mod, &new_ref_mod); } + if (root_objectid != BTRFS_TREE_LOG_OBJECTID) + btrfs_ref_tree_mod(fs_info, &generic_ref); + if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) { bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; @@ -8386,13 +8380,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); - btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0, - root->root_key.objectid, owner, offset, - BTRFS_ADD_DELAYED_EXTENT); - btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins->objectid, ins->offset, 0); btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset); + btrfs_ref_tree_mod(root->fs_info, &generic_ref); ret = btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes, NULL, NULL); return ret; @@ -8641,13 +8632,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, extent_op->is_data = false; extent_op->level = level; - btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, - root_objectid, level, 0, - BTRFS_ADD_DELAYED_EXTENT); btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins.objectid, ins.offset, parent); generic_ref.real_root = root->root_key.objectid; btrfs_init_tree_ref(&generic_ref, level, root_objectid); + btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op, NULL, NULL); if (ret) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index d09b6cdb785a..4a0f6289ef17 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -670,36 +670,43 @@ static void dump_block_entry(struct btrfs_fs_info *fs_info, /* * btrfs_ref_tree_mod: called when we modify a ref for a bytenr - * @root: the root we are making this modification from. - * @bytenr: the bytenr we are modifying. 
- * @num_bytes: number of bytes. - * @parent: the parent bytenr. - * @ref_root: the original root owner of the bytenr. - * @owner: level in the case of metadata, inode in the case of data. - * @offset: 0 for metadata, file offset for data. - * @action: the action that we are doing, this is the same as the delayed ref - * action. * * This will add an action item to the given bytenr and do sanity checks to make * sure we haven't messed something up. If we are making a new allocation and * this block entry has history we will delete all previous actions as long as * our sanity checks pass as they are no longer needed. */ -int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, - u64 parent, u64 ref_root, u64 owner, u64 offset, - int action) +int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, + struct btrfs_ref *generic_ref) { - struct btrfs_fs_info *fs_info = root->fs_info; struct ref_entry *ref = NULL, *exist; struct ref_action *ra = NULL; struct block_entry *be = NULL; struct root_entry *re = NULL; + int action = generic_ref->action; int ret = 0; - bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; + bool metadata; + u64 bytenr = generic_ref->bytenr; + u64 num_bytes = generic_ref->len; + u64 parent = generic_ref->parent; + u64 ref_root; + u64 owner; + u64 offset; - if (!btrfs_test_opt(root->fs_info, REF_VERIFY)) + if (!btrfs_test_opt(fs_info, REF_VERIFY)) return 0; + if (generic_ref->type == BTRFS_REF_METADATA) { + ref_root = generic_ref->tree_ref.root; + owner = generic_ref->tree_ref.level; + offset = 0; + } else { + ref_root = generic_ref->data_ref.ref_root; + owner = generic_ref->data_ref.ino; + offset = generic_ref->data_ref.offset; + } + metadata = owner < BTRFS_FIRST_FREE_OBJECTID; + ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); ra = kmalloc(sizeof(struct ref_action), GFP_NOFS); if (!ra || !ref) { @@ -732,7 +739,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, INIT_LIST_HEAD(&ra->list); ra->action = 
action; - ra->root = root->root_key.objectid; + ra->root = generic_ref->real_root; /* * This is an allocation, preallocate the block_entry in case we haven't @@ -745,7 +752,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, * is and the new root objectid, so let's not treat the passed * in root as if it really has a ref for this bytenr. */ - be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root); + be = add_block_entry(fs_info, bytenr, num_bytes, ref_root); if (IS_ERR(be)) { kfree(ra); ret = PTR_ERR(be); @@ -787,13 +794,13 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, * one we want to lookup below when we modify the * re->num_refs. */ - ref_root = root->root_key.objectid; - re->root_objectid = root->root_key.objectid; + ref_root = generic_ref->real_root; + re->root_objectid = generic_ref->real_root; re->num_refs = 0; } - spin_lock(&root->fs_info->ref_verify_lock); - be = lookup_block_entry(&root->fs_info->block_tree, bytenr); + spin_lock(&fs_info->ref_verify_lock); + be = lookup_block_entry(&fs_info->block_tree, bytenr); if (!be) { btrfs_err(fs_info, "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!", @@ -862,7 +869,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, * didn't think of some other corner case. 
*/ btrfs_err(fs_info, "failed to find root %llu for %llu", - root->root_key.objectid, be->bytenr); + generic_ref->real_root, be->bytenr); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); kfree(ra); @@ -881,7 +888,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, list_add_tail(&ra->list, &be->actions); ret = 0; out_unlock: - spin_unlock(&root->fs_info->ref_verify_lock); + spin_unlock(&fs_info->ref_verify_lock); out: if (ret) btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h index b7d2a4edfdb7..855de37719b5 100644 --- a/fs/btrfs/ref-verify.h +++ b/fs/btrfs/ref-verify.h @@ -9,9 +9,8 @@ #ifdef CONFIG_BTRFS_FS_REF_VERIFY int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info); void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info); -int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, - u64 parent, u64 ref_root, u64 owner, u64 offset, - int action); +int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, + struct btrfs_ref *generic_ref); void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, u64 len); @@ -30,9 +29,8 @@ static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info) { } -static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 parent, u64 ref_root, - u64 owner, u64 offset, int action) +static inline int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, + struct btrfs_ref *generic_ref) { return 0; } -- cgit v1.2.3-59-g8ed1b From ddf30cf03fb53b9a0ad0f355a69dbedf416edde9 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:34 +0800 Subject: btrfs: extent-tree: Use btrfs_ref to refactor add_pinned_bytes() Since add_pinned_bytes() only needs to know if the extent is metadata and if it's a chunk tree extent, btrfs_ref is a perfect match for it, as we don't need various owner/level tricks to determine the extent type.
Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e7444a181793..4fbcd0f7b204 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -756,14 +756,15 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, return NULL; } -static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes, - bool metadata, u64 root_objectid) +static void add_pinned_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_ref *ref) { struct btrfs_space_info *space_info; + s64 num_bytes = -ref->len; u64 flags; - if (metadata) { - if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) + if (ref->type == BTRFS_REF_METADATA) { + if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID) flags = BTRFS_BLOCK_GROUP_SYSTEM; else flags = BTRFS_BLOCK_GROUP_METADATA; @@ -2069,11 +2070,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_ref_tree_mod(fs_info, &generic_ref); - if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) { - bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; - - add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid); - } + if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) + add_pinned_bytes(fs_info, &generic_ref); return ret; } @@ -7221,8 +7219,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, } out: if (pin) - add_pinned_bytes(fs_info, buf->len, true, - root->root_key.objectid); + add_pinned_bytes(fs_info, &generic_ref); if (last_ref) { /* @@ -7273,11 +7270,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, if (root_objectid != BTRFS_TREE_LOG_OBJECTID) btrfs_ref_tree_mod(fs_info, &generic_ref); - if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) { - bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; - - add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid); - } + if (ret == 0 && 
old_ref_mod >= 0 && new_ref_mod < 0) + add_pinned_bytes(fs_info, &generic_ref); return ret; } -- cgit v1.2.3-59-g8ed1b From 82fa113fccc41fe5204b4ce35341d69ebde0020f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:35 +0800 Subject: btrfs: extent-tree: Use btrfs_ref to refactor btrfs_inc_extent_ref() Use the new btrfs_ref structure and replace parameter list to clean up the usage of owner and level to distinguish the extent types. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 ++--- fs/btrfs/extent-tree.c | 57 ++++++++++++++++++++++++++++---------------------- fs/btrfs/file.c | 17 ++++++++++----- fs/btrfs/inode.c | 10 +++++---- fs/btrfs/ioctl.c | 15 +++++++------ fs/btrfs/relocation.c | 42 ++++++++++++++++++++++++------------- fs/btrfs/tree-log.c | 11 +++++++--- 7 files changed, 96 insertions(+), 61 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 05731e4ca358..820c7425258a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -41,6 +41,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; extern struct kmem_cache *btrfs_free_space_cachep; struct btrfs_ordered_sum; +struct btrfs_ref; #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ @@ -2760,9 +2761,7 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info, void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset); + struct btrfs_ref *generic_ref); int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4fbcd0f7b204..c2453ee035d9 100644 --- 
a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2043,35 +2043,28 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, /* Can return -ENOMEM */ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset) + struct btrfs_ref *generic_ref) { - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_ref generic_ref = { 0 }; + struct btrfs_fs_info *fs_info = trans->fs_info; int old_ref_mod, new_ref_mod; int ret; - BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && - root_objectid == BTRFS_TREE_LOG_OBJECTID); + ASSERT(generic_ref->type != BTRFS_REF_NOT_SET && + generic_ref->action); + BUG_ON(generic_ref->type == BTRFS_REF_METADATA && + generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID); - btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_REF, bytenr, - num_bytes, parent); - generic_ref.real_root = root->root_key.objectid; - if (owner < BTRFS_FIRST_FREE_OBJECTID) { - btrfs_init_tree_ref(&generic_ref, (int)owner, root_objectid); - ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, + if (generic_ref->type == BTRFS_REF_METADATA) + ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL, &old_ref_mod, &new_ref_mod); - } else { - btrfs_init_data_ref(&generic_ref, root_objectid, owner, offset); - ret = btrfs_add_delayed_data_ref(trans, &generic_ref, 0, + else + ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0, &old_ref_mod, &new_ref_mod); - } - btrfs_ref_tree_mod(fs_info, &generic_ref); + btrfs_ref_tree_mod(fs_info, generic_ref); if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) - add_pinned_bytes(fs_info, &generic_ref); + add_pinned_bytes(fs_info, generic_ref); return ret; } @@ -3151,7 +3144,10 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; + struct btrfs_ref generic_ref = { 0 }; + bool for_reloc = btrfs_header_flag(buf, 
BTRFS_HEADER_FLAG_RELOC); int i; + int action; int level; int ret = 0; @@ -3169,6 +3165,10 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, parent = buf->start; else parent = 0; + if (inc) + action = BTRFS_ADD_DELAYED_REF; + else + action = BTRFS_DROP_DELAYED_REF; for (i = 0; i < nritems; i++) { if (level == 0) { @@ -3186,10 +3186,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); key.offset -= btrfs_file_extent_offset(buf, fi); + btrfs_init_generic_ref(&generic_ref, action, bytenr, + num_bytes, parent); + generic_ref.real_root = root->root_key.objectid; + btrfs_init_data_ref(&generic_ref, ref_root, key.objectid, + key.offset); + generic_ref.skip_qgroup = for_reloc; if (inc) - ret = btrfs_inc_extent_ref(trans, root, bytenr, - num_bytes, parent, ref_root, - key.objectid, key.offset); + ret = btrfs_inc_extent_ref(trans, &generic_ref); else ret = btrfs_free_extent(trans, root, bytenr, num_bytes, parent, ref_root, @@ -3199,10 +3203,13 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, } else { bytenr = btrfs_node_blockptr(buf, i); num_bytes = fs_info->nodesize; + btrfs_init_generic_ref(&generic_ref, action, bytenr, + num_bytes, parent); + generic_ref.real_root = root->root_key.objectid; + btrfs_init_tree_ref(&generic_ref, level - 1, ref_root); + generic_ref.skip_qgroup = for_reloc; if (inc) - ret = btrfs_inc_extent_ref(trans, root, bytenr, - num_bytes, parent, ref_root, - level - 1, 0); + ret = btrfs_inc_extent_ref(trans, &generic_ref); else ret = btrfs_free_extent(trans, root, bytenr, num_bytes, parent, ref_root, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c857a884a90f..a4fc89a84baf 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -754,6 +754,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; + struct btrfs_ref ref = { 0 }; struct 
btrfs_key key; struct btrfs_key new_key; u64 ino = btrfs_ino(BTRFS_I(inode)); @@ -909,11 +910,14 @@ next_slot: btrfs_mark_buffer_dirty(leaf); if (update_refs && disk_bytenr > 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, num_bytes, 0, + btrfs_init_generic_ref(&ref, + BTRFS_ADD_DELAYED_REF, + disk_bytenr, num_bytes, 0); + btrfs_init_data_ref(&ref, root->root_key.objectid, new_key.objectid, start - extent_offset); + ret = btrfs_inc_extent_ref(trans, &ref); BUG_ON(ret); /* -ENOMEM */ } key.offset = start; @@ -1142,6 +1146,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_path *path; struct btrfs_file_extent_item *fi; + struct btrfs_ref ref = { 0 }; struct btrfs_key key; struct btrfs_key new_key; u64 bytenr; @@ -1287,9 +1292,11 @@ again: extent_end - split); btrfs_mark_buffer_dirty(leaf); - ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, - 0, root->root_key.objectid, - ino, orig_offset); + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, + num_bytes, 0); + btrfs_init_data_ref(&ref, root->root_key.objectid, ino, + orig_offset); + ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 837fd573991a..9e736794a6a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2519,6 +2519,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path, struct btrfs_file_extent_item *item; struct btrfs_ordered_extent *ordered; struct btrfs_trans_handle *trans; + struct btrfs_ref ref = { 0 }; struct btrfs_root *root; struct btrfs_key key; struct extent_buffer *leaf; @@ -2689,10 +2690,11 @@ again: inode_add_bytes(inode, len); btrfs_release_path(path); - ret = btrfs_inc_extent_ref(trans, root, new->bytenr, - new->disk_len, 0, - backref->root_id, backref->inum, - new->file_pos); /* start - extent_offset */ + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr, + 
new->disk_len, 0); + btrfs_init_data_ref(&ref, backref->root_id, backref->inum, + new->file_pos); /* start - extent_offset */ + ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out_free_path; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8c9a908d3acc..19b0ee4e2c70 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3737,13 +3737,16 @@ process_slot: datal); if (disko) { + struct btrfs_ref ref = { 0 }; inode_add_bytes(inode, datal); - ret = btrfs_inc_extent_ref(trans, - root, - disko, diskl, 0, - root->root_key.objectid, - btrfs_ino(BTRFS_I(inode)), - new_key.offset - datao); + btrfs_init_generic_ref(&ref, + BTRFS_ADD_DELAYED_REF, disko, + diskl, 0); + btrfs_init_data_ref(&ref, + root->root_key.objectid, + btrfs_ino(BTRFS_I(inode)), + new_key.offset - datao); + ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9babbc74d145..cf544128985c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1643,6 +1643,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { + struct btrfs_ref ref = { 0 }; + cond_resched(); btrfs_item_key_to_cpu(leaf, &key, i); if (key.type != BTRFS_EXTENT_DATA_KEY) @@ -1703,10 +1705,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans, dirty = 1; key.offset -= btrfs_file_extent_offset(leaf, fi); - ret = btrfs_inc_extent_ref(trans, root, new_bytenr, - num_bytes, parent, - btrfs_header_owner(leaf), - key.objectid, key.offset); + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr, + num_bytes, parent); + ref.real_root = root->root_key.objectid; + btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), + key.objectid, key.offset); + ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); break; @@ -1756,6 +1760,7 @@ int replace_path(struct 
btrfs_trans_handle *trans, struct reloc_control *rc, struct btrfs_fs_info *fs_info = dest->fs_info; struct extent_buffer *eb; struct extent_buffer *parent; + struct btrfs_ref ref = { 0 }; struct btrfs_key key; u64 old_bytenr; u64 new_bytenr; @@ -1916,13 +1921,17 @@ again: path->slots[level], old_ptr_gen); btrfs_mark_buffer_dirty(path->nodes[level]); - ret = btrfs_inc_extent_ref(trans, src, old_bytenr, - blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0); + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr, + blocksize, path->nodes[level]->start); + ref.skip_qgroup = true; + btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid); + ret = btrfs_inc_extent_ref(trans, &ref); BUG_ON(ret); - ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, - blocksize, 0, dest->root_key.objectid, - level - 1, 0); + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr, + blocksize, 0); + ref.skip_qgroup = true; + btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid); + ret = btrfs_inc_extent_ref(trans, &ref); BUG_ON(ret); ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, @@ -2721,6 +2730,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, rc->backref_cache.path[node->level] = node; list_for_each_entry(edge, &node->upper, list[LOWER]) { struct btrfs_key first_key; + struct btrfs_ref ref = { 0 }; cond_resched(); @@ -2826,11 +2836,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, trans->transid); btrfs_mark_buffer_dirty(upper->eb); - ret = btrfs_inc_extent_ref(trans, root, - node->eb->start, blocksize, - upper->eb->start, - btrfs_header_owner(upper->eb), - node->level, 0); + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, + node->eb->start, blocksize, + upper->eb->start); + ref.real_root = root->root_key.objectid; + btrfs_init_tree_ref(&ref, node->level, + btrfs_header_owner(upper->eb)); + ret = btrfs_inc_extent_ref(trans, &ref); BUG_ON(ret); ret = btrfs_drop_subtree(trans, root, eb, 
upper->eb); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 75a8e3c78fdb..b94a0cca4ecd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -705,9 +705,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, goto out; if (ins.objectid > 0) { + struct btrfs_ref ref = { 0 }; u64 csum_start; u64 csum_end; LIST_HEAD(ordered_sums); + /* * is this extent already allocated in the extent * allocation tree? If so, just add a reference @@ -715,10 +717,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = btrfs_lookup_data_extent(fs_info, ins.objectid, ins.offset); if (ret == 0) { - ret = btrfs_inc_extent_ref(trans, root, - ins.objectid, ins.offset, - 0, root->root_key.objectid, + btrfs_init_generic_ref(&ref, + BTRFS_ADD_DELAYED_REF, + ins.objectid, ins.offset, 0); + btrfs_init_data_ref(&ref, + root->root_key.objectid, key->objectid, offset); + ret = btrfs_inc_extent_ref(trans, &ref); if (ret) goto out; } else { -- cgit v1.2.3-59-g8ed1b From ffd4bb2a19cd29681f5b70a200654ab92619de8a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:36 +0800 Subject: btrfs: extent-tree: Use btrfs_ref to refactor btrfs_free_extent() Similar to btrfs_inc_extent_ref(), use btrfs_ref to replace the long parameter list and the confusing @owner parameter. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 +---- fs/btrfs/extent-tree.c | 52 ++++++++++++++++++++++---------------------------- fs/btrfs/file.c | 22 +++++++++++---------- fs/btrfs/inode.c | 13 +++++++++---- fs/btrfs/relocation.c | 25 +++++++++++++++--------- 5 files changed, 61 insertions(+), 56 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 820c7425258a..aa557b43d349 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2749,10 +2749,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 flags, int level, int is_data); -int btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset); +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len, int delalloc); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c2453ee035d9..2e69e67a6ae9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3195,9 +3195,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); else - ret = btrfs_free_extent(trans, root, bytenr, - num_bytes, parent, ref_root, - key.objectid, key.offset); + ret = btrfs_free_extent(trans, &generic_ref); if (ret) goto fail; } else { @@ -3211,9 +3209,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); else - ret = btrfs_free_extent(trans, root, bytenr, - num_bytes, parent, ref_root, - level - 1, 0); + ret = btrfs_free_extent(trans, &generic_ref); if (ret) goto fail; } @@ -7238,47 +7234,43 @@ out: } /* Can return -ENOMEM */ -int btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 
num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset) +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) { - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_ref generic_ref = { 0 }; + struct btrfs_fs_info *fs_info = trans->fs_info; int old_ref_mod, new_ref_mod; int ret; if (btrfs_is_testing(fs_info)) return 0; - btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, bytenr, - num_bytes, parent); - generic_ref.real_root = root->root_key.objectid; /* * tree log blocks never actually go into the extent allocation * tree, just update pinning info and exit early. */ - if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { - WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); + if ((ref->type == BTRFS_REF_METADATA && + ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || + (ref->type == BTRFS_REF_DATA && + ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) { /* unlocks the pinned mutex */ - btrfs_pin_extent(fs_info, bytenr, num_bytes, 1); + btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1); old_ref_mod = new_ref_mod = 0; ret = 0; - } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { - btrfs_init_tree_ref(&generic_ref, (int)owner, root_objectid); - ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL, + } else if (ref->type == BTRFS_REF_METADATA) { + ret = btrfs_add_delayed_tree_ref(trans, ref, NULL, &old_ref_mod, &new_ref_mod); } else { - btrfs_init_data_ref(&generic_ref, root_objectid, owner, offset); - ret = btrfs_add_delayed_data_ref(trans, &generic_ref, 0, + ret = btrfs_add_delayed_data_ref(trans, ref, 0, &old_ref_mod, &new_ref_mod); } - if (root_objectid != BTRFS_TREE_LOG_OBJECTID) - btrfs_ref_tree_mod(fs_info, &generic_ref); + if (!((ref->type == BTRFS_REF_METADATA && + ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || + (ref->type == BTRFS_REF_DATA && + ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID))) + btrfs_ref_tree_mod(fs_info, ref); if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) - 
add_pinned_bytes(fs_info, &generic_ref); + add_pinned_bytes(fs_info, ref); return ret; } @@ -8883,6 +8875,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, u64 parent; struct btrfs_key key; struct btrfs_key first_key; + struct btrfs_ref ref = { 0 }; struct extent_buffer *next; int level = wc->level; int reada = 0; @@ -9055,9 +9048,10 @@ skip: wc->drop_level = level; find_next_key(path, level, &wc->drop_progress); - ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize, - parent, root->root_key.objectid, - level - 1, 0); + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, + fs_info->nodesize, parent); + btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid); + ret = btrfs_free_extent(trans, &ref); if (ret) goto out_unlock; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a4fc89a84baf..7e85dca0e6f2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -997,11 +997,14 @@ delete_extent_item: extent_end = ALIGN(extent_end, fs_info->sectorsize); } else if (update_refs && disk_bytenr > 0) { - ret = btrfs_free_extent(trans, root, - disk_bytenr, num_bytes, 0, + btrfs_init_generic_ref(&ref, + BTRFS_DROP_DELAYED_REF, + disk_bytenr, num_bytes, 0); + btrfs_init_data_ref(&ref, root->root_key.objectid, - key.objectid, key.offset - - extent_offset); + key.objectid, + key.offset - extent_offset); + ret = btrfs_free_extent(trans, &ref); BUG_ON(ret); /* -ENOMEM */ inode_sub_bytes(inode, extent_end - key.offset); @@ -1318,6 +1321,9 @@ again: other_start = end; other_end = 0; + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, + num_bytes, 0); + btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset); if (extent_mergeable(leaf, path->slots[0] + 1, ino, bytenr, orig_offset, &other_start, &other_end)) { @@ -1328,9 +1334,7 @@ again: extent_end = other_end; del_slot = path->slots[0] + 1; del_nr++; - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - 0, root->root_key.objectid, - ino, orig_offset); + ret = 
btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -1348,9 +1352,7 @@ again: key.offset = other_start; del_slot = path->slots[0]; del_nr++; - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - 0, root->root_key.objectid, - ino, orig_offset); + ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9e736794a6a9..19eebdad798c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4708,12 +4708,17 @@ delete: if (found_extent && (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || root == fs_info->tree_root)) { + struct btrfs_ref ref = { 0 }; + btrfs_set_path_blocking(path); bytes_deleted += extent_num_bytes; - ret = btrfs_free_extent(trans, root, extent_start, - extent_num_bytes, 0, - btrfs_header_owner(leaf), - ino, extent_offset); + + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, + extent_start, extent_num_bytes, 0); + ref.real_root = root->root_key.objectid; + btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), + ino, extent_offset); + ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); break; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cf544128985c..a459ecddcce4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1716,9 +1716,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans, break; } - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - parent, btrfs_header_owner(leaf), - key.objectid, key.offset); + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, + num_bytes, parent); + ref.real_root = root->root_key.objectid; + btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), + key.objectid, key.offset); + ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); break; @@ -1934,14 +1937,18 @@ again: ret = btrfs_inc_extent_ref(trans, &ref); BUG_ON(ret); - ret = btrfs_free_extent(trans, src, 
new_bytenr, blocksize, - path->nodes[level]->start, - src->root_key.objectid, level - 1, 0); + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr, + blocksize, path->nodes[level]->start); + btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid); + ref.skip_qgroup = true; + ret = btrfs_free_extent(trans, &ref); BUG_ON(ret); - ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, - 0, dest->root_key.objectid, level - 1, - 0); + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr, + blocksize, 0); + btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid); + ref.skip_qgroup = true; + ret = btrfs_free_extent(trans, &ref); BUG_ON(ret); btrfs_unlock_up_safe(path, 0); -- cgit v1.2.3-59-g8ed1b From c4140cbf35b90422be6589024f47e132eb2298b1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Apr 2019 14:45:37 +0800 Subject: btrfs: qgroup: Don't scan leaf if we're modifying reloc tree Since reloc tree doesn't contribute to qgroup numbers, just skip them. This should catch the final cause of unnecessary data ref processing when running balance of metadata with qgroups on. The 4G data 16 snapshots test (*) should explain it pretty well: | delayed subtree | refactor delayed ref | this patch --------------------------------------------------------------------- relocated | 22653 | 22673 | 22744 qgroup dirty | 122792 | 48360 | 70 time | 24.494 | 11.606 | 3.944 Finally, we're at the stage where qgroup + metadata balance cost no obvious overhead. 
Test environment: Test VM: - vRAM 8G - vCPU 8 - block dev virtio-blk, 'unsafe' cache mode - host block 850evo Test workload: - Copy 4G data from /usr/ to one subvolume - Create 16 snapshots of that subvolume, and modify 3 files in each snapshot - Enable quota, rescan - Time "btrfs balance start -m" Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2e69e67a6ae9..34078f302538 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9141,11 +9141,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, else ret = btrfs_dec_ref(trans, root, eb, 0); BUG_ON(ret); /* -ENOMEM */ - ret = btrfs_qgroup_trace_leaf_items(trans, eb); - if (ret) { - btrfs_err_rl(fs_info, - "error %d accounting leaf items. Quota is out of sync, rescan required.", + if (is_fstree(root->root_key.objectid)) { + ret = btrfs_qgroup_trace_leaf_items(trans, eb); + if (ret) { + btrfs_err_rl(fs_info, + "error %d accounting leaf items, quota is out of sync, rescan required", ret); + } } } /* make block locked assertion in btrfs_clean_tree_block happy */ -- cgit v1.2.3-59-g8ed1b From 25263cd7cec1b41b55bf7991d09a019ca1ff1359 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:44:57 +0100 Subject: btrfs: remove unused parameter fs_info from split_item Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e26131d14cc4..928d4d8c542b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4399,8 +4399,7 @@ err: return ret; } -static noinline int split_item(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, +static noinline int split_item(struct btrfs_path *path, const struct btrfs_key *new_key, unsigned long split_offset) { @@ -4496,7 +4495,7 @@ int btrfs_split_item(struct btrfs_trans_handle
*trans, if (ret) return ret; - ret = split_item(root->fs_info, path, new_key, split_offset); + ret = split_item(path, new_key, split_offset); return ret; } -- cgit v1.2.3-59-g8ed1b From 78ac4f9e5ae022bd183ca21da7b373d300b7be17 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:49:12 +0100 Subject: btrfs: remove unused parameter fs_info from btrfs_truncate_item Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 3 +-- fs/btrfs/ctree.h | 3 +-- fs/btrfs/dir-item.c | 3 +-- fs/btrfs/extent-tree.c | 3 +-- fs/btrfs/file-item.c | 4 ++-- fs/btrfs/inode-item.c | 4 ++-- fs/btrfs/inode.c | 2 +- fs/btrfs/tree-log.c | 2 +- fs/btrfs/uuid-tree.c | 2 +- fs/btrfs/xattr.c | 3 +-- 10 files changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 928d4d8c542b..d8252bae0537 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4541,8 +4541,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, * off the end of the item or if we shift the item to chop bytes off * the front. 
*/ -void btrfs_truncate_item(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, u32 new_size, int from_end) +void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) { int slot; struct extent_buffer *leaf; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aa557b43d349..b6b570bcadaa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2932,8 +2932,7 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, struct extent_buffer *buf); void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u32 data_size); -void btrfs_truncate_item(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, u32 new_size, int from_end); +void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end); int btrfs_split_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 8de74d835dba..2120b61f61b8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -429,8 +429,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, start = btrfs_item_ptr_offset(leaf, path->slots[0]); memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); - btrfs_truncate_item(root->fs_info, path, - item_len - sub_item_len, 1); + btrfs_truncate_item(path, item_len - sub_item_len, 1); } return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 34078f302538..01dbc8bf6808 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1780,7 +1780,6 @@ void update_inline_extent_backref(struct btrfs_path *path, int *last_ref) { struct extent_buffer *leaf = path->nodes[0]; - struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_extent_item *ei; struct btrfs_extent_data_ref *dref = NULL; struct btrfs_shared_data_ref *sref = NULL; @@ -1835,7 +1834,7 @@ void update_inline_extent_backref(struct btrfs_path *path, memmove_extent_buffer(leaf, ptr, ptr + size, end - ptr - 
size); item_size -= size; - btrfs_truncate_item(fs_info, path, item_size, 1); + btrfs_truncate_item(path, item_size, 1); } btrfs_mark_buffer_dirty(leaf); } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a120d6ba3a28..9f6ceb8eb451 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -547,7 +547,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, */ u32 new_size = (bytenr - key->offset) >> blocksize_bits; new_size *= csum_size; - btrfs_truncate_item(fs_info, path, new_size, 1); + btrfs_truncate_item(path, new_size, 1); } else if (key->offset >= bytenr && csum_end > end_byte && end_byte > key->offset) { /* @@ -559,7 +559,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, u32 new_size = (csum_end - end_byte) >> blocksize_bits; new_size *= csum_size; - btrfs_truncate_item(fs_info, path, new_size, 0); + btrfs_truncate_item(path, new_size, 0); key->offset = end_byte; btrfs_set_item_key_safe(fs_info, path, key); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index a8956a3c9e05..3e0f12d6d094 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -170,7 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + del_len, item_size - (ptr + del_len - item_start)); - btrfs_truncate_item(root->fs_info, path, item_size - del_len, 1); + btrfs_truncate_item(path, item_size - del_len, 1); out: btrfs_free_path(path); @@ -234,7 +234,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_size - (ptr + sub_item_len - item_start)); - btrfs_truncate_item(root->fs_info, path, item_size - sub_item_len, 1); + btrfs_truncate_item(path, item_size - sub_item_len, 1); out: btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 19eebdad798c..fb237ffbda1a 100644 --- a/fs/btrfs/inode.c +++ 
b/fs/btrfs/inode.c @@ -4669,7 +4669,7 @@ search_again: btrfs_set_file_extent_ram_bytes(leaf, fi, size); size = btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(root->fs_info, path, size, 1); + btrfs_truncate_item(path, size, 1); } else if (!del_item) { /* * We have to bail so the last_size is set to diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b94a0cca4ecd..13aabbf13251 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -465,7 +465,7 @@ insert: found_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); if (found_size > item_size) - btrfs_truncate_item(fs_info, path, item_size, 1); + btrfs_truncate_item(path, item_size, 1); else if (found_size < item_size) btrfs_extend_item(fs_info, path, item_size - found_size); diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index c1cc9a5c0024..44838ad3a7f9 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -219,7 +219,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, move_src = offset + sizeof(subid); move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot)); memmove_extent_buffer(eb, move_dst, move_src, move_len); - btrfs_truncate_item(fs_info, path, item_size - sizeof(subid), 1); + btrfs_truncate_item(path, item_size - sizeof(subid), 1); out: btrfs_free_path(path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 10da873d11f5..c5593d0922f5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -188,8 +188,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, btrfs_extend_item(fs_info, path, size - old_data_len); else if (size < old_data_len) - btrfs_truncate_item(fs_info, path, - data_size, 1); + btrfs_truncate_item(path, data_size, 1); } else { /* There are other xattrs packed in the same item. 
*/ ret = btrfs_delete_one_dir_name(trans, root, path, di); -- cgit v1.2.3-59-g8ed1b From c71dd88007bdc8ba62e99439d93050b0778f101a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 14:51:10 +0100 Subject: btrfs: remove unused parameter fs_info from btrfs_extend_item Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 5 ++--- fs/btrfs/ctree.h | 3 +-- fs/btrfs/dir-item.c | 2 +- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file-item.c | 2 +- fs/btrfs/inode-item.c | 4 ++-- fs/btrfs/tree-log.c | 4 +--- fs/btrfs/uuid-tree.c | 2 +- fs/btrfs/xattr.c | 5 ++--- 9 files changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d8252bae0537..eacd0b80e272 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4639,8 +4639,7 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) /* * make the item pointed to by the path bigger, data_size is the added size. */ -void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - u32 data_size) +void btrfs_extend_item(struct btrfs_path *path, u32 data_size) { int slot; struct extent_buffer *leaf; @@ -4669,7 +4668,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, BUG_ON(slot < 0); if (slot >= nritems) { btrfs_print_leaf(leaf); - btrfs_crit(fs_info, "slot %d too large, nritems %d", + btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d", slot, nritems); BUG(); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b6b570bcadaa..93318ab9ceb9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2930,8 +2930,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret, u64 new_root_objectid); int btrfs_block_can_be_shared(struct btrfs_root *root, struct extent_buffer *buf); -void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - u32 data_size); +void btrfs_extend_item(struct btrfs_path *path, u32 data_size); void btrfs_truncate_item(struct btrfs_path 
*path, u32 new_size, int from_end); int btrfs_split_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 2120b61f61b8..863367c2c620 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -36,7 +36,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle di = btrfs_match_dir_item_name(fs_info, path, name, name_len); if (di) return ERR_PTR(-EEXIST); - btrfs_extend_item(fs_info, path, data_size); + btrfs_extend_item(path, data_size); } else if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 01dbc8bf6808..2ac4ae41cfed 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1705,7 +1705,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info, type = extent_ref_type(parent, owner); size = btrfs_extent_inline_ref_size(type); - btrfs_extend_item(fs_info, path, size); + btrfs_extend_item(path, size); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9f6ceb8eb451..5a3be16ca93d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -848,7 +848,7 @@ again: diff /= csum_size; diff *= csum_size; - btrfs_extend_item(fs_info, path, diff); + btrfs_extend_item(path, diff); ret = 0; goto csum; } diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 3e0f12d6d094..30d62ef918b9 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -288,7 +288,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, name, name_len, NULL)) goto out; - btrfs_extend_item(root->fs_info, path, ins_len); + btrfs_extend_item(path, ins_len); ret = 0; } if (ret < 0) @@ -347,7 +347,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, goto out; old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); - btrfs_extend_item(fs_info, path, 
ins_len); + btrfs_extend_item(path, ins_len); ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 13aabbf13251..67cd144e6be1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -344,7 +344,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret; u32 item_size; u64 saved_i_size = 0; @@ -467,8 +466,7 @@ insert: if (found_size > item_size) btrfs_truncate_item(path, item_size, 1); else if (found_size < item_size) - btrfs_extend_item(fs_info, path, - item_size - found_size); + btrfs_extend_item(path, item_size - found_size); } else if (ret) { return ret; } diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 44838ad3a7f9..91caab63bdf5 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -121,7 +121,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, * An item with that type already exists. * Extend the item and store the new subid at the end. */ - btrfs_extend_item(fs_info, path, sizeof(subid_le)); + btrfs_extend_item(path, sizeof(subid_le)); eb = path->nodes[0]; slot = path->slots[0]; offset = btrfs_item_ptr_offset(eb, slot); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index c5593d0922f5..cb45c02cdc44 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -185,8 +185,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, if (old_data_len + name_len + sizeof(*di) == item_size) { /* No other xattrs packed in the same leaf item. 
*/ if (size > old_data_len) - btrfs_extend_item(fs_info, path, - size - old_data_len); + btrfs_extend_item(path, size - old_data_len); else if (size < old_data_len) btrfs_truncate_item(path, data_size, 1); } else { @@ -194,7 +193,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto out; - btrfs_extend_item(fs_info, path, data_size); + btrfs_extend_item(path, data_size); } item = btrfs_item_nr(slot); -- cgit v1.2.3-59-g8ed1b From c7da9597fe8cadc846fa72f4ddf478bb435a913f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 15:02:46 +0100 Subject: btrfs: remove unused parameter fs_info from tree_move_down Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index eacd0b80e272..e8a24fcb9182 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5224,9 +5224,7 @@ out: return ret; } -static int tree_move_down(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, - int *level) +static int tree_move_down(struct btrfs_path *path, int *level) { struct extent_buffer *eb; @@ -5282,7 +5280,7 @@ static int tree_advance(struct btrfs_fs_info *fs_info, if (*level == 0 || !allow_down) { ret = tree_move_next_or_upnext(path, level, root_level); } else { - ret = tree_move_down(fs_info, path, level); + ret = tree_move_down(path, level); } if (ret >= 0) { if (*level == 0) -- cgit v1.2.3-59-g8ed1b From 179d1e6a3b6a0409e5d411d485dd4623632c42d8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 15:03:48 +0100 Subject: btrfs: remove unused parameter fs_info from tree_advance Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e8a24fcb9182..c4ae9cfea709 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5269,8 +5269,7 @@ static int
tree_move_next_or_upnext(struct btrfs_path *path, * Returns 1 if it had to move up and next. 0 is returned if it moved only next * or down. */ -static int tree_advance(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, +static int tree_advance(struct btrfs_path *path, int *level, int root_level, int allow_down, struct btrfs_key *key) @@ -5457,7 +5456,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, while (1) { if (advance_left && !left_end_reached) { - ret = tree_advance(fs_info, left_path, &left_level, + ret = tree_advance(left_path, &left_level, left_root_level, advance_left != ADVANCE_ONLY_NEXT, &left_key); @@ -5468,7 +5467,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, advance_left = 0; } if (advance_right && !right_end_reached) { - ret = tree_advance(fs_info, right_path, &right_level, + ret = tree_advance(right_path, &right_level, right_root_level, advance_right != ADVANCE_ONLY_NEXT, &right_key); -- cgit v1.2.3-59-g8ed1b From 033774dc5a90dad5cceb9298fa7517a48ad9cd20 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 15:59:22 +0100 Subject: btrfs: remove unused parameter fs_info from CHECK_FE_ALIGNED Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index b9dd2a6e133f..32eeee5ec497 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -94,7 +94,7 @@ static void file_extent_err(const struct extent_buffer *eb, int slot, * Return 0 if the btrfs_file_extent_##name is aligned to @alignment * Else return 1 */ -#define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment) \ +#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment) \ ({ \ if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \ file_extent_err((leaf), (slot), \ @@ -179,11 +179,11 @@ static int check_extent_data_item(struct extent_buffer *leaf, item_size, sizeof(*fi)); return -EUCLEAN; } - 
if (CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, ram_bytes, sectorsize) || - CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_bytenr, sectorsize) || - CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_num_bytes, sectorsize) || - CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, offset, sectorsize) || - CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, num_bytes, sectorsize)) + if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) || + CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) || + CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) || + CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) || + CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)) return -EUCLEAN; return 0; } -- cgit v1.2.3-59-g8ed1b From 5c5aff98f83abca23b70ac84c8019b28fcd70cdd Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:29:46 +0100 Subject: btrfs: remove unused parameter fs_info from emit_last_fiemap_cache Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9aa79ad794c9..13fca7bfc1f2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4510,8 +4510,7 @@ try_submit_last: * In this case, the first extent range will be cached but not emitted. * So we must emit it before ending extent_fiemap(). 
*/ -static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info, - struct fiemap_extent_info *fieinfo, +static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache) { int ret; @@ -4718,7 +4717,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } out_free: if (!ret) - ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache); + ret = emit_last_fiemap_cache(fieinfo, &cache); free_extent_map(em); out: btrfs_free_path(path); -- cgit v1.2.3-59-g8ed1b From c6e340bc1c9e3411c40aafca4c69b989530c9347 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:42:34 +0100 Subject: btrfs: remove unused parameter fs_info from btrfs_add_delayed_extent_op Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 3 +-- fs/btrfs/delayed-ref.h | 3 +-- fs/btrfs/extent-tree.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 5c1f2462f757..a73fc23e2961 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -924,8 +924,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, return 0; } -int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op) { diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 7cbb52adc553..c18f93ea88ed 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -340,8 +340,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, u64 reserved, int *old_ref_mod, int *new_ref_mod); -int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op); void 
btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2ac4ae41cfed..1eec1123660e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2950,8 +2950,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, extent_op->is_data = is_data ? true : false; extent_op->level = level; - ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr, - num_bytes, extent_op); + ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); if (ret) btrfs_free_delayed_extent_op(extent_op); return ret; -- cgit v1.2.3-59-g8ed1b From f5c8daa5b2ae6de4baa18a95002271cd7f90be90 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:43:36 +0100 Subject: btrfs: remove unused parameter fs_info from btrfs_set_disk_extent_flags Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 +- fs/btrfs/ctree.h | 1 - fs/btrfs/extent-tree.c | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4ae9cfea709..5116c2a1f0f9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -948,7 +948,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (new_flags != 0) { int level = btrfs_header_level(buf); - ret = btrfs_set_disk_extent_flags(trans, fs_info, + ret = btrfs_set_disk_extent_flags(trans, buf->start, buf->len, new_flags, level, 0); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 93318ab9ceb9..b19c7d65fe7d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2746,7 +2746,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, int full_backref); int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 flags, int level, int is_data); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct 
btrfs_ref *ref); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1eec1123660e..fa09a83a6954 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2933,7 +2933,6 @@ out: } int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 flags, int level, int is_data) { @@ -8804,7 +8803,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, BUG_ON(ret); /* -ENOMEM */ ret = btrfs_dec_ref(trans, root, eb, 0); BUG_ON(ret); /* -ENOMEM */ - ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start, + ret = btrfs_set_disk_extent_flags(trans, eb->start, eb->len, flag, btrfs_header_level(eb), 0); BUG_ON(ret); /* -ENOMEM */ -- cgit v1.2.3-59-g8ed1b From 03628cdbc64db6262e50d0357960a4e9562676a1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Apr 2019 14:50:51 +0100 Subject: Btrfs: do not start a transaction during fiemap During fiemap, for regular extents (non inline) we need to check if they are shared and if they are, set the shared bit. Checking if an extent is shared requires checking the delayed references of the currently running transaction, since some reference might have not yet hit the extent tree and be only in the in-memory delayed references. However we were using a transaction join for this, which creates a new transaction when there is no transaction currently running. That means that two more potential failures can happen: creating the transaction and committing it. Further, if no write activity is currently happening in the system, and fiemap calls keep being done, we end up creating and committing transactions that do nothing. 
In some extreme cases this can result in the commit of the transaction created by fiemap to fail with ENOSPC when updating the root item of a subvolume tree because a join does not reserve any space, leading to a trace like the following: heisenberg kernel: ------------[ cut here ]------------ heisenberg kernel: BTRFS: Transaction aborted (error -28) heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] (...) heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] (...) heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 heisenberg kernel: Call Trace: heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] heisenberg kernel: ? _cond_resched+0x15/0x30 heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] heisenberg kernel: ? 
start_transaction+0x9d/0x3f0 [btrfs] heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] heisenberg kernel: kthread+0x112/0x130 heisenberg kernel: ? kthread_bind+0x30/0x30 heisenberg kernel: ret_from_fork+0x35/0x40 heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- Since fiemap (and btrfs_check_shared()) is a read-only operation, do not do a transaction join to avoid the overhead of creating a new transaction (if there is currently no running transaction) and introducing a potential point of failure when the new transaction gets committed, instead use a transaction attach to grab a handle for the currently running transaction if any. Reported-by: Christoph Anton Mitterer Link: https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ Fixes: afce772e87c36c ("btrfs: fix check_shared for fiemap ioctl") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 849b8c767efb..982152d3f920 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1460,8 +1460,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, * callers (such as fiemap) which want to know whether the extent is * shared but do not need a ref count. * - * This attempts to allocate a transaction in order to account for - * delayed refs, but continues on even when the alloc fails. + * This attempts to attach to the running transaction in order to account for + * delayed refs, but continues on even when no running transaction exists. * * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. 
*/ @@ -1484,13 +1484,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) tmp = ulist_alloc(GFP_NOFS); roots = ulist_alloc(GFP_NOFS); if (!tmp || !roots) { - ulist_free(tmp); - ulist_free(roots); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - trans = btrfs_join_transaction(root); + trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { + ret = PTR_ERR(trans); + goto out; + } trans = NULL; down_read(&fs_info->commit_root_sem); } else { @@ -1523,6 +1526,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) } else { up_read(&fs_info->commit_root_sem); } +out: ulist_free(tmp); ulist_free(roots); return ret; -- cgit v1.2.3-59-g8ed1b From 9f89d5de8631c7930898a601b6612e271aa2261c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Apr 2019 09:29:36 +0100 Subject: Btrfs: send, flush dellaloc in order to avoid data loss When we set a subvolume to read-only mode we do not flush delalloc for any of its inodes (except if the filesystem is mounted with -o flushoncommit), since it does not affect correctness for any subsequent operations - except for a future send operation. The send operation will not be able to see the delalloc data since the respective file extent items, inode item updates, backreferences, etc, have not yet hit the subvolume and extent trees. Effectively this means data loss, since the send stream will not contain any data from existing delalloc. Another problem from this is that if the writeback starts and finishes while the send operation is in progress, we have the subvolume tree being modified concurrently which can result in send failing unexpectedly with EIO or hitting runtime errors, assertion failures or hitting BUG_ONs, etc.
Simple reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv $ xfs_io -f -c "pwrite -S 0xea 0 108K" /mnt/sv/foo $ btrfs property set /mnt/sv ro true $ btrfs send -f /tmp/send.stream /mnt/sv $ od -t x1 -A d /mnt/sv/foo 0000000 ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea * 0110592 $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ btrfs receive -f /tmp/send.stream /mnt $ echo $? 0 $ od -t x1 -A d /mnt/sv/foo 0000000 # ---> empty file Since this is a problem that affects send only, fix it in send by flushing delalloc for all the roots used by the send operation before send starts to process the commit roots. This is a problem that affects send since it was introduced (commit 31db9f7c23fbf7 ("Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive")) but backporting it to older kernels has some dependencies: - For kernels between 3.19 and 4.20, it depends on commit 3cd24c698004d2 ("btrfs: use tagged writepage to mitigate livelock of snapshot") because the function btrfs_start_delalloc_snapshot() does not exist before that commit. So one has to either pick that commit or replace the calls to btrfs_start_delalloc_snapshot() in this patch with calls to btrfs_start_delalloc_inodes(). - For kernels older than 3.19 it also requires commit e5fa8f865b3324 ("Btrfs: ensure send always works on roots without orphans") because it depends on the function ensure_commit_roots_uptodate() which that commit introduced. - No dependencies for 5.0+ kernels. A test case for fstests follows soon.
CC: stable@vger.kernel.org # 3.19+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1e9caa552235..12363081f53b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6593,6 +6593,38 @@ commit_trans: return btrfs_commit_transaction(trans); } +/* + * Make sure any existing dellaloc is flushed for any root used by a send + * operation so that we do not miss any data and we do not race with writeback + * finishing and changing a tree while send is using the tree. This could + * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and + * a send operation then uses the subvolume. + * After flushing delalloc ensure_commit_roots_uptodate() must be called. + */ +static int flush_delalloc_roots(struct send_ctx *sctx) +{ + struct btrfs_root *root = sctx->parent_root; + int ret; + int i; + + if (root) { + ret = btrfs_start_delalloc_snapshot(root); + if (ret) + return ret; + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); + } + + for (i = 0; i < sctx->clone_roots_cnt; i++) { + root = sctx->clone_roots[i].root; + ret = btrfs_start_delalloc_snapshot(root); + if (ret) + return ret; + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); + } + + return 0; +} + static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) { spin_lock(&root->root_item_lock); @@ -6817,6 +6849,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) NULL); sort_clone_roots = 1; + ret = flush_delalloc_roots(sctx); + if (ret) + goto out; + ret = ensure_commit_roots_uptodate(sctx); if (ret) goto out; -- cgit v1.2.3-59-g8ed1b From 62d54f3a7fa27ef6a74d6cdf643ce04beba3afa7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 22 Apr 2019 16:43:42 +0100 Subject: Btrfs: fix race between send and deduplication that lead to failures and crashes Send operates on read only trees and expects them 
to never change while it is using them. This is part of its initial design, and this expectation is due to two different reasons: 1) When it was introduced, no operations were allowed to modify read-only subvolumes/snapshots (including defrag for example). 2) It keeps send from having an impact on other filesystem operations. Namely send does not need to keep locks on the trees nor needs to hold on to transaction handles and delay transaction commits. This ends up being a consequence of the former reason. However the deduplication feature was introduced later (in September 2013, while send was introduced in July 2012) and it allowed for deduplication with destination files that belong to read-only trees (subvolumes and snapshots). That means that having a send operation (either full or incremental) running in parallel with a deduplication that has the destination inode in one of the trees used by the send operation, can result in tree nodes and leaves getting freed and reused while send is using them. This problem is similar to the problem solved for the root nodes getting freed and reused when a snapshot is made against one tree that is currently being used by a send operation, fixed in commits [1] and [2]. These commits explain in detail how the problem happens and the explanation is valid for any node or leaf that is not the root of a tree as well. This problem was also discussed and explained recently in a thread [3]. The problem is very easy to reproduce when using send with large trees (snapshots) and just a few concurrent deduplication operations that target files in the trees used by send. A stress test case is being sent for fstests that triggers the issue easily. The most common error to hit is the send ioctl returning -EIO with the following messages in dmesg/syslog: [1631617.204075] BTRFS error (device sdc): did not find backref in send_root.
inode=63292, offset=0, disk_byte=5228134400 found extent=5228134400 [1631633.251754] BTRFS error (device sdc): parent transid verify failed on 32243712 wanted 24 found 27 The first one is very easy to hit while the second one happens much less frequently, except for very large trees (in that test case, snapshots with 100000 files having large xattrs to get deep and wide trees). Less frequently, at least one BUG_ON can be hit: [1631742.130080] ------------[ cut here ]------------ [1631742.130625] kernel BUG at fs/btrfs/ctree.c:1806! [1631742.131188] invalid opcode: 0000 [#6] SMP DEBUG_PAGEALLOC PTI [1631742.131726] CPU: 1 PID: 13394 Comm: btrfs Tainted: G B D W 5.0.0-rc8-btrfs-next-45 #1 [1631742.132265] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [1631742.133399] RIP: 0010:read_node_slot+0x122/0x130 [btrfs] (...) [1631742.135061] RSP: 0018:ffffb530021ebaa0 EFLAGS: 00010246 [1631742.135615] RAX: ffff93ac8912e000 RBX: 000000000000009d RCX: 0000000000000002 [1631742.136173] RDX: 000000000000009d RSI: ffff93ac564b0d08 RDI: ffff93ad5b48c000 [1631742.136759] RBP: ffffb530021ebb7d R08: 0000000000000001 R09: ffffb530021ebb7d [1631742.137324] R10: ffffb530021eba70 R11: 0000000000000000 R12: ffff93ac87d0a708 [1631742.137900] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 [1631742.138455] FS: 00007f4cdb1528c0(0000) GS:ffff93ad76a80000(0000) knlGS:0000000000000000 [1631742.139010] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1631742.139568] CR2: 00007f5acb3d0420 CR3: 000000012be3e006 CR4: 00000000003606e0 [1631742.140131] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1631742.140719] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1631742.141272] Call Trace: [1631742.141826] ? do_raw_spin_unlock+0x49/0xc0 [1631742.142390] tree_advance+0x173/0x1d0 [btrfs] [1631742.142948] btrfs_compare_trees+0x268/0x690 [btrfs] [1631742.143533] ? 
process_extent+0x1070/0x1070 [btrfs] [1631742.144088] btrfs_ioctl_send+0x1037/0x1270 [btrfs] [1631742.144645] _btrfs_ioctl_send+0x80/0x110 [btrfs] [1631742.145161] ? trace_sched_stick_numa+0xe0/0xe0 [1631742.145685] btrfs_ioctl+0x13fe/0x3120 [btrfs] [1631742.146179] ? account_entity_enqueue+0xd3/0x100 [1631742.146662] ? reweight_entity+0x154/0x1a0 [1631742.147135] ? update_curr+0x20/0x2a0 [1631742.147593] ? check_preempt_wakeup+0x103/0x250 [1631742.148053] ? do_vfs_ioctl+0xa2/0x6f0 [1631742.148510] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [1631742.148942] do_vfs_ioctl+0xa2/0x6f0 [1631742.149361] ? __fget+0x113/0x200 [1631742.149767] ksys_ioctl+0x70/0x80 [1631742.150159] __x64_sys_ioctl+0x16/0x20 [1631742.150543] do_syscall_64+0x60/0x1b0 [1631742.150931] entry_SYSCALL_64_after_hwframe+0x49/0xbe [1631742.151326] RIP: 0033:0x7f4cd9f5add7 (...) [1631742.152509] RSP: 002b:00007ffe91017708 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [1631742.152892] RAX: ffffffffffffffda RBX: 0000000000000105 RCX: 00007f4cd9f5add7 [1631742.153268] RDX: 00007ffe91017790 RSI: 0000000040489426 RDI: 0000000000000007 [1631742.153633] RBP: 0000000000000007 R08: 00007f4cd9e79700 R09: 00007f4cd9e79700 [1631742.153999] R10: 00007f4cd9e799d0 R11: 0000000000000202 R12: 0000000000000003 [1631742.154365] R13: 0000555dfae53020 R14: 0000000000000000 R15: 0000000000000001 (...) [1631742.156696] ---[ end trace 5dac9f96dcc3fd6b ]--- That BUG_ON happens because while send is using a node, that node is COWed by a concurrent deduplication, gets freed and gets reused as a leaf (because a transaction commit happened in between), so when it attempts to read a slot from the extent buffer, at ctree.c:read_node_slot(), the extent buffer contents were wiped out and it now matches a leaf (which can even belong to some other tree now), hitting the BUG_ON(level == 0). 
Fix this concurrency issue by not allowing send and deduplication to run in parallel if both operate on the same readonly trees, returning EAGAIN to user space and logging an explicit warning in dmesg/syslog. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=be6821f82c3cc36e026f5afd10249988852b35ea [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6f2f0b394b54e2b159ef969a0b5274e9bbf82ff2 [3] https://lore.kernel.org/linux-btrfs/CAL3q7H7iqSEEyFaEtpRZw3cp613y+4k2Q8b4W7mweR3tZA05bQ@mail.gmail.com/ CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/ioctl.c | 19 ++++++++++++++++++- fs/btrfs/send.c | 26 ++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b19c7d65fe7d..aeaadeebc1fd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1340,6 +1340,12 @@ struct btrfs_root { * manipulation with the read-only status via SUBVOL_SETFLAGS */ int send_in_progress; + /* + * Number of currently running deduplication operations that have a + * destination inode belonging to this root. Protected by the lock + * root_item_lock. 
+ */ + int dedupe_in_progress; struct btrfs_subvolume_writers *subv_writers; atomic_t will_be_snapshotted; atomic_t snapshot_force_cow; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 19b0ee4e2c70..7755b503b348 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3262,6 +3262,19 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, { int ret; u64 i, tail_len, chunk_count; + struct btrfs_root *root_dst = BTRFS_I(dst)->root; + + spin_lock(&root_dst->root_item_lock); + if (root_dst->send_in_progress) { + btrfs_warn_rl(root_dst->fs_info, +"cannot deduplicate to root %llu while send operations are using it (%d in progress)", + root_dst->root_key.objectid, + root_dst->send_in_progress); + spin_unlock(&root_dst->root_item_lock); + return -EAGAIN; + } + root_dst->dedupe_in_progress++; + spin_unlock(&root_dst->root_item_lock); tail_len = olen % BTRFS_MAX_DEDUPE_LEN; chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN); @@ -3270,7 +3283,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN, dst, dst_loff); if (ret) - return ret; + goto out; loff += BTRFS_MAX_DEDUPE_LEN; dst_loff += BTRFS_MAX_DEDUPE_LEN; @@ -3279,6 +3292,10 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (tail_len > 0) ret = btrfs_extent_same_range(src, loff, tail_len, dst, dst_loff); +out: + spin_lock(&root_dst->root_item_lock); + root_dst->dedupe_in_progress--; + spin_unlock(&root_dst->root_item_lock); return ret; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 12363081f53b..dd38dfe174df 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6640,6 +6640,13 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) spin_unlock(&root->root_item_lock); } +static void dedupe_in_progress_warn(const struct btrfs_root *root) +{ + btrfs_warn_rl(root->fs_info, +"cannot use root %llu for send while deduplications on it are in progress (%d in progress)", + 
root->root_key.objectid, root->dedupe_in_progress); +} + long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) { int ret = 0; @@ -6663,6 +6670,11 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) * making it RW. This also protects against deletion. */ spin_lock(&send_root->root_item_lock); + if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) { + dedupe_in_progress_warn(send_root); + spin_unlock(&send_root->root_item_lock); + return -EAGAIN; + } send_root->send_in_progress++; spin_unlock(&send_root->root_item_lock); @@ -6797,6 +6809,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) ret = -EPERM; goto out; } + if (clone_root->dedupe_in_progress) { + dedupe_in_progress_warn(clone_root); + spin_unlock(&clone_root->root_item_lock); + srcu_read_unlock(&fs_info->subvol_srcu, index); + ret = -EAGAIN; + goto out; + } clone_root->send_in_progress++; spin_unlock(&clone_root->root_item_lock); srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -6831,6 +6850,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) ret = -EPERM; goto out; } + if (sctx->parent_root->dedupe_in_progress) { + dedupe_in_progress_warn(sctx->parent_root); + spin_unlock(&sctx->parent_root->root_item_lock); + srcu_read_unlock(&fs_info->subvol_srcu, index); + ret = -EAGAIN; + goto out; + } spin_unlock(&sctx->parent_root->root_item_lock); srcu_read_unlock(&fs_info->subvol_srcu, index); -- cgit v1.2.3-59-g8ed1b From b8aa330d2acb122563be87c42d82c5c8649cf658 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Apr 2019 11:31:06 +0100 Subject: Btrfs: improve performance on fsync of files with multiple hardlinks Commit 41bd6067692382 ("Btrfs: fix fsync of files with multiple hard links in new directories") introduced a path that makes fsync fallback to a full transaction commit in order to avoid losing hard links and new ancestors of the fsynced inode. 
That path is triggered only when the inode has more than one hard link and either has a new hard link created in the current transaction or the inode was evicted and reloaded in the current transaction. That path ends up getting triggered very often (hundreds of times) during the course of pgbench benchmarks, resulting in performance drops of about 20%. This change restores the performance by not triggering the full transaction commit in those cases, and instead iterate the fs/subvolume tree in search of all possible new ancestors, for all hard links, to log them. Reported-by: Zhao Yuhu Tested-by: James Wang Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 6 -- fs/btrfs/inode.c | 17 ---- fs/btrfs/tree-log.c | 228 ++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 188 insertions(+), 63 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b16c13d51be0..d5b438706b77 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -147,12 +147,6 @@ struct btrfs_inode { */ u64 last_unlink_trans; - /* - * Track the transaction id of the last transaction used to create a - * hard link for the inode. This is used by the log tree (fsync). - */ - u64 last_link_trans; - /* * Number of bytes outstanding that are going to need csums. This is * used in ENOSPC accounting. diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fb237ffbda1a..fcc23e14a86c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3689,21 +3689,6 @@ cache_index: * inode is not a directory, logging its parent unnecessarily. */ BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans; - /* - * Similar reasoning for last_link_trans, needs to be set otherwise - * for a case like the following: - * - * mkdir A - * touch foo - * ln foo A/bar - * echo 2 > /proc/sys/vm/drop_caches - * fsync foo - * - * - * Would result in link bar and directory A not existing after the power - * failure. 
- */ - BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans; path->slots[0]++; if (inode->i_nlink != 1 || @@ -6655,7 +6640,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) goto fail; } - BTRFS_I(inode)->last_link_trans = trans->transid; d_instantiate(dentry, inode); ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent, true, NULL); @@ -9192,7 +9176,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->index_cnt = (u64)-1; ei->dir_index = 0; ei->last_unlink_trans = 0; - ei->last_link_trans = 0; ei->last_log_commit = 0; spin_lock_init(&ei->lock); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 67cd144e6be1..6adcd8a2c5c7 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5832,6 +5832,190 @@ out: return ret; } +static int log_new_ancestors(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_log_ctx *ctx) +{ + struct btrfs_key found_key; + + btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); + + while (true) { + struct btrfs_fs_info *fs_info = root->fs_info; + const u64 last_committed = fs_info->last_trans_committed; + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_key search_key; + struct inode *inode; + int ret = 0; + + btrfs_release_path(path); + + search_key.objectid = found_key.offset; + search_key.type = BTRFS_INODE_ITEM_KEY; + search_key.offset = 0; + inode = btrfs_iget(fs_info->sb, &search_key, root, NULL); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (BTRFS_I(inode)->generation > last_committed) + ret = btrfs_log_inode(trans, root, BTRFS_I(inode), + LOG_INODE_EXISTS, + 0, LLONG_MAX, ctx); + iput(inode); + if (ret) + return ret; + + if (search_key.objectid == BTRFS_FIRST_FREE_OBJECTID) + break; + + search_key.type = BTRFS_INODE_REF_KEY; + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + return ret; + + leaf = path->nodes[0]; + slot = 
path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + return -ENOENT; + leaf = path->nodes[0]; + slot = path->slots[0]; + } + + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != search_key.objectid || + found_key.type != BTRFS_INODE_REF_KEY) + return -ENOENT; + } + return 0; +} + +static int log_new_ancestors_fast(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct dentry *parent, + struct btrfs_log_ctx *ctx) +{ + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct dentry *old_parent = NULL; + struct super_block *sb = inode->vfs_inode.i_sb; + int ret = 0; + + while (true) { + if (!parent || d_really_is_negative(parent) || + sb != parent->d_sb) + break; + + inode = BTRFS_I(d_inode(parent)); + if (root != inode->root) + break; + + if (inode->generation > fs_info->last_trans_committed) { + ret = btrfs_log_inode(trans, root, inode, + LOG_INODE_EXISTS, 0, LLONG_MAX, ctx); + if (ret) + break; + } + if (IS_ROOT(parent)) + break; + + parent = dget_parent(parent); + dput(old_parent); + old_parent = parent; + } + dput(old_parent); + + return ret; +} + +static int log_all_new_ancestors(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct dentry *parent, + struct btrfs_log_ctx *ctx) +{ + struct btrfs_root *root = inode->root; + const u64 ino = btrfs_ino(inode); + struct btrfs_path *path; + struct btrfs_key search_key; + int ret; + + /* + * For a single hard link case, go through a fast path that does not + * need to iterate the fs/subvolume tree. 
+ */ + if (inode->vfs_inode.i_nlink < 2) + return log_new_ancestors_fast(trans, inode, parent, ctx); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + search_key.objectid = ino; + search_key.type = BTRFS_INODE_REF_KEY; + search_key.offset = 0; +again: + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + goto out; + if (ret == 0) + path->slots[0]++; + + while (true) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_key found_key; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != ino || + found_key.type > BTRFS_INODE_EXTREF_KEY) + break; + + /* + * Don't deal with extended references because they are rare + * cases and too complex to deal with (we would need to keep + * track of which subitem we are processing for each item in + * this loop, etc). So just return some error to fallback to + * a transaction commit. + */ + if (found_key.type == BTRFS_INODE_EXTREF_KEY) { + ret = -EMLINK; + goto out; + } + + /* + * Logging ancestors needs to do more searches on the fs/subvol + * tree, so it releases the path as needed to avoid deadlocks. + * Keep track of the last inode ref key and resume from that key + * after logging all new ancestors for the current hard link. + */ + memcpy(&search_key, &found_key, sizeof(search_key)); + + ret = log_new_ancestors(trans, root, path, ctx); + if (ret) + goto out; + btrfs_release_path(path); + goto again; + } + ret = 0; +out: + btrfs_free_path(path); + return ret; +} + /* * helper function around btrfs_log_inode to make sure newly created * parent directories also end up in the log. 
A minimal inode and backref @@ -5849,11 +6033,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct super_block *sb; - struct dentry *old_parent = NULL; int ret = 0; u64 last_committed = fs_info->last_trans_committed; bool log_dentries = false; - struct btrfs_inode *orig_inode = inode; sb = inode->vfs_inode.i_sb; @@ -5959,54 +6141,20 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * and has a link count of 2. */ if (inode->last_unlink_trans > last_committed) { - ret = btrfs_log_all_parents(trans, orig_inode, ctx); + ret = btrfs_log_all_parents(trans, inode, ctx); if (ret) goto end_trans; } - /* - * If a new hard link was added to the inode in the current transaction - * and its link count is now greater than 1, we need to fallback to a - * transaction commit, otherwise we can end up not logging all its new - * parents for all the hard links. Here just from the dentry used to - * fsync, we can not visit the ancestor inodes for all the other hard - * links to figure out if any is new, so we fallback to a transaction - * commit (instead of adding a lot of complexity of scanning a btree, - * since this scenario is not a common use case). 
- */ - if (inode->vfs_inode.i_nlink > 1 && - inode->last_link_trans > last_committed) { - ret = -EMLINK; + ret = log_all_new_ancestors(trans, inode, parent, ctx); + if (ret) goto end_trans; - } - - while (1) { - if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) - break; - - inode = BTRFS_I(d_inode(parent)); - if (root != inode->root) - break; - if (inode->generation > last_committed) { - ret = btrfs_log_inode(trans, root, inode, - LOG_INODE_EXISTS, 0, LLONG_MAX, ctx); - if (ret) - goto end_trans; - } - if (IS_ROOT(parent)) - break; - - parent = dget_parent(parent); - dput(old_parent); - old_parent = parent; - } if (log_dentries) - ret = log_new_dir_dentries(trans, root, orig_inode, ctx); + ret = log_new_dir_dentries(trans, root, inode, ctx); else ret = 0; end_trans: - dput(old_parent); if (ret < 0) { btrfs_set_log_full_commit(trans); ret = 1; -- cgit v1.2.3-59-g8ed1b From 51d470aeaa3a4ac5bdf98743ac1d32008e8dafa4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 22 Apr 2019 16:07:31 +0300 Subject: btrfs: Document btrfs_csum_one_bio Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 5a3be16ca93d..d431ea8198e4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -413,6 +413,16 @@ fail: return ret; } +/* + * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio + * @inode: Owner of the data inside the bio + * @bio: Contains the data to be checksummed + * @file_start: offset in file this bio begins to describe + * @contig: Boolean. If true/1 means all bio vecs in this bio are + * contiguous and they begin at @file_start in the file. False/0 + * means this bio can contains potentially discontigous bio vecs + * so the logical offset of each should be calculated separately. 
+ */ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig) { -- cgit v1.2.3-59-g8ed1b From 02529d7a1077c5d00d990143cbd78aa56bb7c7cb Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 24 Apr 2019 15:22:53 +0800 Subject: btrfs: tree-checker: Allow error injection for tree-checker Allowing error injection for btrfs_check_leaf_full() and btrfs_check_node() is useful to test the failure path of btrfs write time tree check. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 32eeee5ec497..748cd1598255 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -15,6 +15,9 @@ * carefully reviewed otherwise so it does not prevent mount of valid images. */ +#include +#include +#include #include "ctree.h" #include "tree-checker.h" #include "disk-io.h" @@ -942,6 +945,7 @@ int btrfs_check_leaf_full(struct extent_buffer *leaf) { return check_leaf(leaf, true); } +ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO); int btrfs_check_leaf_relaxed(struct extent_buffer *leaf) { @@ -1005,3 +1009,4 @@ int btrfs_check_node(struct extent_buffer *node) out: return ret; } +ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO); -- cgit v1.2.3-59-g8ed1b From 7c15d41016dc886cc011e3854d855e219759ae68 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 25 Apr 2019 08:55:53 +0800 Subject: btrfs: ctree: Dump the leaf before BUG_ON in btrfs_set_item_key_safe We have a long standing problem with reversed keys that's detected by btrfs_set_item_key_safe. This is hard to reproduce so we'd like to capture more information for later analysis. Let's dump the leaf content before triggering BUG_ON() so that we can have some clue on what's going wrong. The output of tree locks should help us to debug such problem. 
Sample stacktrace: generic/522 [00:07:05] [26946.113381] run fstests generic/522 at 2019-04-16 00:07:05 [27161.474720] kernel BUG at fs/btrfs/ctree.c:3192! [27161.475923] invalid opcode: 0000 [#1] PREEMPT SMP [27161.477167] CPU: 0 PID: 15676 Comm: fsx Tainted: G W 5.1.0-rc5-default+ #562 [27161.478932] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c89-prebuilt.qemu.org 04/01/2014 [27161.481099] RIP: 0010:btrfs_set_item_key_safe+0x146/0x1c0 [btrfs] [27161.485369] RSP: 0018:ffffb087499e39b0 EFLAGS: 00010286 [27161.486464] RAX: 00000000ffffffff RBX: ffff941534d80e70 RCX: 0000000000024000 [27161.487929] RDX: 0000000000013039 RSI: ffffb087499e3aa5 RDI: ffffb087499e39c7 [27161.489289] RBP: 000000000000000e R08: ffff9414e0f49008 R09: 0000000000001000 [27161.490807] R10: 0000000000000000 R11: 0000000000000003 R12: ffff9414e0f48e70 [27161.492305] R13: ffffb087499e3aa5 R14: 0000000000000000 R15: 0000000000071000 [27161.493845] FS: 00007f8ea58d0b80(0000) GS:ffff94153d400000(0000) knlGS:0000000000000000 [27161.495608] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [27161.496717] CR2: 00007f8ea57a9000 CR3: 0000000016a33000 CR4: 00000000000006f0 [27161.498100] Call Trace: [27161.498771] __btrfs_drop_extents+0x6ec/0xdf0 [btrfs] [27161.499872] btrfs_log_changed_extents.isra.26+0x3a2/0x9e0 [btrfs] [27161.501114] btrfs_log_inode+0x7ff/0xdc0 [btrfs] [27161.502114] ? 
__mutex_unlock_slowpath+0x4b/0x2b0 [27161.503172] btrfs_log_inode_parent+0x237/0x9c0 [btrfs] [27161.504348] btrfs_log_dentry_safe+0x4a/0x70 [btrfs] [27161.505374] btrfs_sync_file+0x1b7/0x480 [btrfs] [27161.506371] __x64_sys_msync+0x180/0x210 [27161.507208] do_syscall_64+0x54/0x180 [27161.507932] entry_SYSCALL_64_after_hwframe+0x49/0xbe [27161.508839] RIP: 0033:0x7f8ea5aa9c61 [27161.512616] RSP: 002b:00007ffea2a06498 EFLAGS: 00000246 ORIG_RAX: 000000000000001a [27161.514161] RAX: ffffffffffffffda RBX: 000000000002a938 RCX: 00007f8ea5aa9c61 [27161.515376] RDX: 0000000000000004 RSI: 000000000001c9b2 RDI: 00007f8ea578d000 [27161.516572] RBP: 000000000001c07a R08: fffffffffffffff8 R09: 000000000002a000 [27161.517883] R10: 00007f8ea57a99b2 R11: 0000000000000246 R12: 0000000000000938 [27161.519080] R13: 00007f8ea578d000 R14: 000000000001c9b2 R15: 0000000000000000 [27161.520281] Modules linked in: btrfs libcrc32c xor zstd_decompress zstd_compress xxhash raid6_pq loop [last unloaded: scsi_debug] [27161.522272] ---[ end trace d5afec7ccac6a252 ]--- [27161.523111] RIP: 0010:btrfs_set_item_key_safe+0x146/0x1c0 [btrfs] [27161.527253] RSP: 0018:ffffb087499e39b0 EFLAGS: 00010286 [27161.528192] RAX: 00000000ffffffff RBX: ffff941534d80e70 RCX: 0000000000024000 [27161.529392] RDX: 0000000000013039 RSI: ffffb087499e3aa5 RDI: ffffb087499e39c7 [27161.530607] RBP: 000000000000000e R08: ffff9414e0f49008 R09: 0000000000001000 [27161.531802] R10: 0000000000000000 R11: 0000000000000003 R12: ffff9414e0f48e70 [27161.533018] R13: ffffb087499e3aa5 R14: 0000000000000000 R15: 0000000000071000 [27161.534405] FS: 00007f8ea58d0b80(0000) GS:ffff94153d400000(0000) knlGS:0000000000000000 [27161.536048] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [27161.537210] CR2: 00007f8ea57a9000 CR3: 0000000016a33000 CR4: 00000000000006f0 Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 
deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5116c2a1f0f9..5df76c17775a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3185,11 +3185,31 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, slot = path->slots[0]; if (slot > 0) { btrfs_item_key(eb, &disk_key, slot - 1); - BUG_ON(comp_keys(&disk_key, new_key) >= 0); + if (unlikely(comp_keys(&disk_key, new_key) >= 0)) { + btrfs_crit(fs_info, + "slot %u key (%llu %u %llu) new key (%llu %u %llu)", + slot, btrfs_disk_key_objectid(&disk_key), + btrfs_disk_key_type(&disk_key), + btrfs_disk_key_offset(&disk_key), + new_key->objectid, new_key->type, + new_key->offset); + btrfs_print_leaf(eb); + BUG(); + } } if (slot < btrfs_header_nritems(eb) - 1) { btrfs_item_key(eb, &disk_key, slot + 1); - BUG_ON(comp_keys(&disk_key, new_key) <= 0); + if (unlikely(comp_keys(&disk_key, new_key) <= 0)) { + btrfs_crit(fs_info, + "slot %u key (%llu %u %llu) new key (%llu %u %llu)", + slot, btrfs_disk_key_objectid(&disk_key), + btrfs_disk_key_type(&disk_key), + btrfs_disk_key_offset(&disk_key), + new_key->objectid, new_key->type, + new_key->offset); + btrfs_print_leaf(eb); + BUG(); + } } btrfs_cpu_key_to_disk(&disk_key, new_key); -- cgit v1.2.3-59-g8ed1b From f22125e5d8ae136adb99552d97078702e1ee68ab Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:51 +0800 Subject: btrfs: refactor btrfs_set_props to validate externally In preparation to merge multiple transactions when setting the compression flags, split btrfs_set_props() validation part outside of it. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 3 +++ fs/btrfs/props.c | 23 +++++++++++++++++------ fs/btrfs/props.h | 1 + fs/btrfs/xattr.c | 5 +++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7755b503b348..3f9263ddeff8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -284,6 +284,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) binode->flags &= ~BTRFS_INODE_COMPRESS; binode->flags |= BTRFS_INODE_NOCOMPRESS; + /* set no-compression no need to validate prop here */ ret = btrfs_set_prop_trans(inode, "btrfs.compression", NULL, 0, 0); if (ret && ret != -ENODATA) @@ -299,6 +300,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) binode->flags |= BTRFS_INODE_COMPRESS; binode->flags &= ~BTRFS_INODE_NOCOMPRESS; + /* compress_type is already validated during mount options */ comp = btrfs_compress_type2str(fs_info->compress_type); if (!comp || comp[0] == 0) comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); @@ -309,6 +311,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) goto out_drop; } else { + /* reset prop, no need of validate prop here */ ret = btrfs_set_prop_trans(inode, "btrfs.compression", NULL, 0, 0); if (ret && ret != -ENODATA) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 44b7bf647ab3..e356dd2a0f73 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -55,6 +55,23 @@ find_prop_handler(const char *name, return NULL; } +int btrfs_validate_prop(const char *name, const char *value, size_t value_len) +{ + const struct prop_handler *handler; + + if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN) + return -EINVAL; + + handler = find_prop_handler(name, NULL); + if (!handler) + return -EINVAL; + + if (value_len == 0) + return 0; + + return handler->validate(value, value_len); +} + static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, const char 
*name, const char *value, size_t value_len, int flags) @@ -62,9 +79,6 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, const struct prop_handler *handler; int ret; - if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN) - return -EINVAL; - handler = find_prop_handler(name, NULL); if (!handler) return -EINVAL; @@ -85,9 +99,6 @@ static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, return ret; } - ret = handler->validate(value, value_len); - if (ret) - return ret; if (trans) ret = btrfs_setxattr(trans, inode, handler->xattr_name, value, value_len, flags); diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index b1a6b233b774..01d2c1899bc7 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -12,6 +12,7 @@ void __init btrfs_props_init(void); int btrfs_set_prop_trans(struct inode *inode, const char *name, const char *value, size_t value_len, int flags); +int btrfs_validate_prop(const char *name, const char *value, size_t value_len); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index cb45c02cdc44..09db8f5f08fe 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -371,7 +371,12 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + int ret; + name = xattr_full_name(handler, name); + ret = btrfs_validate_prop(name, value, size); + if (ret) + return ret; return btrfs_set_prop_trans(inode, name, value, size, flags); } -- cgit v1.2.3-59-g8ed1b From cd31af158b324e5a1f03b53fb46a1e10cde238ab Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:52 +0800 Subject: btrfs: export btrfs_set_prop Make btrfs_set_prop() a non-static function, so that it can be called from btrfs_ioctl_setflags(). We need btrfs_set_prop() instead of btrfs_set_prop_trans() so that we can use the transaction which is already started in the current thread. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 6 +++--- fs/btrfs/props.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index e356dd2a0f73..aedf5a7d69c9 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -72,9 +72,9 @@ int btrfs_validate_prop(const char *name, const char *value, size_t value_len) return handler->validate(value, value_len); } -static int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, - const char *name, const char *value, size_t value_len, - int flags) +int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, + const char *name, const char *value, size_t value_len, + int flags) { const struct prop_handler *handler; int ret; diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 01d2c1899bc7..30b99348977d 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -12,6 +12,9 @@ void __init btrfs_props_init(void); int btrfs_set_prop_trans(struct inode *inode, const char *name, const char *value, size_t value_len, int flags); +int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, + const char *name, const char *value, size_t value_len, + int flags); int btrfs_validate_prop(const char *name, const char *value, size_t value_len); int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); -- cgit v1.2.3-59-g8ed1b From ff9fef559babe4376dd698ceec3d73d0362e48a0 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:53 +0800 Subject: btrfs: start transaction in btrfs_ioctl_setflags() Inode attribute can be set through the FS_IOC_SETFLAGS ioctl. This flags also includes compression attribute for which we would set/reset the compression extended attribute. 
While doing this there is a bit of duplicate code; the following things happen twice: - start/end_transaction - inode_inc_iversion() - current_time update to inode->i_ctime - and btrfs_update_inode() These are done in both btrfs_ioctl_setflags() and btrfs_set_props(). This patch merges the two duplicated code paths into btrfs_ioctl_setflags(). Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3f9263ddeff8..87b473dcdd52 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -192,6 +192,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) u64 old_flags; unsigned int old_i_flags; umode_t mode; + const char *comp = NULL; if (!inode_owner_or_capable(inode)) return -EPERM; @@ -283,14 +284,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (fsflags & FS_NOCOMP_FL) { binode->flags &= ~BTRFS_INODE_COMPRESS; binode->flags |= BTRFS_INODE_NOCOMPRESS; - - /* set no-compression no need to validate prop here */ - ret = btrfs_set_prop_trans(inode, "btrfs.compression", NULL, - 0, 0); - if (ret && ret != -ENODATA) - goto out_drop; } else if (fsflags & FS_COMPR_FL) { - const char *comp; if (IS_SWAPFILE(inode)) { ret = -ETXTBSY; @@ -300,36 +294,47 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) binode->flags |= BTRFS_INODE_COMPRESS; binode->flags &= ~BTRFS_INODE_NOCOMPRESS; - /* compress_type is already validated during mount options */ comp = btrfs_compress_type2str(fs_info->compress_type); if (!comp || comp[0] == 0) comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); - - ret = btrfs_set_prop_trans(inode, "btrfs.compression", comp, - strlen(comp), 0); - if (ret) - goto out_drop; - } else { - /* reset prop, no need of validate prop here */ - ret = btrfs_set_prop_trans(inode, "btrfs.compression", NULL, -
0, 0); - if (ret && ret != -ENODATA) - goto out_drop; binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } - trans = btrfs_start_transaction(root, 1); + /* + * 1 for inode item + * 2 for properties + */ + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_drop; } + if (comp) { + ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp, + strlen(comp), 0); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_end_trans; + } + set_bit(BTRFS_INODE_COPY_EVERYTHING, + &BTRFS_I(inode)->runtime_flags); + } else { + ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, + 0, 0); + if (ret && ret != -ENODATA) { + btrfs_abort_transaction(trans, ret); + goto out_end_trans; + } + } + btrfs_sync_inode_flags_to_i_flags(inode); inode_inc_iversion(inode); inode->i_ctime = current_time(inode); ret = btrfs_update_inode(trans, root, inode); + out_end_trans: btrfs_end_transaction(trans); out_drop: if (ret) { -- cgit v1.2.3-59-g8ed1b From 11d3cd5c625892a17297d5256e4758f007d582d5 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:54 +0800 Subject: btrfs: drop useless inode i_flags copy and restore The patch ("btrfs: start transaction in btrfs_ioctl_setflags()") used btrfs_set_prop() instead of btrfs_set_prop_trans() by which now the inode::i_flags update functions such as btrfs_sync_inode_flags_to_i_flags() and btrfs_update_inode() is called in btrfs_ioctl_setflags() instead of btrfs_set_prop_trans()->btrfs_setxattr() as earlier. So the inode::i_flags remains unmodified until the thread has checked all the conditions. So drop the saved inode::i_flags in out_i_flags. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 87b473dcdd52..4f235f4c66c5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -190,7 +190,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) unsigned int fsflags, old_fsflags; int ret; u64 old_flags; - unsigned int old_i_flags; umode_t mode; const char *comp = NULL; @@ -214,7 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) inode_lock(inode); old_flags = binode->flags; - old_i_flags = inode->i_flags; mode = inode->i_mode; fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); @@ -339,7 +337,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) out_drop: if (ret) { binode->flags = old_flags; - inode->i_flags = old_i_flags; } out_unlock: -- cgit v1.2.3-59-g8ed1b From d2b8fcfe43155f23fa3e8148cdaa65345e3c45eb Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:55 +0800 Subject: btrfs: modify local copy of btrfs_inode flags Instead of updating the binode::flags directly, update a local copy, and then at the point of no error, copy it to the binode::flags.
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 57 ++++++++++++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4f235f4c66c5..add724e95c11 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -189,9 +189,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) struct btrfs_trans_handle *trans; unsigned int fsflags, old_fsflags; int ret; - u64 old_flags; umode_t mode; const char *comp = NULL; + u32 binode_flags = binode->flags; if (!inode_owner_or_capable(inode)) return -EPERM; @@ -212,7 +212,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) inode_lock(inode); - old_flags = binode->flags; mode = inode->i_mode; fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); @@ -225,29 +224,29 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } if (fsflags & FS_SYNC_FL) - binode->flags |= BTRFS_INODE_SYNC; + binode_flags |= BTRFS_INODE_SYNC; else - binode->flags &= ~BTRFS_INODE_SYNC; + binode_flags &= ~BTRFS_INODE_SYNC; if (fsflags & FS_IMMUTABLE_FL) - binode->flags |= BTRFS_INODE_IMMUTABLE; + binode_flags |= BTRFS_INODE_IMMUTABLE; else - binode->flags &= ~BTRFS_INODE_IMMUTABLE; + binode_flags &= ~BTRFS_INODE_IMMUTABLE; if (fsflags & FS_APPEND_FL) - binode->flags |= BTRFS_INODE_APPEND; + binode_flags |= BTRFS_INODE_APPEND; else - binode->flags &= ~BTRFS_INODE_APPEND; + binode_flags &= ~BTRFS_INODE_APPEND; if (fsflags & FS_NODUMP_FL) - binode->flags |= BTRFS_INODE_NODUMP; + binode_flags |= BTRFS_INODE_NODUMP; else - binode->flags &= ~BTRFS_INODE_NODUMP; + binode_flags &= ~BTRFS_INODE_NODUMP; if (fsflags & FS_NOATIME_FL) - binode->flags |= BTRFS_INODE_NOATIME; + binode_flags |= BTRFS_INODE_NOATIME; else - binode->flags &= ~BTRFS_INODE_NOATIME; + binode_flags &= ~BTRFS_INODE_NOATIME; if (fsflags & FS_DIRSYNC_FL) - binode->flags |= BTRFS_INODE_DIRSYNC; + 
binode_flags |= BTRFS_INODE_DIRSYNC; else - binode->flags &= ~BTRFS_INODE_DIRSYNC; + binode_flags &= ~BTRFS_INODE_DIRSYNC; if (fsflags & FS_NOCOW_FL) { if (S_ISREG(mode)) { /* @@ -256,10 +255,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) * status of the file and will not set it. */ if (inode->i_size == 0) - binode->flags |= BTRFS_INODE_NODATACOW - | BTRFS_INODE_NODATASUM; + binode_flags |= BTRFS_INODE_NODATACOW | + BTRFS_INODE_NODATASUM; } else { - binode->flags |= BTRFS_INODE_NODATACOW; + binode_flags |= BTRFS_INODE_NODATACOW; } } else { /* @@ -267,10 +266,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) */ if (S_ISREG(mode)) { if (inode->i_size == 0) - binode->flags &= ~(BTRFS_INODE_NODATACOW - | BTRFS_INODE_NODATASUM); + binode_flags &= ~(BTRFS_INODE_NODATACOW | + BTRFS_INODE_NODATASUM); } else { - binode->flags &= ~BTRFS_INODE_NODATACOW; + binode_flags &= ~BTRFS_INODE_NODATACOW; } } @@ -280,8 +279,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) * things smaller. 
*/ if (fsflags & FS_NOCOMP_FL) { - binode->flags &= ~BTRFS_INODE_COMPRESS; - binode->flags |= BTRFS_INODE_NOCOMPRESS; + binode_flags &= ~BTRFS_INODE_COMPRESS; + binode_flags |= BTRFS_INODE_NOCOMPRESS; } else if (fsflags & FS_COMPR_FL) { if (IS_SWAPFILE(inode)) { @@ -289,14 +288,14 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) goto out_unlock; } - binode->flags |= BTRFS_INODE_COMPRESS; - binode->flags &= ~BTRFS_INODE_NOCOMPRESS; + binode_flags |= BTRFS_INODE_COMPRESS; + binode_flags &= ~BTRFS_INODE_NOCOMPRESS; comp = btrfs_compress_type2str(fs_info->compress_type); if (!comp || comp[0] == 0) comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); } else { - binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); + binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } /* @@ -306,7 +305,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - goto out_drop; + goto out_unlock; } if (comp) { @@ -327,6 +326,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } } + binode->flags = binode_flags; btrfs_sync_inode_flags_to_i_flags(inode); inode_inc_iversion(inode); inode->i_ctime = current_time(inode); @@ -334,11 +334,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) out_end_trans: btrfs_end_transaction(trans); - out_drop: - if (ret) { - binode->flags = old_flags; - } - out_unlock: inode_unlock(inode); mnt_drop_write_file(file); -- cgit v1.2.3-59-g8ed1b From 3c8d8b635780c244aa44ad2d625e6308fbfb7ad4 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:56 +0800 Subject: btrfs: drop old_fsflags in btrfs_ioctl_setflags The result of btrfs_inode_flags_to_fsflags() is copied into @old_fsflags and used only once. Use it directly instead.
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index add724e95c11..d459c37578ea 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -187,7 +187,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) struct btrfs_inode *binode = BTRFS_I(inode); struct btrfs_root *root = binode->root; struct btrfs_trans_handle *trans; - unsigned int fsflags, old_fsflags; + unsigned int fsflags; int ret; umode_t mode; const char *comp = NULL; @@ -215,8 +215,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) mode = inode->i_mode; fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); - old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); - if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) & + (FS_APPEND_FL | FS_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { ret = -EPERM; goto out_unlock; -- cgit v1.2.3-59-g8ed1b From 44e5194b5ec1b179c59bb204a2448031ff917e02 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:57 +0800 Subject: btrfs: drop local copy of inode i_mode There isn't real use of making struct inode::i_mode a local copy, it saves a dereference one time, not much. Just use it directly. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d459c37578ea..6dafa857bbb9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -189,7 +189,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) struct btrfs_trans_handle *trans; unsigned int fsflags; int ret; - umode_t mode; const char *comp = NULL; u32 binode_flags = binode->flags; @@ -212,8 +211,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) inode_lock(inode); - mode = inode->i_mode; - fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { @@ -248,7 +245,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) else binode_flags &= ~BTRFS_INODE_DIRSYNC; if (fsflags & FS_NOCOW_FL) { - if (S_ISREG(mode)) { + if (S_ISREG(inode->i_mode)) { /* * It's safe to turn csums off here, no extents exist. * Otherwise we want the flag to reflect the real COW @@ -264,7 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) /* * Revert back under same assumptions as above */ - if (S_ISREG(mode)) { + if (S_ISREG(inode->i_mode)) { if (inode->i_size == 0) binode_flags &= ~(BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM); -- cgit v1.2.3-59-g8ed1b From b3f6a4be1333eb888f9ad1ca16548fbeb05a8732 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:58 +0800 Subject: btrfs: start transaction in xattr_handler_set_prop btrfs specific extended attributes on the inode are set using btrfs_xattr_handler_set_prop(), and the required transaction for this update is started by btrfs_setxattr(). For better visibility of the transaction start and end, do this in btrfs_xattr_handler_set_prop(). 
For which this patch copied code of btrfs_setxattr() as it is in the original, which needs proper error handling. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 09db8f5f08fe..78b6ba2029e8 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -372,12 +372,31 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, size_t size, int flags) { int ret; + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; name = xattr_full_name(handler, name); ret = btrfs_validate_prop(name, value, size); if (ret) return ret; - return btrfs_set_prop_trans(inode, name, value, size, flags); + + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + ret = btrfs_set_prop(trans, inode, name, value, size, flags); + if (!ret) { + inode_inc_iversion(inode); + inode->i_ctime = current_time(inode); + set_bit(BTRFS_INODE_COPY_EVERYTHING, + &BTRFS_I(inode)->runtime_flags); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + } + + btrfs_end_transaction(trans); + + return ret; } static const struct xattr_handler btrfs_security_xattr_handler = { -- cgit v1.2.3-59-g8ed1b From 717ebdc3205ab118dd041199072ec686a3fac7ee Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:48:59 +0800 Subject: btrfs: delete unused function btrfs_set_prop_trans The last consumer of btrfs_set_prop_trans() was taken away by the patch ("btrfs: start transaction in xattr_handler_set_prop") so now this function can be deleted. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 6 ------ fs/btrfs/props.h | 2 -- 2 files changed, 8 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index aedf5a7d69c9..c41ed2a6347e 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -124,12 +124,6 @@ int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, return 0; } -int btrfs_set_prop_trans(struct inode *inode, const char *name, - const char *value, size_t value_len, int flags) -{ - return btrfs_set_prop(NULL, inode, name, value, value_len, flags); -} - static int iterate_object_props(struct btrfs_root *root, struct btrfs_path *path, u64 objectid, diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 30b99348977d..40b2c65b518c 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -10,8 +10,6 @@ void __init btrfs_props_init(void); -int btrfs_set_prop_trans(struct inode *inode, const char *name, - const char *value, size_t value_len, int flags); int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const char *value, size_t value_len, int flags); -- cgit v1.2.3-59-g8ed1b From da9b6ec829dff9b867bb863ebb5b45b4ef2530a1 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 20 Apr 2019 19:49:00 +0800 Subject: btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop Since now the trans argument is never NULL in btrfs_set_prop we don't have to check. So delete it and use btrfs_setxattr that makes use of that. 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index c41ed2a6347e..ca2716917e37 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -84,12 +84,8 @@ int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, return -EINVAL; if (value_len == 0) { - if (trans) - ret = btrfs_setxattr(trans, inode, handler->xattr_name, - NULL, 0, flags); - else - ret = btrfs_setxattr_trans(inode, handler->xattr_name, - NULL, 0, flags); + ret = btrfs_setxattr(trans, inode, handler->xattr_name, + NULL, 0, flags); if (ret) return ret; @@ -99,23 +95,14 @@ int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, return ret; } - if (trans) - ret = btrfs_setxattr(trans, inode, handler->xattr_name, value, - value_len, flags); - else - ret = btrfs_setxattr_trans(inode, handler->xattr_name, value, - value_len, flags); - + ret = btrfs_setxattr(trans, inode, handler->xattr_name, value, + value_len, flags); if (ret) return ret; ret = handler->apply(inode, value, value_len); if (ret) { - if (trans) - btrfs_setxattr(trans, inode, handler->xattr_name, NULL, - 0, flags); - else - btrfs_setxattr_trans(inode, handler->xattr_name, NULL, - 0, flags); + btrfs_setxattr(trans, inode, handler->xattr_name, NULL, + 0, flags); return ret; } -- cgit v1.2.3-59-g8ed1b From 4297ff84dc24d120753e0425702e8ad9b80ed10f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 10 Apr 2019 15:56:09 -0400 Subject: btrfs: track DIO bytes in flight When diagnosing a slowdown of generic/224 I noticed we were not doing anything when calling into shrink_delalloc(). This is because all writes in 224 are O_DIRECT, not delalloc, and thus our delalloc_bytes counter is 0, which short circuits most of the work inside of shrink_delalloc(). 
However O_DIRECT writes still consume metadata resources and generate ordered extents, which we can still wait on. Fix this by tracking outstanding DIO write bytes, and use this as well as the delalloc bytes counter to decide if we need to lookup and wait on any ordered extents. If we have more DIO writes than delalloc bytes we'll go ahead and wait on any ordered extents regardless of our flush state as flushing delalloc is likely to not gain us anything. Signed-off-by: Josef Bacik [ use dio instead of odirect in identifiers ] Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 15 ++++++++++++++- fs/btrfs/extent-tree.c | 15 +++++++++++++-- fs/btrfs/ordered-data.c | 9 ++++++++- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aeaadeebc1fd..b81c331b28fa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1016,6 +1016,7 @@ struct btrfs_fs_info { /* used to keep from writing metadata until there is a nice batch */ struct percpu_counter dirty_metadata_bytes; struct percpu_counter delalloc_bytes; + struct percpu_counter dio_bytes; s32 dirty_metadata_batch; s32 delalloc_batch; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eb26b3239827..663efce22d98 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2633,11 +2633,17 @@ int open_ctree(struct super_block *sb, goto fail; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); + ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_srcu; } + + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); + if (ret) { + err = ret; + goto fail_dio_bytes; + } fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); @@ -3336,6 +3342,8 @@ fail_delalloc_bytes: percpu_counter_destroy(&fs_info->delalloc_bytes); fail_dirty_metadata_bytes: percpu_counter_destroy(&fs_info->dirty_metadata_bytes); +fail_dio_bytes: + 
percpu_counter_destroy(&fs_info->dio_bytes); fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: @@ -4017,6 +4025,10 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_sum(&fs_info->delalloc_bytes)); } + if (percpu_counter_sum(&fs_info->dio_bytes)) + btrfs_info(fs_info, "at unmount dio bytes count %lld", + percpu_counter_sum(&fs_info->dio_bytes)); + btrfs_sysfs_remove_mounted(fs_info); btrfs_sysfs_remove_fsid(fs_info->fs_devices); @@ -4048,6 +4060,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); + percpu_counter_destroy(&fs_info->dio_bytes); percpu_counter_destroy(&fs_info->dev_replace.bio_counter); cleanup_srcu_struct(&fs_info->subvol_srcu); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fa09a83a6954..c61cfd0a77ee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4633,6 +4633,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; u64 delalloc_bytes; + u64 dio_bytes; u64 async_pages; u64 items; long time_left; @@ -4648,7 +4649,8 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, delalloc_bytes = percpu_counter_sum_positive( &fs_info->delalloc_bytes); - if (delalloc_bytes == 0) { + dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); + if (delalloc_bytes == 0 && dio_bytes == 0) { if (trans) return; if (wait_ordered) @@ -4656,8 +4658,16 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, return; } + /* + * If we are doing more ordered than delalloc we need to just wait on + * ordered extents, otherwise we'll waste time trying to flush delalloc + * that likely won't give us the space back we need. 
+ */ + if (dio_bytes > delalloc_bytes) + wait_ordered = true; + loops = 0; - while (delalloc_bytes && loops < 3) { + while ((delalloc_bytes || dio_bytes) && loops < 3) { nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; /* @@ -4707,6 +4717,7 @@ skip_async: } delalloc_bytes = percpu_counter_sum_positive( &fs_info->delalloc_bytes); + dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); } } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f6bb6039fa4c..52889da69113 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -195,8 +195,11 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); - if (dio) + if (dio) { + percpu_counter_add_batch(&fs_info->dio_bytes, len, + fs_info->delalloc_batch); set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); + } /* one ref for the tree */ refcount_set(&entry->refs, 1); @@ -468,6 +471,10 @@ void btrfs_remove_ordered_extent(struct inode *inode, if (root != fs_info->tree_root) btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false); + if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) + percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len, + fs_info->delalloc_batch); + tree = &btrfs_inode->ordered_tree; spin_lock_irq(&tree->lock); node = &entry->rb_node; -- cgit v1.2.3-59-g8ed1b From c8eaeac7b734347c3afba7008b7af62f37b9c140 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 10 Apr 2019 15:56:10 -0400 Subject: btrfs: reserve delalloc metadata differently With the per-inode block reserves we started refilling the reserve based on the calculated size of the outstanding csum bytes and extents for the inode, including the amount we were adding with the new operation. However, generic/224 exposed a problem with this approach. With 1000 files all writing at the same time we ended up with a bunch of bytes being reserved but unusable. 
When you write to a file we reserve space for the csum leaves for those bytes, the number of extent items required to cover those bytes, and a single transaction item for updating the inode at ordered extent finish for that range of bytes. This is held until the ordered extent finishes and we release all of the reserved space. If a second write comes in at this point we would add a single reservation for the new outstanding extent and however many reservations for the csum leaves. At this point we find the delta of how much we have reserved and how much outstanding size this is and attempt to reserve this delta. If the first write finishes it will not release any space, because the space it had reserved for the initial write is still needed for the second write. However some space would have been used, as we have added csums, extent items, and dirtied the inode. Our reserved space would be > 0 but less than the total needed reserved space. This is just for a single inode; now consider generic/224. This has 1000 inodes writing in parallel to a very small file system, 1GiB. In my testing this usually means we get about a 120MiB metadata area to work with, more than enough to allow the writes to continue, but not enough if all of the inodes are stuck trying to reserve the slack space while continuing to hold their leftovers from their initial writes. Fix this by pre-reserving _only_ the space we are currently trying to add. Then once that is successful modify our inode's csum count and outstanding extents, and then add the newly reserved space to the inode's block_rsv. This allows us to actually pass generic/224 without running out of metadata space.
Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 144 ++++++++++++++++++------------------------------- 1 file changed, 52 insertions(+), 92 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c61cfd0a77ee..f79e477a378e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5715,85 +5715,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, return ret; } -static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv, - u64 *metadata_bytes, u64 *qgroup_bytes) -{ - *metadata_bytes = 0; - *qgroup_bytes = 0; - - spin_lock(&block_rsv->lock); - if (block_rsv->reserved < block_rsv->size) - *metadata_bytes = block_rsv->size - block_rsv->reserved; - if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size) - *qgroup_bytes = block_rsv->qgroup_rsv_size - - block_rsv->qgroup_rsv_reserved; - spin_unlock(&block_rsv->lock); -} - -/** - * btrfs_inode_rsv_refill - refill the inode block rsv. - * @inode - the inode we are refilling. - * @flush - the flushing restriction. - * - * Essentially the same as btrfs_block_rsv_refill, except it uses the - * block_rsv->size as the minimum size. We'll either refill the missing amount - * or return if we already have enough space. This will also handle the reserve - * tracepoint for the reserved amount. 
- */ -static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, - enum btrfs_reserve_flush_enum flush) -{ - struct btrfs_root *root = inode->root; - struct btrfs_block_rsv *block_rsv = &inode->block_rsv; - u64 num_bytes, last = 0; - u64 qgroup_num_bytes; - int ret = -ENOSPC; - - calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes); - if (num_bytes == 0) - return 0; - - do { - ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, - true); - if (ret) - return ret; - ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); - if (ret) { - btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); - last = num_bytes; - /* - * If we are fragmented we can end up with a lot of - * outstanding extents which will make our size be much - * larger than our reserved amount. - * - * If the reservation happens here, it might be very - * big though not needed in the end, if the delalloc - * flushing happens. - * - * If this is the case try and do the reserve again. - */ - if (flush == BTRFS_RESERVE_FLUSH_ALL) - calc_refill_bytes(block_rsv, &num_bytes, - &qgroup_num_bytes); - if (num_bytes == 0) - return 0; - } - } while (ret && last != num_bytes); - - if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, false); - trace_btrfs_space_reservation(root->fs_info, "delalloc", - btrfs_ino(inode), num_bytes, 1); - - /* Don't forget to increase qgroup_rsv_reserved */ - spin_lock(&block_rsv->lock); - block_rsv->qgroup_rsv_reserved += qgroup_num_bytes; - spin_unlock(&block_rsv->lock); - } - return ret; -} - static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u64 num_bytes, u64 *qgroup_to_release) @@ -6094,9 +6015,25 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); } +static void calc_inode_reservations(struct btrfs_fs_info *fs_info, + u64 num_bytes, u64 *meta_reserve, + u64 *qgroup_reserve) +{ + u64 nr_extents = count_max_extents(num_bytes); + u64 
csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes); + + /* We add one for the inode update at finish ordered time */ + *meta_reserve = btrfs_calc_trans_metadata_size(fs_info, + nr_extents + csum_leaves + 1); + *qgroup_reserve = nr_extents * fs_info->nodesize; +} + int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_rsv *block_rsv = &inode->block_rsv; + u64 meta_reserve, qgroup_reserve; unsigned nr_extents; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; @@ -6126,7 +6063,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - /* Add our new extents and calculate the new rsv size. */ + /* + * We always want to do it this way, every other way is wrong and ends + * in tears. Pre-reserving the amount we are going to add will always + * be the right way, because otherwise if we have enough parallelism we + * could end up with thousands of inodes all holding little bits of + * reservations they were able to make previously and the only way to + * reclaim that space is to ENOSPC out the operations and clear + * everything out and try again, which is bad. This way we just + * over-reserve slightly, and clean up the mess when we are done. + */ + calc_inode_reservations(fs_info, num_bytes, &meta_reserve, + &qgroup_reserve); + ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); + if (ret) + goto out_fail; + ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); + if (ret) + goto out_qgroup; + + /* + * Now we need to update our outstanding extents and csum bytes _first_ + * and then add the reservation to the block_rsv. 
This keeps us from + * racing with an ordered completion or some such that would think it + * needs to free the reservation we just made. + */ spin_lock(&inode->lock); nr_extents = count_max_extents(num_bytes); btrfs_mod_outstanding_extents(inode, nr_extents); @@ -6134,22 +6095,21 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); - ret = btrfs_inode_rsv_refill(inode, flush); - if (unlikely(ret)) - goto out_fail; + /* Now we can safely add our space to our block rsv */ + block_rsv_add_bytes(block_rsv, meta_reserve, false); + trace_btrfs_space_reservation(root->fs_info, "delalloc", + btrfs_ino(inode), meta_reserve, 1); + + spin_lock(&block_rsv->lock); + block_rsv->qgroup_rsv_reserved += qgroup_reserve; + spin_unlock(&block_rsv->lock); if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); return 0; - +out_qgroup: + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); out_fail: - spin_lock(&inode->lock); - nr_extents = count_max_extents(num_bytes); - btrfs_mod_outstanding_extents(inode, -nr_extents); - inode->csum_bytes -= num_bytes; - btrfs_calculate_inode_block_rsv_size(fs_info, inode); - spin_unlock(&inode->lock); - btrfs_inode_rsv_release(inode, true); if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); -- cgit v1.2.3-59-g8ed1b From 97db120451e20c65bc5f2addfd63e1e8a3f0a39e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Mar 2019 17:20:24 +0200 Subject: btrfs: Preallocate chunks in cow_file_range_async This commit changes the implementation of cow_file_range_async in order to get rid of the BUG_ON in the middle of the loop. Additionally it reworks the inner loop in the hopes of making it more understandable. The idea is to make async_cow be a top-level structured, shared amongst all chunks being sent for compression. This allows to perform one memory allocation at the beginning and gracefully fail the IO if there isn't enough memory. 
Now, each chunk is going to be described by an async_chunk struct. It's the responsibility of the final chunk to actually free the memory. Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 108 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fcc23e14a86c..1181abb54818 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -355,7 +355,7 @@ struct async_extent { struct list_head list; }; -struct async_cow { +struct async_chunk { struct inode *inode; struct btrfs_fs_info *fs_info; struct page *locked_page; @@ -364,9 +364,16 @@ struct async_cow { unsigned int write_flags; struct list_head extents; struct btrfs_work work; + atomic_t *pending; +}; + +struct async_cow { + /* Number of chunks in flight; must be first in the structure */ + atomic_t num_chunks; + struct async_chunk chunks[]; }; -static noinline int add_async_extent(struct async_cow *cow, +static noinline int add_async_extent(struct async_chunk *cow, u64 start, u64 ram_size, u64 compressed_size, struct page **pages, @@ -436,7 +443,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode, static noinline void compress_file_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, - struct async_cow *async_cow, + struct async_chunk *async_cow, int *num_added) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -702,7 +709,7 @@ static void free_async_extent_pages(struct async_extent *async_extent) * queued. We walk all the async extents created by compress_file_range * and send them down to the disk. 
*/ -static noinline void submit_compressed_extents(struct async_cow *async_cow) +static noinline void submit_compressed_extents(struct async_chunk *async_cow) { struct inode *inode = async_cow->inode; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -1121,9 +1128,10 @@ out_unlock: */ static noinline void async_cow_start(struct btrfs_work *work) { - struct async_cow *async_cow; + struct async_chunk *async_cow; int num_added = 0; - async_cow = container_of(work, struct async_cow, work); + + async_cow = container_of(work, struct async_chunk, work); compress_file_range(async_cow->inode, async_cow->locked_page, async_cow->start, async_cow->end, async_cow, @@ -1140,10 +1148,10 @@ static noinline void async_cow_start(struct btrfs_work *work) static noinline void async_cow_submit(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; - struct async_cow *async_cow; + struct async_chunk *async_cow; unsigned long nr_pages; - async_cow = container_of(work, struct async_cow, work); + async_cow = container_of(work, struct async_chunk, work); fs_info = async_cow->fs_info; nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >> @@ -1166,11 +1174,17 @@ static noinline void async_cow_submit(struct btrfs_work *work) static noinline void async_cow_free(struct btrfs_work *work) { - struct async_cow *async_cow; - async_cow = container_of(work, struct async_cow, work); + struct async_chunk *async_cow; + + async_cow = container_of(work, struct async_chunk, work); if (async_cow->inode) btrfs_add_delayed_iput(async_cow->inode); - kfree(async_cow); + /* + * Since the pointer to 'pending' is at the beginning of the array of + * async_cow's, freeing it ensures the whole array has been freed. 
+ */ + if (atomic_dec_and_test(async_cow->pending)) + kfree(async_cow->pending); } static int cow_file_range_async(struct inode *inode, struct page *locked_page, @@ -1179,45 +1193,71 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, unsigned int write_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct async_cow *async_cow; + struct async_cow *ctx; + struct async_chunk *async_chunk; unsigned long nr_pages; u64 cur_end; + u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K); + int i; + bool should_compress; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, 1, 0, NULL); - while (start < end) { - async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); - BUG_ON(!async_cow); /* -ENOMEM */ + + if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && + !btrfs_test_opt(fs_info, FORCE_COMPRESS)) { + num_chunks = 1; + should_compress = false; + } else { + should_compress = true; + } + + ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS); + if (!ctx) { + unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | + EXTENT_DO_ACCOUNTING; + unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK | + PAGE_SET_ERROR; + + extent_clear_unlock_delalloc(inode, start, end, 0, locked_page, + clear_bits, page_ops); + return -ENOMEM; + } + + async_chunk = ctx->chunks; + atomic_set(&ctx->num_chunks, num_chunks); + + for (i = 0; i < num_chunks; i++) { + if (should_compress) + cur_end = min(end, start + SZ_512K - 1); + else + cur_end = end; + /* * igrab is called higher up in the call chain, take only the * lightweight reference for the callback lifetime */ ihold(inode); - async_cow->inode = inode; - async_cow->fs_info = fs_info; - async_cow->locked_page = locked_page; - async_cow->start = start; - async_cow->write_flags = write_flags; - - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && - !btrfs_test_opt(fs_info, FORCE_COMPRESS)) - 
cur_end = end; - else - cur_end = min(end, start + SZ_512K - 1); - - async_cow->end = cur_end; - INIT_LIST_HEAD(&async_cow->extents); - - btrfs_init_work(&async_cow->work, + async_chunk[i].pending = &ctx->num_chunks; + async_chunk[i].inode = inode; + async_chunk[i].start = start; + async_chunk[i].end = cur_end; + async_chunk[i].fs_info = fs_info; + async_chunk[i].locked_page = locked_page; + async_chunk[i].write_flags = write_flags; + INIT_LIST_HEAD(&async_chunk[i].extents); + + btrfs_init_work(&async_chunk[i].work, btrfs_delalloc_helper, async_cow_start, async_cow_submit, async_cow_free); - nr_pages = (cur_end - start + PAGE_SIZE) >> - PAGE_SHIFT; + nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE); atomic_add(nr_pages, &fs_info->async_delalloc_pages); - btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); + btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work); *nr_written += nr_pages; start = cur_end + 1; -- cgit v1.2.3-59-g8ed1b From b5326271e791ea98b97765cc45cff4d948c7c5a4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Mar 2019 17:20:25 +0200 Subject: btrfs: Rename async_cow to async_chunk Now that we have an explicit async_chunk struct rename references to variables of this type to async_chunk. No functional changes. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 60 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1181abb54818..958dc7aa453a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -443,7 +443,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode, static noinline void compress_file_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, - struct async_chunk *async_cow, + struct async_chunk *async_chunk, int *num_added) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -626,7 +626,7 @@ cont: * allocation on disk for these compressed pages, and * will submit them to the elevator. */ - add_async_extent(async_cow, start, total_in, + add_async_extent(async_chunk, start, total_in, total_compressed, pages, nr_pages, compress_type); @@ -673,7 +673,7 @@ cleanup_and_bail_uncompressed: if (redirty) extent_range_redirty_for_io(inode, start, end); - add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, + add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; @@ -709,9 +709,9 @@ static void free_async_extent_pages(struct async_extent *async_extent) * queued. We walk all the async extents created by compress_file_range * and send them down to the disk. 
*/ -static noinline void submit_compressed_extents(struct async_chunk *async_cow) +static noinline void submit_compressed_extents(struct async_chunk *async_chunk) { - struct inode *inode = async_cow->inode; + struct inode *inode = async_chunk->inode; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct async_extent *async_extent; u64 alloc_hint = 0; @@ -722,8 +722,8 @@ static noinline void submit_compressed_extents(struct async_chunk *async_cow) int ret = 0; again: - while (!list_empty(&async_cow->extents)) { - async_extent = list_entry(async_cow->extents.next, + while (!list_empty(&async_chunk->extents)) { + async_extent = list_entry(async_chunk->extents.next, struct async_extent, list); list_del(&async_extent->list); @@ -740,7 +740,7 @@ retry: async_extent->ram_size - 1); /* allocate blocks */ - ret = cow_file_range(inode, async_cow->locked_page, + ret = cow_file_range(inode, async_chunk->locked_page, async_extent->start, async_extent->start + async_extent->ram_size - 1, @@ -764,7 +764,7 @@ retry: async_extent->ram_size - 1, WB_SYNC_ALL); else if (ret) - unlock_page(async_cow->locked_page); + unlock_page(async_chunk->locked_page); kfree(async_extent); cond_resched(); continue; @@ -851,7 +851,7 @@ retry: ins.objectid, ins.offset, async_extent->pages, async_extent->nr_pages, - async_cow->write_flags)) { + async_chunk->write_flags)) { struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; const u64 end = start + async_extent->ram_size - 1; @@ -1128,17 +1128,17 @@ out_unlock: */ static noinline void async_cow_start(struct btrfs_work *work) { - struct async_chunk *async_cow; + struct async_chunk *async_chunk; int num_added = 0; - async_cow = container_of(work, struct async_chunk, work); + async_chunk = container_of(work, struct async_chunk, work); - compress_file_range(async_cow->inode, async_cow->locked_page, - async_cow->start, async_cow->end, async_cow, + compress_file_range(async_chunk->inode, async_chunk->locked_page, + 
async_chunk->start, async_chunk->end, async_chunk, &num_added); if (num_added == 0) { - btrfs_add_delayed_iput(async_cow->inode); - async_cow->inode = NULL; + btrfs_add_delayed_iput(async_chunk->inode); + async_chunk->inode = NULL; } } @@ -1148,13 +1148,13 @@ static noinline void async_cow_start(struct btrfs_work *work) static noinline void async_cow_submit(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; - struct async_chunk *async_cow; + struct async_chunk *async_chunk; unsigned long nr_pages; - async_cow = container_of(work, struct async_chunk, work); + async_chunk = container_of(work, struct async_chunk, work); - fs_info = async_cow->fs_info; - nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >> + fs_info = async_chunk->fs_info; + nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >> PAGE_SHIFT; /* atomic_sub_return implies a barrier */ @@ -1163,28 +1163,28 @@ static noinline void async_cow_submit(struct btrfs_work *work) cond_wake_up_nomb(&fs_info->async_submit_wait); /* - * ->inode could be NULL if async_cow_start has failed to compress, + * ->inode could be NULL if async_chunk_start has failed to compress, * in which case we don't have anything to submit, yet we need to * always adjust ->async_delalloc_pages as its paired with the init * happening in cow_file_range_async */ - if (async_cow->inode) - submit_compressed_extents(async_cow); + if (async_chunk->inode) + submit_compressed_extents(async_chunk); } static noinline void async_cow_free(struct btrfs_work *work) { - struct async_chunk *async_cow; + struct async_chunk *async_chunk; - async_cow = container_of(work, struct async_chunk, work); - if (async_cow->inode) - btrfs_add_delayed_iput(async_cow->inode); + async_chunk = container_of(work, struct async_chunk, work); + if (async_chunk->inode) + btrfs_add_delayed_iput(async_chunk->inode); /* * Since the pointer to 'pending' is at the beginning of the array of - * async_cow's, freeing it ensures the whole array has been freed. 
+ * async_chunk's, freeing it ensures the whole array has been freed. */ - if (atomic_dec_and_test(async_cow->pending)) - kfree(async_cow->pending); + if (atomic_dec_and_test(async_chunk->pending)) + kfree(async_chunk->pending); } static int cow_file_range_async(struct inode *inode, struct page *locked_page, -- cgit v1.2.3-59-g8ed1b From c5a68aec4e6890ed019020efaac2ac47b48855ff Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Mar 2019 17:20:26 +0200 Subject: btrfs: Remove fs_info from struct async_chunk The associated btrfs_work already contains a reference to the fs_info so use that instead of passing it via async_chunk. No functional changes. Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 958dc7aa453a..b594a7468716 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -357,7 +357,6 @@ struct async_extent { struct async_chunk { struct inode *inode; - struct btrfs_fs_info *fs_info; struct page *locked_page; u64 start; u64 end; @@ -1147,13 +1146,11 @@ static noinline void async_cow_start(struct btrfs_work *work) */ static noinline void async_cow_submit(struct btrfs_work *work) { - struct btrfs_fs_info *fs_info; - struct async_chunk *async_chunk; + struct async_chunk *async_chunk = container_of(work, struct async_chunk, + work); + struct btrfs_fs_info *fs_info = btrfs_work_owner(work); unsigned long nr_pages; - async_chunk = container_of(work, struct async_chunk, work); - - fs_info = async_chunk->fs_info; nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >> PAGE_SHIFT; @@ -1244,7 +1241,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_chunk[i].inode = inode; async_chunk[i].start = start; async_chunk[i].end = cur_end; - async_chunk[i].fs_info = fs_info; async_chunk[i].locked_page = locked_page; 
async_chunk[i].write_flags = write_flags; INIT_LIST_HEAD(&async_chunk[i].extents); -- cgit v1.2.3-59-g8ed1b From 1368c6dac7f10a18195fa4ebf072799a727fd4a6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Mar 2019 17:20:27 +0200 Subject: btrfs: Make compress_file_range take only struct async_chunk All context this function needs is held within struct async_chunk. Currently we not only pass the struct but also every individual member. This is redundant, simplify it by only passing struct async_chunk and leaving it to compress_file_range to extract the values it requires. No functional changes. Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b594a7468716..f83c8edd1703 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -439,14 +439,14 @@ static inline void inode_should_defrag(struct btrfs_inode *inode, * are written in the same order that the flusher thread sent them * down. */ -static noinline void compress_file_range(struct inode *inode, - struct page *locked_page, - u64 start, u64 end, - struct async_chunk *async_chunk, - int *num_added) +static noinline void compress_file_range(struct async_chunk *async_chunk, + int *num_added) { + struct inode *inode = async_chunk->inode; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 blocksize = fs_info->sectorsize; + u64 start = async_chunk->start; + u64 end = async_chunk->end; u64 actual_end; int ret = 0; struct page **pages = NULL; @@ -665,9 +665,9 @@ cleanup_and_bail_uncompressed: * to our extent and set things up for the async work queue to run * cow_file_range to do the normal delalloc dance. 
*/ - if (page_offset(locked_page) >= start && - page_offset(locked_page) <= end) - __set_page_dirty_nobuffers(locked_page); + if (page_offset(async_chunk->locked_page) >= start && + page_offset(async_chunk->locked_page) <= end) + __set_page_dirty_nobuffers(async_chunk->locked_page); /* unlocked later on in the async handlers */ if (redirty) @@ -1132,9 +1132,7 @@ static noinline void async_cow_start(struct btrfs_work *work) async_chunk = container_of(work, struct async_chunk, work); - compress_file_range(async_chunk->inode, async_chunk->locked_page, - async_chunk->start, async_chunk->end, async_chunk, - &num_added); + compress_file_range(async_chunk, &num_added); if (num_added == 0) { btrfs_add_delayed_iput(async_chunk->inode); async_chunk->inode = NULL; -- cgit v1.2.3-59-g8ed1b From 69684c5a88903f4b8b05850d9b89d276b54ec54b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Mar 2019 17:20:28 +0200 Subject: btrfs: Replace clear_extent_bit with unlock_extent Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f83c8edd1703..05362559645e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1196,8 +1196,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, int i; bool should_compress; - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, - 1, 0, NULL); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end); if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && !btrfs_test_opt(fs_info, FORCE_COMPRESS)) { -- cgit v1.2.3-59-g8ed1b From 4336650aff746e23d387227db826b39f00a3f3c3 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Mar 2019 17:20:29 +0200 Subject: btrfs: Set io_tree only once in submit_compressed_extents The inode never changes so it's sufficient to dereference it and get the iotree only once, before 
the execution of the main loop. No functional changes, only the size of the function is decreased: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-44 (-44) Function old new delta submit_compressed_extents 1240 1196 -44 Total: Before=88476, After=88432, chg -0.05% Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 05362559645e..83bf10bb13bb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -717,7 +717,7 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk) struct btrfs_key ins; struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_io_tree *io_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; int ret = 0; again: @@ -725,9 +725,6 @@ again: async_extent = list_entry(async_chunk->extents.next, struct async_extent, list); list_del(&async_extent->list); - - io_tree = &BTRFS_I(inode)->io_tree; - retry: /* did the compression code fall back to uncompressed IO? */ if (!async_extent->pages) { -- cgit v1.2.3-59-g8ed1b From 7447555fe7765d7823f5db7760bfdeba035b7bad Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Mar 2019 17:20:30 +0200 Subject: btrfs: Factor out common extent locking code in submit_compressed_extents Irrespective of whether the compress code fell back to uncompressed or a compressed extent has to be submitted, the extent range is always locked. So factor out the common lock_extent call at the beginning of the loop. No functional changes just removes one duplicate lock_extent call. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 83bf10bb13bb..05ff09e8a200 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -725,16 +725,15 @@ again: async_extent = list_entry(async_chunk->extents.next, struct async_extent, list); list_del(&async_extent->list); + retry: + lock_extent(io_tree, async_extent->start, + async_extent->start + async_extent->ram_size - 1); /* did the compression code fall back to uncompressed IO? */ if (!async_extent->pages) { int page_started = 0; unsigned long nr_written = 0; - lock_extent(io_tree, async_extent->start, - async_extent->start + - async_extent->ram_size - 1); - /* allocate blocks */ ret = cow_file_range(inode, async_chunk->locked_page, async_extent->start, @@ -766,9 +765,6 @@ retry: continue; } - lock_extent(io_tree, async_extent->start, - async_extent->start + async_extent->ram_size - 1); - ret = btrfs_reserve_extent(root, async_extent->ram_size, async_extent->compressed_size, async_extent->compressed_size, -- cgit v1.2.3-59-g8ed1b From b1c16ac978fd40ae636e629bb69a652df7eebdc2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 1 Apr 2019 11:29:57 +0300 Subject: btrfs: Use kvmalloc for allocating compressed path context Recent refactoring of cow_file_range_async means it's now possible to request a rather large physically contiguous memory via kmalloc. The size is dependent on the number of 512k chunks that the compressed range consists of. David reported multiple OOM messages on such large allocations. Fix it by switching to using kvmalloc. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 05ff09e8a200..b6d549c993f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "ctree.h" #include "disk-io.h" @@ -1172,7 +1173,7 @@ static noinline void async_cow_free(struct btrfs_work *work) * async_chunk's, freeing it ensures the whole array has been freed. */ if (atomic_dec_and_test(async_chunk->pending)) - kfree(async_chunk->pending); + kvfree(async_chunk->pending); } static int cow_file_range_async(struct inode *inode, struct page *locked_page, @@ -1188,6 +1189,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K); int i; bool should_compress; + unsigned nofs_flag; unlock_extent(&BTRFS_I(inode)->io_tree, start, end); @@ -1199,7 +1201,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, should_compress = true; } - ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS); + nofs_flag = memalloc_nofs_save(); + ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); + if (!ctx) { unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | -- cgit v1.2.3-59-g8ed1b