aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/btrfs/relocation.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/relocation.c')
-rw-r--r--fs/btrfs/relocation.c661
1 files changed, 301 insertions, 360 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 995d4b8b1cfd..f65595602aa8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -9,6 +9,7 @@
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
+#include <linux/error-injection.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -22,6 +23,54 @@
#include "print-tree.h"
#include "delalloc-space.h"
#include "block-group.h"
+#include "backref.h"
+
+/*
+ * Relocation overview
+ *
+ * [What does relocation do]
+ *
+ * The objective of relocation is to relocate all extents of the target block
+ * group to other block groups.
+ * This is utilized by resize (shrink only), profile converting, compacting
+ * space, or balance routine to spread chunks over devices.
+ *
+ * Before | After
+ * ------------------------------------------------------------------
+ * BG A: 10 data extents | BG A: deleted
+ * BG B: 2 data extents | BG B: 10 data extents (2 old + 8 relocated)
+ * BG C: 1 extents | BG C: 3 data extents (1 old + 2 relocated)
+ *
+ * [How does relocation work]
+ *
+ * 1. Mark the target block group read-only
+ * New extents won't be allocated from the target block group.
+ *
+ * 2.1 Record each extent in the target block group
+ * To build a proper map of extents to be relocated.
+ *
+ * 2.2 Build data reloc tree and reloc trees
+ * Data reloc tree will contain an inode, recording all newly relocated
+ * data extents.
+ * There will be only one data reloc tree for one data block group.
+ *
+ * Reloc tree will be a special snapshot of its source tree, containing
+ * relocated tree blocks.
+ * Each tree referring to a tree block in target block group will get its
+ * reloc tree built.
+ *
+ * 2.3 Swap source tree with its corresponding reloc tree
+ * Each involved tree only refers to new extents after swap.
+ *
+ * 3. Cleanup reloc trees and data reloc tree.
+ * As old extents in the target block group are still referenced by reloc
+ * trees, we need to clean them up before really freeing the target block
+ * group.
+ *
+ * The main complexity is in steps 2.2 and 2.3.
+ *
+ * The entry point of relocation is relocate_block_group() function.
+ */
/*
* backref_node, mapping_node and tree_block start with this
@@ -256,6 +305,7 @@ static void free_backref_node(struct backref_cache *cache,
{
if (node) {
cache->nr_nodes--;
+ btrfs_put_root(node->root);
kfree(node);
}
}
@@ -589,22 +639,7 @@ static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
root = (struct btrfs_root *)node->data;
}
spin_unlock(&rc->reloc_root_tree.lock);
- return root;
-}
-
-static int is_cowonly_root(u64 root_objectid)
-{
- if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
- root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
- root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
- root_objectid == BTRFS_DEV_TREE_OBJECTID ||
- root_objectid == BTRFS_TREE_LOG_OBJECTID ||
- root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
- root_objectid == BTRFS_UUID_TREE_OBJECTID ||
- root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
- root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
- return 1;
- return 0;
+ return btrfs_grab_root(root);
}
static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
@@ -614,10 +649,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
key.objectid = root_objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
- if (is_cowonly_root(root_objectid))
- key.offset = 0;
- else
- key.offset = (u64)-1;
+ key.offset = (u64)-1;
return btrfs_get_fs_root(fs_info, &key, false);
}
@@ -711,8 +743,6 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
err = -ENOMEM;
goto out;
}
- path1->reada = READA_FORWARD;
- path2->reada = READA_FORWARD;
node = alloc_backref_node(cache);
if (!node) {
@@ -899,10 +929,12 @@ again:
/* tree root */
ASSERT(btrfs_root_bytenr(&root->root_item) ==
cur->bytenr);
- if (should_ignore_root(root))
+ if (should_ignore_root(root)) {
+ btrfs_put_root(root);
list_add(&cur->list, &useless);
- else
+ } else {
cur->root = root;
+ }
break;
}
@@ -915,6 +947,7 @@ again:
ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
path2->lowest_level = 0;
if (ret < 0) {
+ btrfs_put_root(root);
err = ret;
goto out;
}
@@ -930,6 +963,7 @@ again:
root->root_key.objectid,
node_key->objectid, node_key->type,
node_key->offset);
+ btrfs_put_root(root);
err = -ENOENT;
goto out;
}
@@ -941,15 +975,18 @@ again:
if (!path2->nodes[level]) {
ASSERT(btrfs_root_bytenr(&root->root_item) ==
lower->bytenr);
- if (should_ignore_root(root))
+ if (should_ignore_root(root)) {
+ btrfs_put_root(root);
list_add(&lower->list, &useless);
- else
+ } else {
lower->root = root;
+ }
break;
}
edge = alloc_backref_edge(cache);
if (!edge) {
+ btrfs_put_root(root);
err = -ENOMEM;
goto out;
}
@@ -959,6 +996,7 @@ again:
if (!rb_node) {
upper = alloc_backref_node(cache);
if (!upper) {
+ btrfs_put_root(root);
free_backref_edge(cache, edge);
err = -ENOMEM;
goto out;
@@ -1006,8 +1044,10 @@ again:
edge->node[LOWER] = lower;
edge->node[UPPER] = upper;
- if (rb_node)
+ if (rb_node) {
+ btrfs_put_root(root);
break;
+ }
lower = upper;
upper = NULL;
}
@@ -1186,7 +1226,7 @@ out:
free_backref_node(cache, lower);
}
- free_backref_node(cache, node);
+ remove_backref_node(cache, node);
return ERR_PTR(err);
}
ASSERT(!node || !node->detached);
@@ -1244,7 +1284,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
new_node->level = node->level;
new_node->lowest = node->lowest;
new_node->checked = 1;
- new_node->root = dest;
+ new_node->root = btrfs_grab_root(dest);
+ ASSERT(new_node->root);
if (!node->lowest) {
list_for_each_entry(edge, &node->lower, list[UPPER]) {
@@ -1298,7 +1339,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
if (!node)
return -ENOMEM;
- node->bytenr = root->node->start;
+ node->bytenr = root->commit_root->start;
node->data = root;
spin_lock(&rc->reloc_root_tree.lock);
@@ -1325,14 +1366,16 @@ static void __del_reloc_root(struct btrfs_root *root)
struct rb_node *rb_node;
struct mapping_node *node = NULL;
struct reloc_control *rc = fs_info->reloc_ctl;
+ bool put_ref = false;
if (rc && root->node) {
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ RB_CLEAR_NODE(&node->rb_node);
}
spin_unlock(&rc->reloc_root_tree.lock);
if (!node)
@@ -1340,9 +1383,22 @@ static void __del_reloc_root(struct btrfs_root *root)
BUG_ON((struct btrfs_root *)node->data != root);
}
+ /*
+ * We only put the reloc root here if it's on the list. There's a lot
+ * of places where the pattern is to splice the rc->reloc_roots, process
+ * the reloc roots, and then add the reloc root back onto
+ * rc->reloc_roots. If we call __del_reloc_root while it's off of the
+ * list we don't want the reference being dropped, because the guy
+ * messing with the list is in charge of the reference.
+ */
spin_lock(&fs_info->trans_lock);
- list_del_init(&root->root_list);
+ if (!list_empty(&root->root_list)) {
+ put_ref = true;
+ list_del_init(&root->root_list);
+ }
spin_unlock(&fs_info->trans_lock);
+ if (put_ref)
+ btrfs_put_root(root);
kfree(node);
}
@@ -1350,7 +1406,7 @@ static void __del_reloc_root(struct btrfs_root *root)
* helper to update the 'address of tree root -> reloc tree'
* mapping
*/
-static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
+static int __update_reloc_root(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *rb_node;
@@ -1359,7 +1415,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1371,7 +1427,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
BUG_ON((struct btrfs_root *)node->data != root);
spin_lock(&rc->reloc_root_tree.lock);
- node->bytenr = new_bytenr;
+ node->bytenr = root->node->start;
rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
@@ -1447,8 +1503,9 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
BUG_ON(ret);
kfree(root_item);
- reloc_root = btrfs_read_fs_root(fs_info->tree_root, &root_key);
+ reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
BUG_ON(IS_ERR(reloc_root));
+ set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
reloc_root->last_trans = trans->transid;
return reloc_root;
}
@@ -1456,6 +1513,9 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
/*
* create reloc tree for a given fs tree. reloc tree is just a
* snapshot of the fs tree with special root objectid.
+ *
+ * The reloc_root comes out of here with two references, one for
+ * root->reloc_root, and another for being on the rc->reloc_roots list.
*/
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
@@ -1467,6 +1527,10 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
int clear_rsv = 0;
int ret;
+ if (!rc || !rc->create_reloc_tree ||
+ root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+ return 0;
+
/*
* The subvolume has reloc tree but the swap is finished, no need to
* create/update the dead reloc tree
@@ -1480,10 +1544,6 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
return 0;
}
- if (!rc || !rc->create_reloc_tree ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
- return 0;
-
if (!trans->reloc_reserved) {
rsv = trans->block_rsv;
trans->block_rsv = rc->block_rsv;
@@ -1495,7 +1555,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
ret = __add_reloc_root(reloc_root);
BUG_ON(ret < 0);
- root->reloc_root = reloc_root;
+ root->reloc_root = btrfs_grab_root(reloc_root);
return 0;
}
@@ -1516,6 +1576,13 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
+ /*
+ * We are probably ok here, but __del_reloc_root() will drop its ref of
+ * the root. We have the ref for root->reloc_root, but just in case
+ * hold it while we update the reloc root.
+ */
+ btrfs_grab_root(reloc_root);
+
/* root->reloc_root will stay until current relocation finished */
if (fs_info->reloc_ctl->merge_reloc_tree &&
btrfs_root_refs(root_item) == 0) {
@@ -1529,6 +1596,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
}
if (reloc_root->commit_root != reloc_root->node) {
+ __update_reloc_root(reloc_root);
btrfs_set_root_node(root_item, reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
reloc_root->commit_root = btrfs_root_node(reloc_root);
@@ -1537,7 +1605,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_update_root(trans, fs_info->tree_root,
&reloc_root->root_key, root_item);
BUG_ON(ret);
-
+ btrfs_put_root(reloc_root);
out:
return 0;
}
@@ -2211,7 +2279,7 @@ static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
btrfs_update_reloc_root(trans, root);
if (list_empty(&root->reloc_dirty_list)) {
- btrfs_grab_fs_root(root);
+ btrfs_grab_root(root);
list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
}
}
@@ -2231,24 +2299,34 @@ static int clean_dirty_subvols(struct reloc_control *rc)
list_del_init(&root->reloc_dirty_list);
root->reloc_root = NULL;
- if (reloc_root) {
-
- ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
- if (ret2 < 0 && !ret)
- ret = ret2;
- }
/*
* Need barrier to ensure clear_bit() only happens after
* root->reloc_root = NULL. Pairs with have_reloc_root.
*/
smp_wmb();
clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
- btrfs_put_fs_root(root);
+ if (reloc_root) {
+ /*
+ * btrfs_drop_snapshot drops our ref we hold for
+ * ->reloc_root. If it fails however we must
+ * drop the ref ourselves.
+ */
+ ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
+ if (ret2 < 0) {
+ btrfs_put_root(reloc_root);
+ if (!ret)
+ ret = ret2;
+ }
+ }
+ btrfs_put_root(root);
} else {
/* Orphan reloc tree, just clean it up */
- ret2 = btrfs_drop_snapshot(root, NULL, 0, 1);
- if (ret2 < 0 && !ret)
- ret = ret2;
+ ret2 = btrfs_drop_snapshot(root, 0, 1);
+ if (ret2 < 0) {
+ btrfs_put_root(root);
+ if (!ret)
+ ret = ret2;
+ }
}
}
return ret;
@@ -2325,6 +2403,18 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
trans = NULL;
goto out;
}
+
+ /*
+ * At this point we no longer have a reloc_control, so we can't
+ * depend on btrfs_init_reloc_root to update our last_trans.
+ *
+ * But that's ok, we started the trans handle on our
+ * corresponding fs_root, which means it's been added to the
+ * dirty list. At commit time we'll still call
+ * btrfs_update_reloc_root() and update our root item
+ * appropriately.
+ */
+ reloc_root->last_trans = trans->transid;
trans->block_rsv = rc->block_rsv;
replaced = 0;
@@ -2435,7 +2525,7 @@ again:
if (IS_ERR(trans)) {
if (!err)
btrfs_block_rsv_release(fs_info, rc->block_rsv,
- num_bytes);
+ num_bytes, NULL);
return PTR_ERR(trans);
}
@@ -2443,7 +2533,7 @@ again:
if (num_bytes != rc->merging_rsv_size) {
btrfs_end_transaction(trans);
btrfs_block_rsv_release(fs_info, rc->block_rsv,
- num_bytes);
+ num_bytes, NULL);
goto again;
}
}
@@ -2468,6 +2558,7 @@ again:
btrfs_update_reloc_root(trans, root);
list_add(&reloc_root->root_list, &reloc_roots);
+ btrfs_put_root(root);
}
list_splice(&reloc_roots, &rc->reloc_roots);
@@ -2488,10 +2579,6 @@ void free_reloc_roots(struct list_head *list)
reloc_root = list_entry(list->next, struct btrfs_root,
root_list);
__del_reloc_root(reloc_root);
- free_extent_buffer(reloc_root->node);
- free_extent_buffer(reloc_root->commit_root);
- reloc_root->node = NULL;
- reloc_root->commit_root = NULL;
}
}
@@ -2529,6 +2616,7 @@ again:
BUG_ON(root->reloc_root != reloc_root);
ret = merge_reloc_root(rc, root);
+ btrfs_put_root(root);
if (ret) {
if (list_empty(&reloc_root->root_list))
list_add_tail(&reloc_root->root_list,
@@ -2561,7 +2649,21 @@ out:
free_reloc_roots(&reloc_roots);
}
- BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ /*
+ * We used to have
+ *
+ * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ *
+ * here, but it's wrong. If we fail to start the transaction in
+ * prepare_to_merge() we will have only 0 ref reloc roots, none of which
+ * have actually been removed from the reloc_root_tree rb tree. This is
+ * fine because we're bailing here, and we hold a reference on the root
+ * for the list that holds it, so these roots will be cleaned up when we
+ * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root
+ * will be cleaned up on unmount.
+ *
+ * The remaining nodes will be cleaned up by free_reloc_control.
+ */
}
static void free_block_list(struct rb_root *blocks)
@@ -2580,6 +2682,7 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = reloc_root->fs_info;
struct btrfs_root *root;
+ int ret;
if (reloc_root->last_trans == trans->transid)
return 0;
@@ -2587,8 +2690,10 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
root = read_fs_root(fs_info, reloc_root->root_key.offset);
BUG_ON(IS_ERR(root));
BUG_ON(root->reloc_root != reloc_root);
+ ret = btrfs_record_root_in_trans(trans, root);
+ btrfs_put_root(root);
- return btrfs_record_root_in_trans(trans, root);
+ return ret;
}
static noinline_for_stack
@@ -2621,7 +2726,9 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
BUG_ON(next->new_bytenr);
BUG_ON(!list_empty(&next->list));
next->new_bytenr = root->node->start;
- next->root = root;
+ btrfs_put_root(next->root);
+ next->root = btrfs_grab_root(root);
+ ASSERT(next->root);
list_add_tail(&next->list,
&rc->backref_cache.changed);
__mark_block_processed(rc, next);
@@ -3040,7 +3147,6 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
{
struct extent_buffer *eb;
- BUG_ON(block->key_ready);
eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
block->level, NULL);
if (IS_ERR(eb)) {
@@ -3073,6 +3179,14 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
if (!node)
return 0;
+ /*
+ * If we fail here we want to drop our backref_node because we are going
+ * to start over and regenerate the tree for it.
+ */
+ ret = reserve_metadata_space(trans, rc, node);
+ if (ret)
+ goto out;
+
BUG_ON(node->processed);
root = select_one_root(node);
if (root == ERR_PTR(-ENOENT)) {
@@ -3080,12 +3194,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
goto out;
}
- if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
- ret = reserve_metadata_space(trans, rc, node);
- if (ret)
- goto out;
- }
-
if (root) {
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
BUG_ON(node->new_bytenr);
@@ -3093,7 +3201,9 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
btrfs_record_root_in_trans(trans, root);
root = root->reloc_root;
node->new_bytenr = root->node->start;
- node->root = root;
+ btrfs_put_root(node->root);
+ node->root = btrfs_grab_root(root);
+ ASSERT(node->root);
list_add_tail(&node->list, &rc->backref_cache.changed);
} else {
path->lowest_level = node->level;
@@ -3161,9 +3271,8 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
ret = relocate_tree_block(trans, rc, node, &block->key,
path);
if (ret < 0) {
- if (ret != -EAGAIN || &block->rb_node == rb_first(blocks))
- err = ret;
- goto out;
+ err = ret;
+ break;
}
}
out:
@@ -3264,6 +3373,15 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
return ret;
}
+/*
+ * Allow error injection to test balance cancellation
+ */
+int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
+{
+ return atomic_read(&fs_info->balance_cancel_req);
+}
+ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
+
static int relocate_file_extent_cluster(struct inode *inode,
struct file_extent_cluster *cluster)
{
@@ -3385,6 +3503,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_throttle(fs_info);
+ if (btrfs_should_cancel_balance(fs_info)) {
+ ret = -ECANCELED;
+ goto out;
+ }
}
WARN_ON(nr != cluster->nr);
out:
@@ -3556,31 +3678,6 @@ out:
return ret;
}
-/*
- * helper to check if the block use full backrefs for pointers in it
- */
-static int block_use_full_backref(struct reloc_control *rc,
- struct extent_buffer *eb)
-{
- u64 flags;
- int ret;
-
- if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
- btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
- return 1;
-
- ret = btrfs_lookup_extent_info(NULL, rc->extent_root->fs_info,
- eb->start, btrfs_header_level(eb), 1,
- NULL, &flags);
- BUG_ON(ret);
-
- if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- ret = 1;
- else
- ret = 0;
- return ret;
-}
-
static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group *block_group,
struct inode *inode,
@@ -3624,172 +3721,40 @@ out:
}
/*
- * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
- * this function scans fs tree to find blocks reference the data extent
+ * Locate the free space cache EXTENT_DATA in root tree leaf and delete the
+ * cache inode, to avoid free space cache data extent blocking data relocation.
*/
-static int find_data_references(struct reloc_control *rc,
- struct btrfs_key *extent_key,
- struct extent_buffer *leaf,
- struct btrfs_extent_data_ref *ref,
- struct rb_root *blocks)
+static int delete_v1_space_cache(struct extent_buffer *leaf,
+ struct btrfs_block_group *block_group,
+ u64 data_bytenr)
{
- struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- struct btrfs_path *path;
- struct tree_block *block;
- struct btrfs_root *root;
- struct btrfs_file_extent_item *fi;
- struct rb_node *rb_node;
+ u64 space_cache_ino;
+ struct btrfs_file_extent_item *ei;
struct btrfs_key key;
- u64 ref_root;
- u64 ref_objectid;
- u64 ref_offset;
- u32 ref_count;
- u32 nritems;
- int err = 0;
- int added = 0;
- int counted;
+ bool found = false;
+ int i;
int ret;
- ref_root = btrfs_extent_data_ref_root(leaf, ref);
- ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
- ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
- ref_count = btrfs_extent_data_ref_count(leaf, ref);
-
- /*
- * This is an extent belonging to the free space cache, lets just delete
- * it and redo the search.
- */
- if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
- ret = delete_block_group_cache(fs_info, rc->block_group,
- NULL, ref_objectid);
- if (ret != -ENOENT)
- return ret;
- ret = 0;
- }
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = READA_FORWARD;
-
- root = read_fs_root(fs_info, ref_root);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto out;
- }
-
- key.objectid = ref_objectid;
- key.type = BTRFS_EXTENT_DATA_KEY;
- if (ref_offset > ((u64)-1 << 32))
- key.offset = 0;
- else
- key.offset = ref_offset;
-
- path->search_commit_root = 1;
- path->skip_locking = 1;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- /*
- * the references in tree blocks that use full backrefs
- * are not counted in
- */
- if (block_use_full_backref(rc, leaf))
- counted = 0;
- else
- counted = 1;
- rb_node = tree_search(blocks, leaf->start);
- if (rb_node) {
- if (counted)
- added = 1;
- else
- path->slots[0] = nritems;
- }
-
- while (ref_count > 0) {
- while (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (WARN_ON(ret > 0))
- goto out;
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- added = 0;
-
- if (block_use_full_backref(rc, leaf))
- counted = 0;
- else
- counted = 1;
- rb_node = tree_search(blocks, leaf->start);
- if (rb_node) {
- if (counted)
- added = 1;
- else
- path->slots[0] = nritems;
- }
- }
+ if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
+ return 0;
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (WARN_ON(key.objectid != ref_objectid ||
- key.type != BTRFS_EXTENT_DATA_KEY))
+ for (i = 0; i < btrfs_header_nritems(leaf); i++) {
+ btrfs_item_key_to_cpu(leaf, &key, i);
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+ ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG &&
+ btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
+ found = true;
+ space_cache_ino = key.objectid;
break;
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- goto next;
-
- if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
- extent_key->objectid)
- goto next;
-
- key.offset -= btrfs_file_extent_offset(leaf, fi);
- if (key.offset != ref_offset)
- goto next;
-
- if (counted)
- ref_count--;
- if (added)
- goto next;
-
- if (!tree_block_processed(leaf->start, rc)) {
- block = kmalloc(sizeof(*block), GFP_NOFS);
- if (!block) {
- err = -ENOMEM;
- break;
- }
- block->bytenr = leaf->start;
- btrfs_item_key_to_cpu(leaf, &block->key, 0);
- block->level = 0;
- block->key_ready = 1;
- rb_node = tree_insert(blocks, block->bytenr,
- &block->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST,
- block->bytenr);
}
- if (counted)
- added = 1;
- else
- path->slots[0] = nritems;
-next:
- path->slots[0]++;
-
}
-out:
- btrfs_free_path(path);
- return err;
+ if (!found)
+ return -ENOENT;
+ ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
+ space_cache_ino);
+ return ret;
}
/*
@@ -3801,91 +3766,41 @@ int add_data_references(struct reloc_control *rc,
struct btrfs_path *path,
struct rb_root *blocks)
{
- struct btrfs_key key;
- struct extent_buffer *eb;
- struct btrfs_extent_data_ref *dref;
- struct btrfs_extent_inline_ref *iref;
- unsigned long ptr;
- unsigned long end;
- u32 blocksize = rc->extent_root->fs_info->nodesize;
+ struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+ struct ulist *leaves = NULL;
+ struct ulist_iterator leaf_uiter;
+ struct ulist_node *ref_node = NULL;
+ const u32 blocksize = fs_info->nodesize;
int ret = 0;
- int err = 0;
-
- eb = path->nodes[0];
- ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
- end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
- ptr += sizeof(struct btrfs_extent_item);
- while (ptr < end) {
- iref = (struct btrfs_extent_inline_ref *)ptr;
- key.type = btrfs_get_extent_inline_ref_type(eb, iref,
- BTRFS_REF_TYPE_DATA);
- if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
- key.offset = btrfs_extent_inline_ref_offset(eb, iref);
- ret = __add_tree_block(rc, key.offset, blocksize,
- blocks);
- } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
- ret = find_data_references(rc, extent_key,
- eb, dref, blocks);
- } else {
- ret = -EUCLEAN;
- btrfs_err(rc->extent_root->fs_info,
- "extent %llu slot %d has an invalid inline ref type",
- eb->start, path->slots[0]);
- }
- if (ret) {
- err = ret;
- goto out;
- }
- ptr += btrfs_extent_inline_ref_size(key.type);
- }
- WARN_ON(ptr > end);
+ btrfs_release_path(path);
+ ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
+ 0, &leaves, NULL, true);
+ if (ret < 0)
+ return ret;
- while (1) {
- cond_resched();
- eb = path->nodes[0];
- if (path->slots[0] >= btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(rc->extent_root, path);
- if (ret < 0) {
- err = ret;
- break;
- }
- if (ret > 0)
- break;
- eb = path->nodes[0];
- }
+ ULIST_ITER_INIT(&leaf_uiter);
+ while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
+ struct extent_buffer *eb;
- btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
- if (key.objectid != extent_key->objectid)
+ eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL);
+ if (IS_ERR(eb)) {
+ ret = PTR_ERR(eb);
break;
-
- if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
- ret = __add_tree_block(rc, key.offset, blocksize,
- blocks);
- } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = btrfs_item_ptr(eb, path->slots[0],
- struct btrfs_extent_data_ref);
- ret = find_data_references(rc, extent_key,
- eb, dref, blocks);
- } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
- btrfs_print_v0_err(eb->fs_info);
- btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
- ret = -EINVAL;
- } else {
- ret = 0;
}
- if (ret) {
- err = ret;
+ ret = delete_v1_space_cache(eb, rc->block_group,
+ extent_key->objectid);
+ free_extent_buffer(eb);
+ if (ret < 0)
+ break;
+ ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
+ if (ret < 0)
break;
- }
- path->slots[0]++;
}
-out:
- btrfs_release_path(path);
- if (err)
+ if (ret < 0)
free_block_list(blocks);
- return err;
+ ulist_free(leaves);
+ return ret;
}
/*
@@ -4137,12 +4052,6 @@ restart:
if (!RB_EMPTY_ROOT(&blocks)) {
ret = relocate_tree_blocks(trans, rc, &blocks);
if (ret < 0) {
- /*
- * if we fail to relocate tree blocks, force to update
- * backref cache when committing transaction.
- */
- rc->backref_cache.last_trans = trans->transid - 1;
-
if (ret != -EAGAIN) {
err = ret;
break;
@@ -4166,6 +4075,10 @@ restart:
break;
}
}
+ if (btrfs_should_cancel_balance(fs_info)) {
+ err = -ECANCELED;
+ break;
+ }
}
if (trans && progress && err == -ENOSPC) {
ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
@@ -4195,15 +4108,23 @@ restart:
set_reloc_control(rc);
backref_cache_cleanup(&rc->backref_cache);
- btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1);
+ btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
+ /*
+ * Even in the case when the relocation is cancelled, we should all go
+ * through prepare_to_merge() and merge_reloc_roots().
+ *
+ * For error (including cancelled balance), prepare_to_merge() will
+ * mark all reloc trees orphan, then queue them for cleanup in
+ * merge_reloc_roots()
+ */
err = prepare_to_merge(rc, err);
merge_reloc_roots(rc);
rc->merge_reloc_tree = 0;
unset_reloc_control(rc);
- btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1);
+ btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
/* get rid of pinned extents */
trans = btrfs_join_transaction(rc->extent_root);
@@ -4212,10 +4133,10 @@ restart:
goto out_free;
}
btrfs_commit_transaction(trans);
+out_free:
ret = clean_dirty_subvols(rc);
if (ret < 0 && !err)
err = ret;
-out_free:
btrfs_free_block_rsv(fs_info, rc->block_rsv);
btrfs_free_path(path);
return err;
@@ -4271,8 +4192,10 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
return ERR_CAST(root);
trans = btrfs_start_transaction(root, 6);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
+ btrfs_put_root(root);
return ERR_CAST(trans);
+ }
err = btrfs_find_free_objectid(root, &objectid);
if (err)
@@ -4290,6 +4213,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
err = btrfs_orphan_add(trans, BTRFS_I(inode));
out:
+ btrfs_put_root(root);
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
if (err) {
@@ -4317,6 +4241,18 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
return rc;
}
+static void free_reloc_control(struct reloc_control *rc)
+{
+ struct mapping_node *node, *tmp;
+
+ free_reloc_roots(&rc->reloc_roots);
+ rbtree_postorder_for_each_entry_safe(node, tmp,
+ &rc->reloc_root_tree.rb_root, rb_node)
+ kfree(node);
+
+ kfree(rc);
+}
+
/*
* Print the block group being relocated
*/
@@ -4461,7 +4397,7 @@ out:
btrfs_dec_block_group_ro(rc->block_group);
iput(rc->data_inode);
btrfs_put_block_group(rc->block_group);
- kfree(rc);
+ free_reloc_control(rc);
return err;
}
@@ -4537,12 +4473,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)
key.type != BTRFS_ROOT_ITEM_KEY)
break;
- reloc_root = btrfs_read_fs_root(root, &key);
+ reloc_root = btrfs_read_tree_root(root, &key);
if (IS_ERR(reloc_root)) {
err = PTR_ERR(reloc_root);
goto out;
}
+ set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
list_add(&reloc_root->root_list, &reloc_roots);
if (btrfs_root_refs(&reloc_root->root_item) > 0) {
@@ -4559,6 +4496,8 @@ int btrfs_recover_relocation(struct btrfs_root *root)
err = ret;
goto out;
}
+ } else {
+ btrfs_put_root(fs_root);
}
}
@@ -4584,9 +4523,8 @@ int btrfs_recover_relocation(struct btrfs_root *root)
trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) {
- unset_reloc_control(rc);
err = PTR_ERR(trans);
- goto out_free;
+ goto out_unset;
}
rc->merge_reloc_tree = 1;
@@ -4606,17 +4544,18 @@ int btrfs_recover_relocation(struct btrfs_root *root)
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
list_add_tail(&reloc_root->root_list, &reloc_roots);
- goto out_free;
+ goto out_unset;
}
err = __add_reloc_root(reloc_root);
BUG_ON(err < 0); /* -ENOMEM or logic error */
- fs_root->reloc_root = reloc_root;
+ fs_root->reloc_root = btrfs_grab_root(reloc_root);
+ btrfs_put_root(fs_root);
}
err = btrfs_commit_transaction(trans);
if (err)
- goto out_free;
+ goto out_unset;
merge_reloc_roots(rc);
@@ -4625,15 +4564,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out_free;
+ goto out_clean;
}
err = btrfs_commit_transaction(trans);
-
+out_clean:
ret = clean_dirty_subvols(rc);
if (ret < 0 && !err)
err = ret;
-out_free:
- kfree(rc);
+out_unset:
+ unset_reloc_control(rc);
+ free_reloc_control(rc);
out:
if (!list_empty(&reloc_roots))
free_reloc_roots(&reloc_roots);
@@ -4643,10 +4583,12 @@ out:
if (err == 0) {
/* cleanup orphan inode in data relocation tree */
fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (IS_ERR(fs_root))
+ if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
- else
+ } else {
err = btrfs_orphan_cleanup(fs_root);
+ btrfs_put_root(fs_root);
+ }
}
return err;
}
@@ -4720,11 +4662,6 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
- if (buf == root->node)
- __update_reloc_root(root, cow->start);
- }
-
level = btrfs_header_level(buf);
if (btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item))
@@ -4795,6 +4732,10 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
/*
* called after snapshot is created. migrate block reservation
* and create reloc root for the newly created snapshot
+ *
+ * This is similar to btrfs_init_reloc_root(), we come out of here with two
+ * references held on the reloc_root, one for root->reloc_root and one for
+ * rc->reloc_roots.
*/
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending)
@@ -4827,7 +4768,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
ret = __add_reloc_root(reloc_root);
BUG_ON(ret < 0);
- new_root->reloc_root = reloc_root;
+ new_root->reloc_root = btrfs_grab_root(reloc_root);
if (rc->create_reloc_tree)
ret = clone_backref_node(trans, rc, root, reloc_root);