aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/backref.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/backref.c')
-rw-r--r--fs/btrfs/backref.c1531
1 files changed, 1314 insertions, 217 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e5d85311d5d5..18374a6d05bd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -13,6 +13,8 @@
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
+#include "misc.h"
+#include "tree-mod-log.h"
/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6
@@ -136,6 +138,7 @@ struct share_check {
u64 root_objectid;
u64 inum;
int share_count;
+ bool have_delayed_delete_refs;
};
static inline int extent_is_shared(struct share_check *sc)
@@ -286,8 +289,10 @@ static void prelim_release(struct preftree *preftree)
struct prelim_ref *ref, *next_ref;
rbtree_postorder_for_each_entry_safe(ref, next_ref,
- &preftree->root.rb_root, rbnode)
+ &preftree->root.rb_root, rbnode) {
+ free_inode_elem_list(ref->inode_list);
free_pref(ref);
+ }
preftree->root = RB_ROOT_CACHED;
preftree->count = 0;
@@ -347,33 +352,10 @@ static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
return -ENOMEM;
ref->root_id = root_id;
- if (key) {
+ if (key)
ref->key_for_search = *key;
- /*
- * We can often find data backrefs with an offset that is too
- * large (>= LLONG_MAX, maximum allowed file offset) due to
- * underflows when subtracting a file's offset with the data
- * offset of its corresponding extent data item. This can
- * happen for example in the clone ioctl.
- * So if we detect such case we set the search key's offset to
- * zero to make sure we will find the matching file extent item
- * at add_all_parents(), otherwise we will miss it because the
- * offset taken form the backref is much larger then the offset
- * of the file extent item. This can make us scan a very large
- * number of file extent items, but at least it will not make
- * us miss any.
- * This is an ugly workaround for a behaviour that should have
- * never existed, but it does and a fix for the clone ioctl
- * would touch a lot of places, cause backwards incompatibility
- * and would not fix the problem for extents cloned with older
- * kernels.
- */
- if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
- ref->key_for_search.offset >= LLONG_MAX)
- ref->key_for_search.offset = 0;
- } else {
+ else
memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
- }
ref->inode_list = NULL;
ref->level = level;
@@ -409,10 +391,36 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
wanted_disk_byte, count, sc, gfp_mask);
}
+static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
+{
+ struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct prelim_ref *ref = NULL;
+ struct prelim_ref target = {};
+ int result;
+
+ target.parent = bytenr;
+
+ while (*p) {
+ parent = *p;
+ ref = rb_entry(parent, struct prelim_ref, rbnode);
+ result = prelim_ref_compare(ref, &target);
+
+ if (result < 0)
+ p = &(*p)->rb_left;
+ else if (result > 0)
+ p = &(*p)->rb_right;
+ else
+ return 1;
+ }
+ return 0;
+}
+
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
- struct ulist *parents, struct prelim_ref *ref,
+ struct ulist *parents,
+ struct preftrees *preftrees, struct prelim_ref *ref,
int level, u64 time_seq, const u64 *extent_item_pos,
- u64 total_refs, bool ignore_offset)
+ bool ignore_offset)
{
int ret = 0;
int slot;
@@ -424,6 +432,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
u64 disk_byte;
u64 wanted_disk_byte = ref->wanted_disk_byte;
u64 count = 0;
+ u64 data_offset;
if (level != 0) {
eb = path->nodes[level];
@@ -434,18 +443,26 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
}
/*
- * We normally enter this function with the path already pointing to
- * the first item to check. But sometimes, we may enter it with
- * slot==nritems. In that case, go to the next leaf before we continue.
+ * 1. We normally enter this function with the path already pointing to
+ * the first item to check. But sometimes, we may enter it with
+ * slot == nritems.
+ * 2. We are searching for normal backref but bytenr of this leaf
+ * matches shared data backref
+ * 3. The leaf owner is not equal to the root we are searching
+ *
+ * For these cases, go to the next leaf before we continue.
*/
- if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
- if (time_seq == SEQ_LAST)
+ eb = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(eb) ||
+ is_shared_data_backref(preftrees, eb->start) ||
+ ref->root_id != btrfs_header_owner(eb)) {
+ if (time_seq == BTRFS_SEQ_LAST)
ret = btrfs_next_leaf(root, path);
else
ret = btrfs_next_old_leaf(root, path, time_seq);
}
- while (!ret && count < total_refs) {
+ while (!ret && count < ref->count) {
eb = path->nodes[0];
slot = path->slots[0];
@@ -455,13 +472,31 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
key.type != BTRFS_EXTENT_DATA_KEY)
break;
+ /*
+ * We are searching for normal backref but bytenr of this leaf
+ * matches shared data backref, OR
+ * the leaf owner is not equal to the root we are searching for
+ */
+ if (slot == 0 &&
+ (is_shared_data_backref(preftrees, eb->start) ||
+ ref->root_id != btrfs_header_owner(eb))) {
+ if (time_seq == BTRFS_SEQ_LAST)
+ ret = btrfs_next_leaf(root, path);
+ else
+ ret = btrfs_next_old_leaf(root, path, time_seq);
+ continue;
+ }
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+ data_offset = btrfs_file_extent_offset(eb, fi);
if (disk_byte == wanted_disk_byte) {
eie = NULL;
old = NULL;
- count++;
+ if (ref->key_for_search.offset == key.offset - data_offset)
+ count++;
+ else
+ goto next;
if (extent_item_pos) {
ret = check_extent_in_eb(&key, eb, fi,
*extent_item_pos,
@@ -483,7 +518,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
eie = NULL;
}
next:
- if (time_seq == SEQ_LAST)
+ if (time_seq == BTRFS_SEQ_LAST)
ret = btrfs_next_item(root, path);
else
ret = btrfs_next_old_item(root, path, time_seq);
@@ -502,59 +537,82 @@ next:
*/
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
+ struct preftrees *preftrees,
struct prelim_ref *ref, struct ulist *parents,
- const u64 *extent_item_pos, u64 total_refs,
- bool ignore_offset)
+ const u64 *extent_item_pos, bool ignore_offset)
{
struct btrfs_root *root;
- struct btrfs_key root_key;
struct extent_buffer *eb;
int ret = 0;
int root_level;
int level = ref->level;
- int index;
+ struct btrfs_key search_key = ref->key_for_search;
- root_key.objectid = ref->root_id;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
-
- index = srcu_read_lock(&fs_info->subvol_srcu);
-
- root = btrfs_get_fs_root(fs_info, &root_key, false);
+ /*
+ * If we're search_commit_root we could possibly be holding locks on
+ * other tree nodes. This happens when qgroups does backref walks when
+ * adding new delayed refs. To deal with this we need to look in cache
+ * for the root, and if we don't find it then we need to search the
+ * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
+ * here.
+ */
+ if (path->search_commit_root)
+ root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
+ else
+ root = btrfs_get_fs_root(fs_info, ref->root_id, false);
if (IS_ERR(root)) {
- srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = PTR_ERR(root);
+ goto out_free;
+ }
+
+ if (!path->search_commit_root &&
+ test_bit(BTRFS_ROOT_DELETING, &root->state)) {
+ ret = -ENOENT;
goto out;
}
if (btrfs_is_testing(fs_info)) {
- srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = -ENOENT;
goto out;
}
if (path->search_commit_root)
root_level = btrfs_header_level(root->commit_root);
- else if (time_seq == SEQ_LAST)
+ else if (time_seq == BTRFS_SEQ_LAST)
root_level = btrfs_header_level(root->node);
else
root_level = btrfs_old_root_level(root, time_seq);
- if (root_level + 1 == level) {
- srcu_read_unlock(&fs_info->subvol_srcu, index);
+ if (root_level + 1 == level)
goto out;
- }
+ /*
+ * We can often find data backrefs with an offset that is too large
+ * (>= LLONG_MAX, maximum allowed file offset) due to underflows when
+ * subtracting a file's offset with the data offset of its
+ * corresponding extent data item. This can happen for example in the
+ * clone ioctl.
+ *
+ * So if we detect such case we set the search key's offset to zero to
+ * make sure we will find the matching file extent item at
+ * add_all_parents(), otherwise we will miss it because the offset
+ * taken form the backref is much larger then the offset of the file
+ * extent item. This can make us scan a very large number of file
+ * extent items, but at least it will not make us miss any.
+ *
+ * This is an ugly workaround for a behaviour that should have never
+ * existed, but it does and a fix for the clone ioctl would touch a lot
+ * of places, cause backwards incompatibility and would not fix the
+ * problem for extents cloned with older kernels.
+ */
+ if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
+ search_key.offset >= LLONG_MAX)
+ search_key.offset = 0;
path->lowest_level = level;
- if (time_seq == SEQ_LAST)
- ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
- 0, 0);
+ if (time_seq == BTRFS_SEQ_LAST)
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
else
- ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
- time_seq);
-
- /* root node has been locked, we can release @subvol_srcu safely here */
- srcu_read_unlock(&fs_info->subvol_srcu, index);
+ ret = btrfs_search_old_slot(root, &search_key, path, time_seq);
btrfs_debug(fs_info,
"search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
@@ -574,9 +632,11 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
eb = path->nodes[level];
}
- ret = add_all_parents(root, path, parents, ref, level, time_seq,
- extent_item_pos, total_refs, ignore_offset);
+ ret = add_all_parents(root, path, parents, preftrees, ref, level,
+ time_seq, extent_item_pos, ignore_offset);
out:
+ btrfs_put_root(root);
+out_free:
path->lowest_level = 0;
btrfs_release_path(path);
return ret;
@@ -590,6 +650,18 @@ unode_aux_to_inode_list(struct ulist_node *node)
return (struct extent_inode_elem *)(uintptr_t)node->aux;
}
+static void free_leaf_list(struct ulist *ulist)
+{
+ struct ulist_node *node;
+ struct ulist_iterator uiter;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((node = ulist_next(ulist, &uiter)))
+ free_inode_elem_list(unode_aux_to_inode_list(node));
+
+ ulist_free(ulist);
+}
+
/*
* We maintain three separate rbtrees: one for direct refs, one for
* indirect refs which have a key, and one for indirect refs which do not
@@ -609,7 +681,7 @@ unode_aux_to_inode_list(struct ulist_node *node)
static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
struct preftrees *preftrees,
- const u64 *extent_item_pos, u64 total_refs,
+ const u64 *extent_item_pos,
struct share_check *sc, bool ignore_offset)
{
int err;
@@ -653,9 +725,9 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
ret = BACKREF_FOUND_SHARED;
goto out;
}
- err = resolve_indirect_ref(fs_info, path, time_seq, ref,
- parents, extent_item_pos,
- total_refs, ignore_offset);
+ err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
+ ref, parents, extent_item_pos,
+ ignore_offset);
/*
* we can only tolerate ENOENT,otherwise,we should catch error
* and return directly.
@@ -704,7 +776,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
cond_resched();
}
out:
- ulist_free(parents);
+ /*
+ * We may have inode lists attached to refs in the parents ulist, so we
+ * must free them before freeing the ulist and its refs.
+ */
+ free_leaf_list(parents);
return ret;
}
@@ -727,16 +803,18 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
BUG_ON(ref->key_for_search.type);
BUG_ON(!ref->wanted_disk_byte);
- eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
- ref->level - 1, NULL);
+ eb = read_tree_block(fs_info, ref->wanted_disk_byte,
+ ref->root_id, 0, ref->level - 1, NULL);
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
- } else if (!extent_buffer_uptodate(eb)) {
+ }
+ if (!extent_buffer_uptodate(eb)) {
free_pref(ref);
free_extent_buffer(eb);
return -EIO;
}
+
if (lock)
btrfs_tree_read_lock(eb);
if (btrfs_header_level(eb) == 0)
@@ -758,20 +836,14 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
*/
static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head, u64 seq,
- struct preftrees *preftrees, u64 *total_refs,
- struct share_check *sc)
+ struct preftrees *preftrees, struct share_check *sc)
{
struct btrfs_delayed_ref_node *node;
- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct btrfs_key key;
- struct btrfs_key tmp_op_key;
struct rb_node *n;
int count;
int ret = 0;
- if (extent_op && extent_op->update_key)
- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
-
spin_lock(&head->lock);
for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
@@ -793,15 +865,20 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
default:
BUG();
}
- *total_refs += count;
switch (node->type) {
case BTRFS_TREE_BLOCK_REF_KEY: {
/* NORMAL INDIRECT METADATA backref */
struct btrfs_delayed_tree_ref *ref;
+ struct btrfs_key *key_ptr = NULL;
+
+ if (head->extent_op && head->extent_op->update_key) {
+ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
+ key_ptr = &key;
+ }
ref = btrfs_delayed_node_to_tree_ref(node);
ret = add_indirect_ref(fs_info, preftrees, ref->root,
- &tmp_op_key, ref->level + 1,
+ key_ptr, ref->level + 1,
node->bytenr, count, sc,
GFP_ATOMIC);
break;
@@ -827,13 +904,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
key.offset = ref->offset;
/*
- * Found a inum that doesn't match our known inum, we
- * know it's shared.
+ * If we have a share check context and a reference for
+ * another inode, we can't exit immediately. This is
+ * because even if this is a BTRFS_ADD_DELAYED_REF
+ * reference we may find next a BTRFS_DROP_DELAYED_REF
+ * which cancels out this ADD reference.
+ *
+ * If this is a DROP reference and there was no previous
+ * ADD reference, then we need to signal that when we
+ * process references from the extent tree (through
+ * add_inline_refs() and add_keyed_refs()), we should
+ * not exit early if we find a reference for another
+ * inode, because one of the delayed DROP references
+ * may cancel that reference in the extent tree.
*/
- if (sc && sc->inum && ref->objectid != sc->inum) {
- ret = BACKREF_FOUND_SHARED;
- goto out;
- }
+ if (sc && count < 0)
+ sc->have_delayed_delete_refs = true;
ret = add_indirect_ref(fs_info, preftrees, ref->root,
&key, 0, node->bytenr, count, sc,
@@ -863,7 +949,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
if (!ret)
ret = extent_is_shared(sc);
-out:
+
spin_unlock(&head->lock);
return ret;
}
@@ -876,7 +962,7 @@ out:
static int add_inline_refs(const struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 bytenr,
int *info_level, struct preftrees *preftrees,
- u64 *total_refs, struct share_check *sc)
+ struct share_check *sc)
{
int ret = 0;
int slot;
@@ -895,12 +981,11 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
leaf = path->nodes[0];
slot = path->slots[0];
- item_size = btrfs_item_size_nr(leaf, slot);
+ item_size = btrfs_item_size(leaf, slot);
BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
- *total_refs += btrfs_extent_refs(leaf, ei);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
ptr = (unsigned long)(ei + 1);
@@ -967,7 +1052,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -977,6 +1063,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
ret = add_indirect_ref(fs_info, preftrees, root,
&key, 0, bytenr, count,
sc, GFP_NOFS);
+
break;
}
default:
@@ -995,12 +1082,12 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
*
* Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
*/
-static int add_keyed_refs(struct btrfs_fs_info *fs_info,
+static int add_keyed_refs(struct btrfs_root *extent_root,
struct btrfs_path *path, u64 bytenr,
int info_level, struct preftrees *preftrees,
struct share_check *sc)
{
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_fs_info *fs_info = extent_root->fs_info;
int ret;
int slot;
struct extent_buffer *leaf;
@@ -1066,7 +1153,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1094,8 +1182,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
* indirect refs to their parent bytenr.
* When roots are found, they're added to the roots list
*
- * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
- * much like trans == NULL case, the difference only lies in it will not
+ * If time_seq is set to BTRFS_SEQ_LAST, it will not search delayed_refs, and
+ * behave much like trans == NULL case, the difference only lies in it will not
* commit root.
* The special case is for qgroup to search roots in commit_transaction().
*
@@ -1116,6 +1204,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct ulist *roots, const u64 *extent_item_pos,
struct share_check *sc, bool ignore_offset)
{
+ struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
struct btrfs_key key;
struct btrfs_path *path;
struct btrfs_delayed_ref_root *delayed_refs = NULL;
@@ -1125,8 +1214,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct prelim_ref *ref;
struct rb_node *node;
struct extent_inode_elem *eie = NULL;
- /* total of both direct AND indirect refs! */
- u64 total_refs = 0;
struct preftrees preftrees = {
.direct = PREFTREE_INIT,
.indirect = PREFTREE_INIT,
@@ -1148,31 +1235,29 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
path->skip_locking = 1;
}
- if (time_seq == SEQ_LAST)
+ if (time_seq == BTRFS_SEQ_LAST)
path->skip_locking = 1;
- /*
- * grab both a lock on the path and a lock on the delayed ref head.
- * We need both to get a consistent picture of how the refs look
- * at a specified point in time
- */
again:
head = NULL;
- ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0);
+ if (ret == 0) {
+ /* This shouldn't happen, indicates a bug or fs corruption. */
+ ASSERT(ret != 0);
+ ret = -EUCLEAN;
+ goto out;
+ }
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (trans && likely(trans->type != __TRANS_DUMMY) &&
- time_seq != SEQ_LAST) {
-#else
- if (trans && time_seq != SEQ_LAST) {
-#endif
+ time_seq != BTRFS_SEQ_LAST) {
/*
- * look if there are updates for this ref queued and lock the
- * head
+ * We have a specific time_seq we care about and trans which
+ * means we have the path lock, we need to grab the ref head and
+ * lock it so we have a consistent view of the refs at the given
+ * time.
*/
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
@@ -1195,7 +1280,7 @@ again:
}
spin_unlock(&delayed_refs->lock);
ret = add_delayed_refs(fs_info, head, time_seq,
- &preftrees, &total_refs, sc);
+ &preftrees, sc);
mutex_unlock(&head->mutex);
if (ret)
goto out;
@@ -1216,11 +1301,10 @@ again:
(key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY)) {
ret = add_inline_refs(fs_info, path, bytenr,
- &info_level, &preftrees,
- &total_refs, sc);
+ &info_level, &preftrees, sc);
if (ret)
goto out;
- ret = add_keyed_refs(fs_info, path, bytenr, info_level,
+ ret = add_keyed_refs(root, path, bytenr, info_level,
&preftrees, sc);
if (ret)
goto out;
@@ -1236,7 +1320,7 @@ again:
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
- extent_item_pos, total_refs, sc, ignore_offset);
+ extent_item_pos, sc, ignore_offset);
if (ret)
goto out;
@@ -1281,28 +1365,33 @@ again:
struct extent_buffer *eb;
eb = read_tree_block(fs_info, ref->parent, 0,
- ref->level, NULL);
+ 0, ref->level, NULL);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
- } else if (!extent_buffer_uptodate(eb)) {
+ }
+ if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
}
- if (!path->skip_locking) {
+ if (!path->skip_locking)
btrfs_tree_read_lock(eb);
- btrfs_set_lock_blocking_read(eb);
- }
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie, ignore_offset);
if (!path->skip_locking)
- btrfs_tree_read_unlock_blocking(eb);
+ btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
if (ret < 0)
goto out;
ref->inode_list = eie;
+ /*
+ * We transferred the list ownership to the ref,
+ * so set to NULL to avoid a double free in case
+ * an error happens after this.
+ */
+ eie = NULL;
}
ret = ulist_add_merge_ptr(refs, ref->parent,
ref->inode_list,
@@ -1311,15 +1400,31 @@ again:
goto out;
if (!ret && extent_item_pos) {
/*
- * we've recorded that parent, so we must extend
- * its inode list here
+ * We've recorded that parent, so we must extend
+ * its inode list here.
+ *
+ * However if there was corruption we may not
+ * have found an eie, return an error in this
+ * case.
*/
- BUG_ON(!eie);
+ ASSERT(eie);
+ if (!eie) {
+ ret = -EUCLEAN;
+ goto out;
+ }
while (eie->next)
eie = eie->next;
eie->next = ref->inode_list;
}
eie = NULL;
+ /*
+ * We have transferred the inode list ownership from
+ * this ref to the ref we added to the 'refs' ulist.
+ * So set this ref's inode list to NULL to avoid
+ * use-after-free when our caller uses it or double
+ * frees in case an error happens before we return.
+ */
+ ref->inode_list = NULL;
}
cond_resched();
}
@@ -1336,24 +1441,6 @@ out:
return ret;
}
-static void free_leaf_list(struct ulist *blocks)
-{
- struct ulist_node *node = NULL;
- struct extent_inode_elem *eie;
- struct ulist_iterator uiter;
-
- ULIST_ITER_INIT(&uiter);
- while ((node = ulist_next(blocks, &uiter))) {
- if (!node->aux)
- continue;
- eie = unode_aux_to_inode_list(node);
- free_inode_elem_list(eie);
- node->aux = 0;
- }
-
- ulist_free(blocks);
-}
-
/*
* Finds all leafs with a reference to the specified combination of bytenr and
* offset. key_list_head will point to a list of corresponding keys (caller must
@@ -1362,10 +1449,10 @@ static void free_leaf_list(struct ulist *blocks)
*
* returns 0 on success, <0 on error
*/
-static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **leafs,
- const u64 *extent_item_pos, bool ignore_offset)
+int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist **leafs,
+ const u64 *extent_item_pos, bool ignore_offset)
{
int ret;
@@ -1422,6 +1509,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
+ *roots = NULL;
return ret;
}
node = ulist_next(tmp, &uiter);
@@ -1438,23 +1526,150 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots,
- bool ignore_offset)
+ bool skip_commit_root_sem)
{
int ret;
- if (!trans)
+ if (!trans && !skip_commit_root_sem)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
- time_seq, roots, ignore_offset);
- if (!trans)
+ time_seq, roots, false);
+ if (!trans && !skip_commit_root_sem)
up_read(&fs_info->commit_root_sem);
return ret;
}
-/**
- * btrfs_check_shared - tell us whether an extent is shared
+/*
+ * The caller has joined a transaction or is holding a read lock on the
+ * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
+ * snapshot field changing while updating or checking the cache.
+ */
+static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
+ struct btrfs_root *root,
+ u64 bytenr, int level, bool *is_shared)
+{
+ struct btrfs_backref_shared_cache_entry *entry;
+
+ if (!cache->use_cache)
+ return false;
+
+ if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
+ return false;
+
+ /*
+ * Level -1 is used for the data extent, which is not reliable to cache
+ * because its reference count can increase or decrease without us
+ * realizing. We cache results only for extent buffers that lead from
+ * the root node down to the leaf with the file extent item.
+ */
+ ASSERT(level >= 0);
+
+ entry = &cache->entries[level];
+
+ /* Unused cache entry or being used for some other extent buffer. */
+ if (entry->bytenr != bytenr)
+ return false;
+
+ /*
+ * We cached a false result, but the last snapshot generation of the
+ * root changed, so we now have a snapshot. Don't trust the result.
+ */
+ if (!entry->is_shared &&
+ entry->gen != btrfs_root_last_snapshot(&root->root_item))
+ return false;
+
+ /*
+ * If we cached a true result and the last generation used for dropping
+ * a root changed, we can not trust the result, because the dropped root
+ * could be a snapshot sharing this extent buffer.
+ */
+ if (entry->is_shared &&
+ entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
+ return false;
+
+ *is_shared = entry->is_shared;
+ /*
+ * If the node at this level is shared, than all nodes below are also
+ * shared. Currently some of the nodes below may be marked as not shared
+ * because we have just switched from one leaf to another, and switched
+ * also other nodes above the leaf and below the current level, so mark
+ * them as shared.
+ */
+ if (*is_shared) {
+ for (int i = 0; i < level; i++) {
+ cache->entries[i].is_shared = true;
+ cache->entries[i].gen = entry->gen;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * The caller has joined a transaction or is holding a read lock on the
+ * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
+ * snapshot field changing while updating or checking the cache.
+ */
+static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
+ struct btrfs_root *root,
+ u64 bytenr, int level, bool is_shared)
+{
+ struct btrfs_backref_shared_cache_entry *entry;
+ u64 gen;
+
+ if (!cache->use_cache)
+ return;
+
+ if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
+ return;
+
+ /*
+ * Level -1 is used for the data extent, which is not reliable to cache
+ * because its reference count can increase or decrease without us
+ * realizing. We cache results only for extent buffers that lead from
+ * the root node down to the leaf with the file extent item.
+ */
+ ASSERT(level >= 0);
+
+ if (is_shared)
+ gen = btrfs_get_last_root_drop_gen(root->fs_info);
+ else
+ gen = btrfs_root_last_snapshot(&root->root_item);
+
+ entry = &cache->entries[level];
+ entry->bytenr = bytenr;
+ entry->is_shared = is_shared;
+ entry->gen = gen;
+
+ /*
+ * If we found an extent buffer is shared, set the cache result for all
+ * extent buffers below it to true. As nodes in the path are COWed,
+ * their sharedness is moved to their children, and if a leaf is COWed,
+ * then the sharedness of a data extent becomes direct, the refcount of
+ * data extent is increased in the extent item at the extent tree.
+ */
+ if (is_shared) {
+ for (int i = 0; i < level; i++) {
+ entry = &cache->entries[i];
+ entry->is_shared = is_shared;
+ entry->gen = gen;
+ }
+ }
+}
+
+/*
+ * Check if a data extent is shared or not.
*
- * btrfs_check_shared uses the backref walking code but will short
+ * @root: The root the inode belongs to.
+ * @inum: Number of the inode whose extent we are checking.
+ * @bytenr: Logical bytenr of the extent we are checking.
+ * @extent_gen: Generation of the extent (file extent item) or 0 if it is
+ * not known.
+ * @roots: List of roots this extent is shared among.
+ * @tmp: Temporary list used for iteration.
+ * @cache: A backref lookup result cache.
+ *
+ * btrfs_is_data_extent_shared uses the backref walking code but will short
* circuit as soon as it finds a root or inode that doesn't match the
* one passed in. This provides a significant performance benefit for
* callers (such as fiemap) which want to know whether the extent is
@@ -1465,20 +1680,24 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
*
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
*/
-int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
- struct ulist *roots, struct ulist *tmp)
+int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+ u64 extent_gen,
+ struct ulist *roots, struct ulist *tmp,
+ struct btrfs_backref_shared_cache *cache)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
struct ulist_iterator uiter;
struct ulist_node *node;
- struct seq_list elem = SEQ_LIST_INIT(elem);
+ struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem);
int ret = 0;
struct share_check shared = {
.root_objectid = root->root_key.objectid,
.inum = inum,
.share_count = 0,
+ .have_delayed_delete_refs = false,
};
+ int level;
ulist_init(roots);
ulist_init(tmp);
@@ -1495,23 +1714,73 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
btrfs_get_tree_mod_seq(fs_info, &elem);
}
+ /* -1 means we are in the bytenr of the data extent. */
+ level = -1;
ULIST_ITER_INIT(&uiter);
+ cache->use_cache = true;
while (1) {
+ bool is_shared;
+ bool cached;
+
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, &shared, false);
if (ret == BACKREF_FOUND_SHARED) {
/* this is the only condition under which we return 1 */
ret = 1;
+ if (level >= 0)
+ store_backref_shared_cache(cache, root, bytenr,
+ level, true);
break;
}
if (ret < 0 && ret != -ENOENT)
break;
ret = 0;
+ /*
+ * If our data extent is not shared through reflinks and it was
+ * created in a generation after the last one used to create a
+ * snapshot of the inode's root, then it can not be shared
+ * indirectly through subtrees, as that can only happen with
+ * snapshots. In this case bail out, no need to check for the
+ * sharedness of extent buffers.
+ */
+ if (level == -1 &&
+ extent_gen > btrfs_root_last_snapshot(&root->root_item))
+ break;
+
+ /*
+ * If our data extent was not directly shared (without multiple
+ * reference items), than it might have a single reference item
+ * with a count > 1 for the same offset, which means there are 2
+ * (or more) file extent items that point to the data extent -
+ * this happens when a file extent item needs to be split and
+ * then one item gets moved to another leaf due to a b+tree leaf
+ * split when inserting some item. In this case the file extent
+ * items may be located in different leaves and therefore some
+ * of the leaves may be referenced through shared subtrees while
+ * others are not. Since our extent buffer cache only works for
+ * a single path (by far the most common case and simpler to
+ * deal with), we can not use it if we have multiple leaves
+ * (which implies multiple paths).
+ */
+ if (level == -1 && tmp->nnodes > 1)
+ cache->use_cache = false;
+
+ if (level >= 0)
+ store_backref_shared_cache(cache, root, bytenr,
+ level, false);
node = ulist_next(tmp, &uiter);
if (!node)
break;
bytenr = node->val;
+ level++;
+ cached = lookup_backref_shared_cache(cache, root, bytenr, level,
+ &is_shared);
+ if (cached) {
+ ret = (is_shared ? 1 : 0);
+ break;
+ }
shared.share_count = 0;
+ shared.have_delayed_delete_refs = false;
cond_resched();
}
@@ -1620,13 +1889,11 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
s64 bytes_left = ((s64)size) - 1;
struct extent_buffer *eb = eb_in;
struct btrfs_key found_key;
- int leave_spinning = path->leave_spinning;
struct btrfs_inode_ref *iref;
if (bytes_left >= 0)
dest[bytes_left] = '\0';
- path->leave_spinning = 1;
while (1) {
bytes_left -= name_len;
if (bytes_left >= 0)
@@ -1634,7 +1901,7 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
name_off, name_len);
if (eb != eb_in) {
if (!path->skip_locking)
- btrfs_tree_read_unlock_blocking(eb);
+ btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
}
ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1654,8 +1921,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
if (eb != eb_in) {
- if (!path->skip_locking)
- btrfs_set_lock_blocking_read(eb);
path->nodes[0] = NULL;
path->locks[0] = 0;
}
@@ -1672,7 +1937,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
}
btrfs_release_path(path);
- path->leave_spinning = leave_spinning;
if (ret)
return ERR_PTR(ret);
@@ -1689,6 +1953,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
struct btrfs_path *path, struct btrfs_key *found_key,
u64 *flags_ret)
{
+ struct btrfs_root *extent_root = btrfs_extent_root(fs_info, logical);
int ret;
u64 flags;
u64 size = 0;
@@ -1704,11 +1969,11 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
key.objectid = logical;
key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
+ ret = btrfs_previous_extent_item(extent_root, path, 0);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@@ -1728,7 +1993,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
}
eb = path->nodes[0];
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
+ item_size = btrfs_item_size(eb, path->slots[0]);
BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
@@ -1903,7 +2168,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
- struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
+ struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem);
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
@@ -1911,7 +2176,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
extent_item_objectid);
if (!search_commit_root) {
- trans = btrfs_attach_transaction(fs_info->extent_root);
+ trans = btrfs_attach_transaction(fs_info->tree_root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT &&
PTR_ERR(trans) != -EROFS)
@@ -1921,12 +2186,12 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
}
if (trans)
- btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ btrfs_get_tree_mod_seq(fs_info, &seq_elem);
else
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
- tree_mod_seq_elem.seq, &refs,
+ seq_elem.seq, &refs,
&extent_item_pos, ignore_offset);
if (ret)
goto out;
@@ -1934,7 +2199,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
- tree_mod_seq_elem.seq, &roots,
+ seq_elem.seq, &roots,
ignore_offset);
if (ret)
break;
@@ -1957,7 +2222,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
free_leaf_list(refs);
out:
if (trans) {
- btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ btrfs_put_tree_mod_seq(fs_info, &seq_elem);
btrfs_end_transaction(trans);
} else {
up_read(&fs_info->commit_root_sem);
@@ -1966,10 +2231,29 @@ out:
return ret;
}
+static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
+{
+ struct btrfs_data_container *inodes = ctx;
+ const size_t c = 3 * sizeof(u64);
+
+ if (inodes->bytes_left >= c) {
+ inodes->bytes_left -= c;
+ inodes->val[inodes->elem_cnt] = inum;
+ inodes->val[inodes->elem_cnt + 1] = offset;
+ inodes->val[inodes->elem_cnt + 2] = root;
+ inodes->elem_cnt += 3;
+ } else {
+ inodes->bytes_missing += c - inodes->bytes_left;
+ inodes->bytes_left = 0;
+ inodes->elem_missed += 3;
+ }
+
+ return 0;
+}
+
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- iterate_extent_inodes_t *iterate, void *ctx,
- bool ignore_offset)
+ void *ctx, bool ignore_offset)
{
int ret;
u64 extent_item_pos;
@@ -1987,17 +2271,15 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, search_commit_root,
- iterate, ctx, ignore_offset);
+ build_ino_list, ctx, ignore_offset);
return ret;
}
-typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
- struct extent_buffer *eb, void *ctx);
+static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb, struct inode_fs_paths *ipath);
-static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
- struct btrfs_path *path,
- iterate_irefs_t *iterate, void *ctx)
+static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath)
{
int ret = 0;
int slot;
@@ -2006,8 +2288,9 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
u32 name_len;
u64 parent = 0;
int found = 0;
+ struct btrfs_root *fs_root = ipath->fs_root;
+ struct btrfs_path *path = ipath->btrfs_path;
struct extent_buffer *eb;
- struct btrfs_item *item;
struct btrfs_inode_ref *iref;
struct btrfs_key found_key;
@@ -2033,18 +2316,17 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
}
btrfs_release_path(path);
- item = btrfs_item_nr(slot);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
- for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
+ for (cur = 0; cur < btrfs_item_size(eb, slot); cur += len) {
name_len = btrfs_inode_ref_name_len(eb, iref);
/* path must be released before calling iterate()! */
btrfs_debug(fs_root->fs_info,
"following ref at offset %u for inode %llu in tree %llu",
cur, found_key.objectid,
fs_root->root_key.objectid);
- ret = iterate(parent, name_len,
- (unsigned long)(iref + 1), eb, ctx);
+ ret = inode_to_path(parent, name_len,
+ (unsigned long)(iref + 1), eb, ipath);
if (ret)
break;
len = sizeof(*iref) + name_len;
@@ -2058,15 +2340,15 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
return ret;
}
-static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
- struct btrfs_path *path,
- iterate_irefs_t *iterate, void *ctx)
+static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath)
{
int ret;
int slot;
u64 offset = 0;
u64 parent;
int found = 0;
+ struct btrfs_root *fs_root = ipath->fs_root;
+ struct btrfs_path *path = ipath->btrfs_path;
struct extent_buffer *eb;
struct btrfs_inode_extref *extref;
u32 item_size;
@@ -2092,7 +2374,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
}
btrfs_release_path(path);
- item_size = btrfs_item_size_nr(eb, slot);
+ item_size = btrfs_item_size(eb, slot);
ptr = btrfs_item_ptr_offset(eb, slot);
cur_offset = 0;
@@ -2102,8 +2384,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
parent = btrfs_inode_extref_parent(eb, extref);
name_len = btrfs_inode_extref_name_len(eb, extref);
- ret = iterate(parent, name_len,
- (unsigned long)&extref->name, eb, ctx);
+ ret = inode_to_path(parent, name_len,
+ (unsigned long)&extref->name, eb, ipath);
if (ret)
break;
@@ -2120,34 +2402,13 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
return ret;
}
-static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
- struct btrfs_path *path, iterate_irefs_t *iterate,
- void *ctx)
-{
- int ret;
- int found_refs = 0;
-
- ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
- if (!ret)
- ++found_refs;
- else if (ret != -ENOENT)
- return ret;
-
- ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
- if (ret == -ENOENT && found_refs)
- return 0;
-
- return ret;
-}
-
/*
* returns 0 if the path could be dumped (probably truncated)
* returns <0 in case of an error
*/
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
- struct extent_buffer *eb, void *ctx)
+ struct extent_buffer *eb, struct inode_fs_paths *ipath)
{
- struct inode_fs_paths *ipath = ctx;
char *fspath;
char *fspath_min;
int i = ipath->fspath->elem_cnt;
@@ -2188,8 +2449,20 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
*/
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
{
- return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
- inode_to_path, ipath);
+ int ret;
+ int found_refs = 0;
+
+ ret = iterate_inode_refs(inum, ipath);
+ if (!ret)
+ ++found_refs;
+ else if (ret != -ENOENT)
+ return ret;
+
+ ret = iterate_inode_extrefs(inum, ipath);
+ if (ret == -ENOENT && found_refs)
+ return 0;
+
+ return ret;
}
struct btrfs_data_container *init_data_container(u32 total_bytes)
@@ -2252,3 +2525,827 @@ void free_ipath(struct inode_fs_paths *ipath)
kvfree(ipath->fspath);
kfree(ipath);
}
+
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(
+ struct btrfs_fs_info *fs_info, gfp_t gfp_flag)
+{
+ struct btrfs_backref_iter *ret;
+
+ ret = kzalloc(sizeof(*ret), gfp_flag);
+ if (!ret)
+ return NULL;
+
+ ret->path = btrfs_alloc_path();
+ if (!ret->path) {
+ kfree(ret);
+ return NULL;
+ }
+
+ /* Current backref iterator only supports iteration in commit root */
+ ret->path->search_commit_root = 1;
+ ret->path->skip_locking = 1;
+ ret->fs_info = fs_info;
+
+ return ret;
+}
+
+int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
+{
+ struct btrfs_fs_info *fs_info = iter->fs_info;
+ struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
+ struct btrfs_path *path = iter->path;
+ struct btrfs_extent_item *ei;
+ struct btrfs_key key;
+ int ret;
+
+ key.objectid = bytenr;
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ key.offset = (u64)-1;
+ iter->bytenr = bytenr;
+
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+ if (ret == 0) {
+ ret = -EUCLEAN;
+ goto release;
+ }
+ if (path->slots[0] == 0) {
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ ret = -EUCLEAN;
+ goto release;
+ }
+ path->slots[0]--;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) {
+ ret = -ENOENT;
+ goto release;
+ }
+ memcpy(&iter->cur_key, &key, sizeof(key));
+ iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]);
+ iter->end_ptr = (u32)(iter->item_ptr +
+ btrfs_item_size(path->nodes[0], path->slots[0]));
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_extent_item);
+
+ /*
+ * Only support iteration on tree backref yet.
+ *
+ * This is an extra precaution for non skinny-metadata, where
+ * EXTENT_ITEM is also used for tree blocks, that we can only use
+ * extent flags to determine if it's a tree block.
+ */
+ if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
+ ret = -ENOTSUPP;
+ goto release;
+ }
+ iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
+
+ /* If there is no inline backref, go search for keyed backref */
+ if (iter->cur_ptr >= iter->end_ptr) {
+ ret = btrfs_next_item(extent_root, path);
+
+ /* No inline nor keyed ref */
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto release;
+ }
+ if (ret < 0)
+ goto release;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
+ path->slots[0]);
+ if (iter->cur_key.objectid != bytenr ||
+ (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+ iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
+ ret = -ENOENT;
+ goto release;
+ }
+ iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]);
+ iter->item_ptr = iter->cur_ptr;
+ iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size(
+ path->nodes[0], path->slots[0]));
+ }
+
+ return 0;
+release:
+ btrfs_backref_iter_release(iter);
+ return ret;
+}
+
+/*
+ * Go to the next backref item of current bytenr, can be either inlined or
+ * keyed.
+ *
+ * Caller needs to check whether it's inline ref or not by iter->cur_key.
+ *
+ * Return 0 if we get next backref without problem.
+ * Return >0 if there is no extra backref for this bytenr.
+ * Return <0 if there is something wrong happened.
+ */
+int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
+{
+ struct extent_buffer *eb = btrfs_backref_get_eb(iter);
+ struct btrfs_root *extent_root;
+ struct btrfs_path *path = iter->path;
+ struct btrfs_extent_inline_ref *iref;
+ int ret;
+ u32 size;
+
+ if (btrfs_backref_iter_is_inline_ref(iter)) {
+ /* We're still inside the inline refs */
+ ASSERT(iter->cur_ptr < iter->end_ptr);
+
+ if (btrfs_backref_has_tree_block_info(iter)) {
+ /* First tree block info */
+ size = sizeof(struct btrfs_tree_block_info);
+ } else {
+ /* Use inline ref type to determine the size */
+ int type;
+
+ iref = (struct btrfs_extent_inline_ref *)
+ ((unsigned long)iter->cur_ptr);
+ type = btrfs_extent_inline_ref_type(eb, iref);
+
+ size = btrfs_extent_inline_ref_size(type);
+ }
+ iter->cur_ptr += size;
+ if (iter->cur_ptr < iter->end_ptr)
+ return 0;
+
+ /* All inline items iterated, fall through */
+ }
+
+ /* We're at keyed items, there is no inline item, go to the next one */
+ extent_root = btrfs_extent_root(iter->fs_info, iter->bytenr);
+ ret = btrfs_next_item(extent_root, iter->path);
+ if (ret)
+ return ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]);
+ if (iter->cur_key.objectid != iter->bytenr ||
+ (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+ iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
+ return 1;
+ iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+ path->slots[0]);
+ iter->cur_ptr = iter->item_ptr;
+ iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size(path->nodes[0],
+ path->slots[0]);
+ return 0;
+}
+
+void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_backref_cache *cache, int is_reloc)
+{
+ int i;
+
+ cache->rb_root = RB_ROOT;
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+ INIT_LIST_HEAD(&cache->pending[i]);
+ INIT_LIST_HEAD(&cache->changed);
+ INIT_LIST_HEAD(&cache->detached);
+ INIT_LIST_HEAD(&cache->leaves);
+ INIT_LIST_HEAD(&cache->pending_edge);
+ INIT_LIST_HEAD(&cache->useless_node);
+ cache->fs_info = fs_info;
+ cache->is_reloc = is_reloc;
+}
+
+struct btrfs_backref_node *btrfs_backref_alloc_node(
+ struct btrfs_backref_cache *cache, u64 bytenr, int level)
+{
+ struct btrfs_backref_node *node;
+
+ ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL);
+ node = kzalloc(sizeof(*node), GFP_NOFS);
+ if (!node)
+ return node;
+
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->upper);
+ INIT_LIST_HEAD(&node->lower);
+ RB_CLEAR_NODE(&node->rb_node);
+ cache->nr_nodes++;
+ node->level = level;
+ node->bytenr = bytenr;
+
+ return node;
+}
+
+struct btrfs_backref_edge *btrfs_backref_alloc_edge(
+ struct btrfs_backref_cache *cache)
+{
+ struct btrfs_backref_edge *edge;
+
+ edge = kzalloc(sizeof(*edge), GFP_NOFS);
+ if (edge)
+ cache->nr_edges++;
+ return edge;
+}
+
+/*
+ * Drop the backref node from cache, also cleaning up all its
+ * upper edges and any uncached nodes in the path.
+ *
+ * This cleanup happens bottom up, thus the node should either
+ * be the lowest node in the cache or a detached node.
+ */
+void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node)
+{
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_edge *edge;
+
+ if (!node)
+ return;
+
+ BUG_ON(!node->lowest && !node->detached);
+ while (!list_empty(&node->upper)) {
+ edge = list_entry(node->upper.next, struct btrfs_backref_edge,
+ list[LOWER]);
+ upper = edge->node[UPPER];
+ list_del(&edge->list[LOWER]);
+ list_del(&edge->list[UPPER]);
+ btrfs_backref_free_edge(cache, edge);
+
+ /*
+ * Add the node to leaf node list if no other child block
+ * cached.
+ */
+ if (list_empty(&upper->lower)) {
+ list_add_tail(&upper->lower, &cache->leaves);
+ upper->lowest = 1;
+ }
+ }
+
+ btrfs_backref_drop_node(cache, node);
+}
+
+/*
+ * Release all nodes/edges from current cache
+ */
+void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
+{
+ struct btrfs_backref_node *node;
+ int i;
+
+ while (!list_empty(&cache->detached)) {
+ node = list_entry(cache->detached.next,
+ struct btrfs_backref_node, list);
+ btrfs_backref_cleanup_node(cache, node);
+ }
+
+ while (!list_empty(&cache->leaves)) {
+ node = list_entry(cache->leaves.next,
+ struct btrfs_backref_node, lower);
+ btrfs_backref_cleanup_node(cache, node);
+ }
+
+ cache->last_trans = 0;
+
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+ ASSERT(list_empty(&cache->pending[i]));
+ ASSERT(list_empty(&cache->pending_edge));
+ ASSERT(list_empty(&cache->useless_node));
+ ASSERT(list_empty(&cache->changed));
+ ASSERT(list_empty(&cache->detached));
+ ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
+ ASSERT(!cache->nr_nodes);
+ ASSERT(!cache->nr_edges);
+}
+
+/*
+ * Handle direct tree backref
+ *
+ * Direct tree backref means, the backref item shows its parent bytenr
+ * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
+ *
+ * @ref_key: The converted backref key.
+ * For keyed backref, it's the item key.
+ * For inlined backref, objectid is the bytenr,
+ * type is btrfs_inline_ref_type, offset is
+ * btrfs_inline_ref_offset.
+ */
+static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
+ struct btrfs_key *ref_key,
+ struct btrfs_backref_node *cur)
+{
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_node *upper;
+ struct rb_node *rb_node;
+
+ ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);
+
+ /* Only reloc root uses backref pointing to itself */
+ if (ref_key->objectid == ref_key->offset) {
+ struct btrfs_root *root;
+
+ cur->is_reloc_root = 1;
+ /* Only reloc backref cache cares about a specific root */
+ if (cache->is_reloc) {
+ root = find_reloc_root(cache->fs_info, cur->bytenr);
+ if (!root)
+ return -ENOENT;
+ cur->root = root;
+ } else {
+ /*
+ * For generic purpose backref cache, reloc root node
+ * is useless.
+ */
+ list_add(&cur->list, &cache->useless_node);
+ }
+ return 0;
+ }
+
+ edge = btrfs_backref_alloc_edge(cache);
+ if (!edge)
+ return -ENOMEM;
+
+ rb_node = rb_simple_search(&cache->rb_root, ref_key->offset);
+ if (!rb_node) {
+ /* Parent node not yet cached */
+ upper = btrfs_backref_alloc_node(cache, ref_key->offset,
+ cur->level + 1);
+ if (!upper) {
+ btrfs_backref_free_edge(cache, edge);
+ return -ENOMEM;
+ }
+
+ /*
+ * Backrefs for the upper level block isn't cached, add the
+ * block to pending list
+ */
+ list_add_tail(&edge->list[UPPER], &cache->pending_edge);
+ } else {
+ /* Parent node already cached */
+ upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
+ ASSERT(upper->checked);
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ }
+ btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER);
+ return 0;
+}
+
+/*
+ * Handle indirect tree backref
+ *
+ * Indirect tree backref means, we only know which tree the node belongs to.
+ * We still need to do a tree search to find out the parents. This is for
+ * TREE_BLOCK_REF backref (keyed or inlined).
+ *
+ * @ref_key: The same as @ref_key in handle_direct_tree_backref()
+ * @tree_key: The first key of this tree block.
+ * @path: A clean (released) path, to avoid allocating path every time
+ * the function get called.
+ */
+static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
+ struct btrfs_path *path,
+ struct btrfs_key *ref_key,
+ struct btrfs_key *tree_key,
+ struct btrfs_backref_node *cur)
+{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_node *lower;
+ struct btrfs_backref_edge *edge;
+ struct extent_buffer *eb;
+ struct btrfs_root *root;
+ struct rb_node *rb_node;
+ int level;
+ bool need_check = true;
+ int ret;
+
+ root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
+ cur->cowonly = 1;
+
+ if (btrfs_root_level(&root->root_item) == cur->level) {
+ /* Tree root */
+ ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
+ /*
+ * For reloc backref cache, we may ignore reloc root. But for
+ * general purpose backref cache, we can't rely on
+ * btrfs_should_ignore_reloc_root() as it may conflict with
+ * current running relocation and lead to missing root.
+ *
+ * For general purpose backref cache, reloc root detection is
+ * completely relying on direct backref (key->offset is parent
+ * bytenr), thus only do such check for reloc cache.
+ */
+ if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
+ btrfs_put_root(root);
+ list_add(&cur->list, &cache->useless_node);
+ } else {
+ cur->root = root;
+ }
+ return 0;
+ }
+
+ level = cur->level + 1;
+
+ /* Search the tree to find parent blocks referring to the block */
+ path->search_commit_root = 1;
+ path->skip_locking = 1;
+ path->lowest_level = level;
+ ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
+ path->lowest_level = 0;
+ if (ret < 0) {
+ btrfs_put_root(root);
+ return ret;
+ }
+ if (ret > 0 && path->slots[level] > 0)
+ path->slots[level]--;
+
+ eb = path->nodes[level];
+ if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
+ btrfs_err(fs_info,
+"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
+ cur->bytenr, level - 1, root->root_key.objectid,
+ tree_key->objectid, tree_key->type, tree_key->offset);
+ btrfs_put_root(root);
+ ret = -ENOENT;
+ goto out;
+ }
+ lower = cur;
+
+ /* Add all nodes and edges in the path */
+ for (; level < BTRFS_MAX_LEVEL; level++) {
+ if (!path->nodes[level]) {
+ ASSERT(btrfs_root_bytenr(&root->root_item) ==
+ lower->bytenr);
+ /* Same as previous should_ignore_reloc_root() call */
+ if (btrfs_should_ignore_reloc_root(root) &&
+ cache->is_reloc) {
+ btrfs_put_root(root);
+ list_add(&lower->list, &cache->useless_node);
+ } else {
+ lower->root = root;
+ }
+ break;
+ }
+
+ edge = btrfs_backref_alloc_edge(cache);
+ if (!edge) {
+ btrfs_put_root(root);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ eb = path->nodes[level];
+ rb_node = rb_simple_search(&cache->rb_root, eb->start);
+ if (!rb_node) {
+ upper = btrfs_backref_alloc_node(cache, eb->start,
+ lower->level + 1);
+ if (!upper) {
+ btrfs_put_root(root);
+ btrfs_backref_free_edge(cache, edge);
+ ret = -ENOMEM;
+ goto out;
+ }
+ upper->owner = btrfs_header_owner(eb);
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
+ upper->cowonly = 1;
+
+ /*
+ * If we know the block isn't shared we can avoid
+ * checking its backrefs.
+ */
+ if (btrfs_block_can_be_shared(root, eb))
+ upper->checked = 0;
+ else
+ upper->checked = 1;
+
+ /*
+ * Add the block to pending list if we need to check its
+ * backrefs, we only do this once while walking up a
+ * tree as we will catch anything else later on.
+ */
+ if (!upper->checked && need_check) {
+ need_check = false;
+ list_add_tail(&edge->list[UPPER],
+ &cache->pending_edge);
+ } else {
+ if (upper->checked)
+ need_check = true;
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ }
+ } else {
+ upper = rb_entry(rb_node, struct btrfs_backref_node,
+ rb_node);
+ ASSERT(upper->checked);
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ if (!upper->owner)
+ upper->owner = btrfs_header_owner(eb);
+ }
+ btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);
+
+ if (rb_node) {
+ btrfs_put_root(root);
+ break;
+ }
+ lower = upper;
+ upper = NULL;
+ }
+out:
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
+ * Add backref node @cur into @cache.
+ *
+ * NOTE: Even if the function returned 0, @cur is not yet cached as its upper
+ * links aren't yet bi-directional. Needs to finish such links.
+ * Use btrfs_backref_finish_upper_links() to finish such linkage.
+ *
+ * @path: Released path for indirect tree backref lookup
+ * @iter: Released backref iter for extent tree search
+ * @node_key: The first key of the tree block
+ */
+int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
+ struct btrfs_path *path,
+ struct btrfs_backref_iter *iter,
+ struct btrfs_key *node_key,
+ struct btrfs_backref_node *cur)
+{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_backref_edge *edge;
+ struct btrfs_backref_node *exist;
+ int ret;
+
+ ret = btrfs_backref_iter_start(iter, cur->bytenr);
+ if (ret < 0)
+ return ret;
+ /*
+ * We skip the first btrfs_tree_block_info, as we don't use the key
+ * stored in it, but fetch it from the tree block
+ */
+ if (btrfs_backref_has_tree_block_info(iter)) {
+ ret = btrfs_backref_iter_next(iter);
+ if (ret < 0)
+ goto out;
+ /* No extra backref? This means the tree block is corrupted */
+ if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+ WARN_ON(cur->checked);
+ if (!list_empty(&cur->upper)) {
+ /*
+ * The backref was added previously when processing backref of
+ * type BTRFS_TREE_BLOCK_REF_KEY
+ */
+ ASSERT(list_is_singular(&cur->upper));
+ edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
+ list[LOWER]);
+ ASSERT(list_empty(&edge->list[UPPER]));
+ exist = edge->node[UPPER];
+ /*
+ * Add the upper level block to pending list if we need check
+ * its backrefs
+ */
+ if (!exist->checked)
+ list_add_tail(&edge->list[UPPER], &cache->pending_edge);
+ } else {
+ exist = NULL;
+ }
+
+ for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
+ struct extent_buffer *eb;
+ struct btrfs_key key;
+ int type;
+
+ cond_resched();
+ eb = btrfs_backref_get_eb(iter);
+
+ key.objectid = iter->bytenr;
+ if (btrfs_backref_iter_is_inline_ref(iter)) {
+ struct btrfs_extent_inline_ref *iref;
+
+ /* Update key for inline backref */
+ iref = (struct btrfs_extent_inline_ref *)
+ ((unsigned long)iter->cur_ptr);
+ type = btrfs_get_extent_inline_ref_type(eb, iref,
+ BTRFS_REF_TYPE_BLOCK);
+ if (type == BTRFS_REF_TYPE_INVALID) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ key.type = type;
+ key.offset = btrfs_extent_inline_ref_offset(eb, iref);
+ } else {
+ key.type = iter->cur_key.type;
+ key.offset = iter->cur_key.offset;
+ }
+
+ /*
+ * Parent node found and matches current inline ref, no need to
+ * rebuild this node for this inline ref
+ */
+ if (exist &&
+ ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
+ exist->owner == key.offset) ||
+ (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
+ exist->bytenr == key.offset))) {
+ exist = NULL;
+ continue;
+ }
+
+ /* SHARED_BLOCK_REF means key.offset is the parent bytenr */
+ if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
+ ret = handle_direct_tree_backref(cache, &key, cur);
+ if (ret < 0)
+ goto out;
+ continue;
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ ret = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ btrfs_handle_fs_error(fs_info, ret, NULL);
+ goto out;
+ } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
+ continue;
+ }
+
+ /*
+ * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
+ * means the root objectid. We need to search the tree to get
+ * its parent bytenr.
+ */
+ ret = handle_indirect_tree_backref(cache, path, &key, node_key,
+ cur);
+ if (ret < 0)
+ goto out;
+ }
+ ret = 0;
+ cur->checked = 1;
+ WARN_ON(exist);
+out:
+ btrfs_backref_iter_release(iter);
+ return ret;
+}
+
+/*
+ * Finish the upwards linkage created by btrfs_backref_add_tree_node()
+ */
+int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *start)
+{
+ struct list_head *useless_node = &cache->useless_node;
+ struct btrfs_backref_edge *edge;
+ struct rb_node *rb_node;
+ LIST_HEAD(pending_edge);
+
+ ASSERT(start->checked);
+
+ /* Insert this node to cache if it's not COW-only */
+ if (!start->cowonly) {
+ rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
+ &start->rb_node);
+ if (rb_node)
+ btrfs_backref_panic(cache->fs_info, start->bytenr,
+ -EEXIST);
+ list_add_tail(&start->lower, &cache->leaves);
+ }
+
+ /*
+ * Use breadth first search to iterate all related edges.
+ *
+ * The starting points are all the edges of this node
+ */
+ list_for_each_entry(edge, &start->upper, list[LOWER])
+ list_add_tail(&edge->list[UPPER], &pending_edge);
+
+ while (!list_empty(&pending_edge)) {
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_node *lower;
+
+ edge = list_first_entry(&pending_edge,
+ struct btrfs_backref_edge, list[UPPER]);
+ list_del_init(&edge->list[UPPER]);
+ upper = edge->node[UPPER];
+ lower = edge->node[LOWER];
+
+ /* Parent is detached, no need to keep any edges */
+ if (upper->detached) {
+ list_del(&edge->list[LOWER]);
+ btrfs_backref_free_edge(cache, edge);
+
+ /* Lower node is orphan, queue for cleanup */
+ if (list_empty(&lower->upper))
+ list_add(&lower->list, useless_node);
+ continue;
+ }
+
+ /*
+ * All new nodes added in current build_backref_tree() haven't
+ * been linked to the cache rb tree.
+ * So if we have upper->rb_node populated, this means a cache
+ * hit. We only need to link the edge, as @upper and all its
+ * parents have already been linked.
+ */
+ if (!RB_EMPTY_NODE(&upper->rb_node)) {
+ if (upper->lowest) {
+ list_del_init(&upper->lower);
+ upper->lowest = 0;
+ }
+
+ list_add_tail(&edge->list[UPPER], &upper->lower);
+ continue;
+ }
+
+ /* Sanity check, we shouldn't have any unchecked nodes */
+ if (!upper->checked) {
+ ASSERT(0);
+ return -EUCLEAN;
+ }
+
+ /* Sanity check, COW-only node has non-COW-only parent */
+ if (start->cowonly != upper->cowonly) {
+ ASSERT(0);
+ return -EUCLEAN;
+ }
+
+ /* Only cache non-COW-only (subvolume trees) tree blocks */
+ if (!upper->cowonly) {
+ rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
+ &upper->rb_node);
+ if (rb_node) {
+ btrfs_backref_panic(cache->fs_info,
+ upper->bytenr, -EEXIST);
+ return -EUCLEAN;
+ }
+ }
+
+ list_add_tail(&edge->list[UPPER], &upper->lower);
+
+ /*
+ * Also queue all the parent edges of this uncached node
+ * to finish the upper linkage
+ */
+ list_for_each_entry(edge, &upper->upper, list[LOWER])
+ list_add_tail(&edge->list[UPPER], &pending_edge);
+ }
+ return 0;
+}
+
+void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node)
+{
+ struct btrfs_backref_node *lower;
+ struct btrfs_backref_node *upper;
+ struct btrfs_backref_edge *edge;
+
+ while (!list_empty(&cache->useless_node)) {
+ lower = list_first_entry(&cache->useless_node,
+ struct btrfs_backref_node, list);
+ list_del_init(&lower->list);
+ }
+ while (!list_empty(&cache->pending_edge)) {
+ edge = list_first_entry(&cache->pending_edge,
+ struct btrfs_backref_edge, list[UPPER]);
+ list_del(&edge->list[UPPER]);
+ list_del(&edge->list[LOWER]);
+ lower = edge->node[LOWER];
+ upper = edge->node[UPPER];
+ btrfs_backref_free_edge(cache, edge);
+
+ /*
+ * Lower is no longer linked to any upper backref nodes and
+ * isn't in the cache, we can free it ourselves.
+ */
+ if (list_empty(&lower->upper) &&
+ RB_EMPTY_NODE(&lower->rb_node))
+ list_add(&lower->list, &cache->useless_node);
+
+ if (!RB_EMPTY_NODE(&upper->rb_node))
+ continue;
+
+ /* Add this guy's upper edges to the list to process */
+ list_for_each_entry(edge, &upper->upper, list[LOWER])
+ list_add_tail(&edge->list[UPPER],
+ &cache->pending_edge);
+ if (list_empty(&upper->upper))
+ list_add(&upper->list, &cache->useless_node);
+ }
+
+ while (!list_empty(&cache->useless_node)) {
+ lower = list_first_entry(&cache->useless_node,
+ struct btrfs_backref_node, list);
+ list_del_init(&lower->list);
+ if (lower == node)
+ node = NULL;
+ btrfs_backref_drop_node(cache, lower);
+ }
+
+ btrfs_backref_cleanup_node(cache, node);
+ ASSERT(list_empty(&cache->useless_node) &&
+ list_empty(&cache->pending_edge));
+}