aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c823
1 files changed, 548 insertions, 275 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3983bdaf4b8..a9543f01184c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -16,6 +16,7 @@
#include "volumes.h"
#include "qgroup.h"
#include "tree-mod-log.h"
+#include "tree-checker.h"
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, int level);
@@ -113,6 +114,22 @@ noinline void btrfs_release_path(struct btrfs_path *p)
}
/*
+ * We want the transaction abort to print stack trace only for errors where the
+ * cause could be a bug, eg. due to ENOSPC, and not for common errors that are
+ * caused by external factors.
+ */
+bool __cold abort_should_print_stack(int errno)
+{
+ switch (errno) {
+ case -EIO:
+ case -EROFS:
+ case -ENOMEM:
+ return false;
+ }
+ return true;
+}
+
+/*
* safely gets a reference on the root node of a tree. A lock
* is not taken, so a concurrent writer may put a different node
* at the root of the tree. See btrfs_lock_root_node for the
@@ -342,7 +359,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
int level = btrfs_header_level(buf);
ret = btrfs_set_disk_extent_flags(trans, buf,
- new_flags, level, 0);
+ new_flags, level);
if (ret)
return ret;
}
@@ -463,8 +480,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
BUG_ON(ret < 0);
rcu_assign_pointer(root->node, cow);
- btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
+ parent_start, last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -485,8 +502,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
}
- btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
+ parent_start, last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -726,21 +743,23 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
}
/*
- * search for key in the extent_buffer. The items start at offset p,
- * and they are item_size apart.
+ * Search for a key in the given extent_buffer.
+ *
+ * The lower boundary for the search is specified by the slot number @low. Use a
+ * value of 0 to search over the whole extent buffer.
*
- * the slot in the array is returned via slot, and it points to
- * the place where you would insert key if it is not found in
- * the array.
+ * The slot in the extent buffer is returned via @slot. If the key exists in the
+ * extent buffer, then @slot will point to the slot where the key is, otherwise
+ * it points to the slot where you would insert the key.
*
- * Slot may point to total number of items if the key is bigger than
- * all of the keys
+ * Slot may point to the total number of items (i.e. one position beyond the last
+ * key) if the key is bigger than the last key in the extent buffer.
*/
-static noinline int generic_bin_search(struct extent_buffer *eb,
- unsigned long p, int item_size,
+static noinline int generic_bin_search(struct extent_buffer *eb, int low,
const struct btrfs_key *key, int *slot)
{
- int low = 0;
+ unsigned long p;
+ int item_size;
int high = btrfs_header_nritems(eb);
int ret;
const int key_size = sizeof(struct btrfs_disk_key);
@@ -753,6 +772,14 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
return -EINVAL;
}
+ if (btrfs_header_level(eb) == 0) {
+ p = offsetof(struct btrfs_leaf, items);
+ item_size = sizeof(struct btrfs_item);
+ } else {
+ p = offsetof(struct btrfs_node, ptrs);
+ item_size = sizeof(struct btrfs_key_ptr);
+ }
+
while (low < high) {
unsigned long oip;
unsigned long offset;
@@ -791,20 +818,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
}
/*
- * simple bin_search frontend that does the right thing for
- * leaves vs nodes
+ * Simple binary search on an extent buffer. Works for both leaves and nodes, and
+ * always searches over the whole range of keys (slot 0 to slot 'nritems - 1').
*/
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int *slot)
{
- if (btrfs_header_level(eb) == 0)
- return generic_bin_search(eb,
- offsetof(struct btrfs_leaf, items),
- sizeof(struct btrfs_item), key, slot);
- else
- return generic_bin_search(eb,
- offsetof(struct btrfs_node, ptrs),
- sizeof(struct btrfs_key_ptr), key, slot);
+ return generic_bin_search(eb, 0, key, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size)
@@ -843,9 +863,11 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
btrfs_header_owner(parent),
btrfs_node_ptr_generation(parent, slot),
level - 1, &first_key);
- if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
+ if (IS_ERR(eb))
+ return eb;
+ if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
- eb = ERR_PTR(-EIO);
+ return ERR_PTR(-EIO);
}
return eb;
@@ -927,7 +949,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
free_extent_buffer(mid);
root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
/* once for the root ptr */
free_extent_buffer_stale(mid);
return 0;
@@ -986,7 +1008,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
- btrfs_free_tree_block(trans, root, right, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), right,
+ 0, 1);
free_extent_buffer_stale(right);
right = NULL;
} else {
@@ -1031,7 +1054,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid);
mid = NULL;
} else {
@@ -1345,33 +1368,34 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
{
int i;
int skip_level = level;
- int no_skips = 0;
- struct extent_buffer *t;
+ bool check_skip = true;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
if (!path->nodes[i])
break;
if (!path->locks[i])
break;
- if (!no_skips && path->slots[i] == 0) {
- skip_level = i + 1;
- continue;
- }
- if (!no_skips && path->keep_locks) {
- u32 nritems;
- t = path->nodes[i];
- nritems = btrfs_header_nritems(t);
- if (nritems < 1 || path->slots[i] >= nritems - 1) {
+
+ if (check_skip) {
+ if (path->slots[i] == 0) {
skip_level = i + 1;
continue;
}
+
+ if (path->keep_locks) {
+ u32 nritems;
+
+ nritems = btrfs_header_nritems(path->nodes[i]);
+ if (nritems < 1 || path->slots[i] >= nritems - 1) {
+ skip_level = i + 1;
+ continue;
+ }
+ }
}
- if (skip_level < i && i >= lowest_unlock)
- no_skips = 1;
- t = path->nodes[i];
if (i >= lowest_unlock && i > skip_level) {
- btrfs_tree_unlock_rw(t, path->locks[i]);
+ check_skip = false;
+ btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
path->locks[i] = 0;
if (write_lock_level &&
i > min_write_lock_level &&
@@ -1383,12 +1407,13 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
}
/*
- * helper function for btrfs_search_slot. The goal is to find a block
- * in cache without setting the path to blocking. If we find the block
- * we return zero and the path is unchanged.
+ * Helper function for btrfs_search_slot() and other functions that do a search
+ * on a btree. The goal is to find a tree block in the cache (the radix tree at
+ * fs_info->buffer_radix), but if we can't find it, or it's not up to date, read
+ * its pages from disk.
*
- * If we can't find the block, we set the path blocking and do some
- * reada. -EAGAIN is returned and the search must be repeated.
+ * Returns -EAGAIN, with the path unlocked, if the caller needs to repeat the
+ * whole btree search, starting again from the current root node.
*/
static int
read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
@@ -1402,12 +1427,21 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
struct btrfs_key first_key;
int ret;
int parent_level;
+ bool unlock_up;
+ unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
blocknr = btrfs_node_blockptr(*eb_ret, slot);
gen = btrfs_node_ptr_generation(*eb_ret, slot);
parent_level = btrfs_header_level(*eb_ret);
btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
+ /*
+ * If we need to read an extent buffer from disk and we are holding locks
+ * on upper level nodes, we unlock all the upper nodes before reading the
+ * extent buffer, and then return -EAGAIN to the caller as it needs to
+ * restart the search. We don't release the lock on the current level
+ * because we need to walk this node to figure out which blocks to read.
+ */
tmp = find_extent_buffer(fs_info, blocknr);
if (tmp) {
if (p->reada == READA_FORWARD_ALWAYS)
@@ -1429,47 +1463,68 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
return 0;
}
+ if (p->nowait) {
+ free_extent_buffer(tmp);
+ return -EAGAIN;
+ }
+
+ if (unlock_up)
+ btrfs_unlock_up_safe(p, level + 1);
+
/* now we're allowed to do a blocking uptodate check */
- ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
- if (!ret) {
- *eb_ret = tmp;
- return 0;
+ ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key);
+ if (ret) {
+ free_extent_buffer(tmp);
+ btrfs_release_path(p);
+ return -EIO;
}
- free_extent_buffer(tmp);
- btrfs_release_path(p);
- return -EIO;
+ if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) {
+ free_extent_buffer(tmp);
+ btrfs_release_path(p);
+ return -EUCLEAN;
+ }
+
+ if (unlock_up)
+ ret = -EAGAIN;
+
+ goto out;
+ } else if (p->nowait) {
+ return -EAGAIN;
}
- /*
- * reduce lock contention at high levels
- * of the btree by dropping locks before
- * we read. Don't release the lock on the current
- * level because we need to walk this node to figure
- * out which blocks to read.
- */
- btrfs_unlock_up_safe(p, level + 1);
+ if (unlock_up) {
+ btrfs_unlock_up_safe(p, level + 1);
+ ret = -EAGAIN;
+ } else {
+ ret = 0;
+ }
if (p->reada != READA_NONE)
reada_for_search(fs_info, p, level, slot, key->objectid);
- ret = -EAGAIN;
tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
gen, parent_level - 1, &first_key);
- if (!IS_ERR(tmp)) {
- /*
- * If the read above didn't mark this buffer up to date,
- * it will never end up being up to date. Set ret to EIO now
- * and give up so that our caller doesn't loop forever
- * on our EAGAINs.
- */
- if (!extent_buffer_uptodate(tmp))
- ret = -EIO;
- free_extent_buffer(tmp);
+ if (IS_ERR(tmp)) {
+ btrfs_release_path(p);
+ return PTR_ERR(tmp);
+ }
+ /*
+ * If the read above didn't mark this buffer up to date,
+ * it will never end up being up to date. Set ret to EIO now
+ * and give up so that our caller doesn't loop forever
+ * on our EAGAINs.
+ */
+ if (!extent_buffer_uptodate(tmp))
+ ret = -EIO;
+
+out:
+ if (ret == 0) {
+ *eb_ret = tmp;
} else {
- ret = PTR_ERR(tmp);
+ free_extent_buffer(tmp);
+ btrfs_release_path(p);
}
- btrfs_release_path(p);
return ret;
}
@@ -1567,35 +1622,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
struct btrfs_path *p,
int write_lock_level)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *b;
- int root_lock;
+ int root_lock = 0;
int level = 0;
- /* We try very hard to do read locks on the root */
- root_lock = BTRFS_READ_LOCK;
-
if (p->search_commit_root) {
- /*
- * The commit roots are read only so we always do read locks,
- * and we always must hold the commit_root_sem when doing
- * searches on them, the only exception is send where we don't
- * want to block transaction commits for a long time, so
- * we need to clone the commit root in order to avoid races
- * with transaction commits that create a snapshot of one of
- * the roots used by a send operation.
- */
- if (p->need_commit_sem) {
- down_read(&fs_info->commit_root_sem);
- b = btrfs_clone_extent_buffer(root->commit_root);
- up_read(&fs_info->commit_root_sem);
- if (!b)
- return ERR_PTR(-ENOMEM);
-
- } else {
- b = root->commit_root;
- atomic_inc(&b->refs);
- }
+ b = root->commit_root;
+ atomic_inc(&b->refs);
level = btrfs_header_level(b);
/*
* Ensure that all callers have set skip_locking when
@@ -1612,6 +1645,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
goto out;
}
+ /* We try very hard to do read locks on the root */
+ root_lock = BTRFS_READ_LOCK;
+
/*
* If the level is set to maximum, we can skip trying to get the read
* lock.
@@ -1621,7 +1657,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
* We don't know the level of the root node until we actually
* have it read locked
*/
- b = btrfs_read_lock_root_node(root);
+ if (p->nowait) {
+ b = btrfs_try_read_lock_root_node(root);
+ if (IS_ERR(b))
+ return b;
+ } else {
+ b = btrfs_read_lock_root_node(root);
+ }
level = btrfs_header_level(b);
if (level > write_lock_level)
goto out;
@@ -1638,6 +1680,17 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
level = btrfs_header_level(b);
out:
+ /*
+ * The root may have failed to write out at some point, and thus is no
+ * longer valid, return an error in this case.
+ */
+ if (!extent_buffer_uptodate(b)) {
+ if (root_lock)
+ btrfs_tree_unlock_rw(b, root_lock);
+ free_extent_buffer(b);
+ return ERR_PTR(-EIO);
+ }
+
p->nodes[level] = b;
if (!p->skip_locking)
p->locks[level] = root_lock;
@@ -1647,6 +1700,191 @@ out:
return b;
}
+/*
+ * Replace the extent buffer at the lowest level of the path with a cloned
+ * version. The purpose is to be able to use it safely, after releasing the
+ * commit root semaphore, even if relocation is happening in parallel, the
+ * transaction used for relocation is committed and the extent buffer is
+ * reallocated in the next transaction.
+ *
+ * This is used in a context where the caller does not prevent transaction
+ * commits from happening, either by holding a transaction handle or holding
+ * some lock, while it's doing searches through a commit root.
+ * At the moment it's only used for send operations.
+ */
+static int finish_need_commit_sem_search(struct btrfs_path *path)
+{
+ const int i = path->lowest_level;
+ const int slot = path->slots[i];
+ struct extent_buffer *lowest = path->nodes[i];
+ struct extent_buffer *clone;
+
+ ASSERT(path->need_commit_sem);
+
+ if (!lowest)
+ return 0;
+
+ lockdep_assert_held_read(&lowest->fs_info->commit_root_sem);
+
+ clone = btrfs_clone_extent_buffer(lowest);
+ if (!clone)
+ return -ENOMEM;
+
+ btrfs_release_path(path);
+ path->nodes[i] = clone;
+ path->slots[i] = slot;
+
+ return 0;
+}
+
+static inline int search_for_key_slot(struct extent_buffer *eb,
+ int search_low_slot,
+ const struct btrfs_key *key,
+ int prev_cmp,
+ int *slot)
+{
+ /*
+ * If a previous call to btrfs_bin_search() on a parent node returned an
+ * exact match (prev_cmp == 0), we can safely assume the target key will
+ * always be at slot 0 on lower levels, since each key pointer
+ * (struct btrfs_key_ptr) refers to the lowest key accessible from the
+ * subtree it points to. Thus we can skip searching lower levels.
+ */
+ if (prev_cmp == 0) {
+ *slot = 0;
+ return 0;
+ }
+
+ return generic_bin_search(eb, search_low_slot, key, slot);
+}
+
+static int search_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const struct btrfs_key *key,
+ struct btrfs_path *path,
+ int ins_len,
+ int prev_cmp)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ int leaf_free_space = -1;
+ int search_low_slot = 0;
+ int ret;
+ bool do_bin_search = true;
+
+ /*
+ * If we are doing an insertion, the leaf has enough free space and the
+ * destination slot for the key is not slot 0, then we can unlock our
+ * write lock on the parent, and any other upper nodes, before doing the
+ * binary search on the leaf (with search_for_key_slot()), allowing other
+ * tasks to lock the parent and any other upper nodes.
+ */
+ if (ins_len > 0) {
+ /*
+ * Cache the leaf free space, since we will need it later and it
+ * will not change until then.
+ */
+ leaf_free_space = btrfs_leaf_free_space(leaf);
+
+ /*
+ * !path->locks[1] means we have a single node tree, the leaf is
+ * the root of the tree.
+ */
+ if (path->locks[1] && leaf_free_space >= ins_len) {
+ struct btrfs_disk_key first_key;
+
+ ASSERT(btrfs_header_nritems(leaf) > 0);
+ btrfs_item_key(leaf, &first_key, 0);
+
+ /*
+ * Doing the extra comparison with the first key is cheap,
+ * taking into account that the first key is very likely
+ * already in a cache line because it immediately follows
+ * the extent buffer's header and we have recently accessed
+ * the header's level field.
+ */
+ ret = comp_keys(&first_key, key);
+ if (ret < 0) {
+ /*
+ * The first key is smaller than the key we want
+ * to insert, so we are safe to unlock all upper
+ * nodes and we have to do the binary search.
+ *
+ * We do use btrfs_unlock_up_safe() and not
+ * unlock_up() because the later does not unlock
+ * nodes with a slot of 0 - we can safely unlock
+ * any node even if its slot is 0 since in this
+ * case the key does not end up at slot 0 of the
+ * leaf and there's no need to split the leaf.
+ */
+ btrfs_unlock_up_safe(path, 1);
+ search_low_slot = 1;
+ } else {
+ /*
+ * The first key is >= then the key we want to
+ * insert, so we can skip the binary search as
+ * the target key will be at slot 0.
+ *
+ * We can not unlock upper nodes when the key is
+ * less than the first key, because we will need
+ * to update the key at slot 0 of the parent node
+ * and possibly of other upper nodes too.
+ * If the key matches the first key, then we can
+ * unlock all the upper nodes, using
+ * btrfs_unlock_up_safe() instead of unlock_up()
+ * as stated above.
+ */
+ if (ret == 0)
+ btrfs_unlock_up_safe(path, 1);
+ /*
+ * ret is already 0 or 1, matching the result of
+ * a btrfs_bin_search() call, so there is no need
+ * to adjust it.
+ */
+ do_bin_search = false;
+ path->slots[0] = 0;
+ }
+ }
+ }
+
+ if (do_bin_search) {
+ ret = search_for_key_slot(leaf, search_low_slot, key,
+ prev_cmp, &path->slots[0]);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (ins_len > 0) {
+ /*
+ * Item key already exists. In this case, if we are allowed to
+ * insert the item (for example, in dir_item case, item key
+ * collision is allowed), it will be merged with the original
+ * item. Only the item size grows, no new btrfs item will be
+ * added. If search_for_extension is not set, ins_len already
+ * accounts the size btrfs_item, deduct it here so leaf space
+ * check will be correct.
+ */
+ if (ret == 0 && !path->search_for_extension) {
+ ASSERT(ins_len >= sizeof(struct btrfs_item));
+ ins_len -= sizeof(struct btrfs_item);
+ }
+
+ ASSERT(leaf_free_space >= 0);
+
+ if (leaf_free_space < ins_len) {
+ int err;
+
+ err = split_leaf(trans, root, key, path, ins_len,
+ (ret == 0));
+ ASSERT(err <= 0);
+ if (WARN_ON(err > 0))
+ err = -EUCLEAN;
+ if (err)
+ ret = err;
+ }
+ }
+
+ return ret;
+}
/*
* btrfs_search_slot - look for a key in a tree and perform necessary
@@ -1683,6 +1921,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, struct btrfs_path *p,
int ins_len, int cow)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *b;
int slot;
int ret;
@@ -1700,6 +1939,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
WARN_ON(p->nodes[0] != NULL);
BUG_ON(!cow && ins_len);
+ /*
+ * For now only allow nowait for read only operations. There's no
+ * strict reason why we can't, we just only need it for reads so it's
+ * only implemented for reads.
+ */
+ ASSERT(!p->nowait || !cow);
+
if (ins_len < 0) {
lowest_unlock = 2;
@@ -1724,6 +1970,16 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
min_write_lock_level = write_lock_level;
+ if (p->need_commit_sem) {
+ ASSERT(p->search_commit_root);
+ if (p->nowait) {
+ if (!down_read_trylock(&fs_info->commit_root_sem))
+ return -EAGAIN;
+ } else {
+ down_read(&fs_info->commit_root_sem);
+ }
+ }
+
again:
prev_cmp = -1;
b = btrfs_search_slot_get_root(root, p, write_lock_level);
@@ -1777,10 +2033,6 @@ again:
}
cow_done:
p->nodes[level] = b;
- /*
- * Leave path with blocking locks to avoid massive
- * lock context switch, this is made on purpose.
- */
/*
* we have a lock on b and as long as we aren't changing
@@ -1802,62 +2054,22 @@ cow_done:
}
}
- /*
- * If btrfs_bin_search returns an exact match (prev_cmp == 0)
- * we can safely assume the target key will always be in slot 0
- * on lower levels due to the invariants BTRFS' btree provides,
- * namely that a btrfs_key_ptr entry always points to the
- * lowest key in the child node, thus we can skip searching
- * lower levels
- */
- if (prev_cmp == 0) {
- slot = 0;
- ret = 0;
- } else {
- ret = btrfs_bin_search(b, key, &slot);
- prev_cmp = ret;
- if (ret < 0)
- goto done;
- }
-
if (level == 0) {
- p->slots[level] = slot;
- /*
- * Item key already exists. In this case, if we are
- * allowed to insert the item (for example, in dir_item
- * case, item key collision is allowed), it will be
- * merged with the original item. Only the item size
- * grows, no new btrfs item will be added. If
- * search_for_extension is not set, ins_len already
- * accounts the size btrfs_item, deduct it here so leaf
- * space check will be correct.
- */
- if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
- ASSERT(ins_len >= sizeof(struct btrfs_item));
- ins_len -= sizeof(struct btrfs_item);
- }
- if (ins_len > 0 &&
- btrfs_leaf_free_space(b) < ins_len) {
- if (write_lock_level < 1) {
- write_lock_level = 1;
- btrfs_release_path(p);
- goto again;
- }
-
- err = split_leaf(trans, root, key,
- p, ins_len, ret == 0);
+ if (ins_len > 0)
+ ASSERT(write_lock_level >= 1);
- BUG_ON(err > 0);
- if (err) {
- ret = err;
- goto done;
- }
- }
+ ret = search_leaf(trans, root, key, p, ins_len, prev_cmp);
if (!p->search_for_split)
unlock_up(p, level, lowest_unlock,
min_write_lock_level, NULL);
goto done;
}
+
+ ret = search_for_key_slot(b, 0, key, prev_cmp, &slot);
+ if (ret < 0)
+ goto done;
+ prev_cmp = ret;
+
if (ret && slot > 0) {
dec = 1;
slot--;
@@ -1904,11 +2116,22 @@ cow_done:
if (!p->skip_locking) {
level = btrfs_header_level(b);
+
+ btrfs_maybe_reset_lockdep_class(root, b);
+
if (level <= write_lock_level) {
btrfs_tree_lock(b);
p->locks[level] = BTRFS_WRITE_LOCK;
} else {
- btrfs_tree_read_lock(b);
+ if (p->nowait) {
+ if (!btrfs_try_tree_read_lock(b)) {
+ free_extent_buffer(b);
+ ret = -EAGAIN;
+ goto done;
+ }
+ } else {
+ btrfs_tree_read_lock(b);
+ }
p->locks[level] = BTRFS_READ_LOCK;
}
p->nodes[level] = b;
@@ -1918,6 +2141,16 @@ cow_done:
done:
if (ret < 0 && !p->skip_release_on_error)
btrfs_release_path(p);
+
+ if (p->need_commit_sem) {
+ int ret2;
+
+ ret2 = finish_need_commit_sem_search(p);
+ up_read(&fs_info->commit_root_sem);
+ if (ret2)
+ ret = ret2;
+ }
+
return ret;
}
ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
@@ -1947,6 +2180,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
+ ASSERT(!p->nowait);
if (p->search_commit_root) {
BUG_ON(time_seq);
@@ -2123,6 +2357,43 @@ int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
return ret;
}
+/**
+ * Search for a valid slot for the given path.
+ *
+ * @root: The root node of the tree.
+ * @key: Will contain a valid item if found.
+ * @path: The starting point to validate the slot.
+ *
+ * Return: 0 if the item is valid
+ * 1 if not found
+ * <0 if error.
+ */
+int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
+ struct btrfs_path *path)
+{
+ while (1) {
+ int ret;
+ const int slot = path->slots[0];
+ const struct extent_buffer *leaf = path->nodes[0];
+
+ /* This is where we start walking the path. */
+ if (slot >= btrfs_header_nritems(leaf)) {
+ /*
+ * If we've reached the last slot in this leaf we need
+ * to go to the next leaf and reset the path.
+ */
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ return ret;
+ continue;
+ }
+ /* Store the found, valid item in @key. */
+ btrfs_item_key_to_cpu(leaf, key, slot);
+ break;
+ }
+ return 0;
+}
+
/*
* adjust the pointers going up the tree, starting at level
* making sure the right key of each node is points to 'key'.
@@ -2615,19 +2886,14 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
*/
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
- struct btrfs_item *start_item;
- struct btrfs_item *end_item;
int data_len;
int nritems = btrfs_header_nritems(l);
int end = min(nritems, start + nr) - 1;
if (!nr)
return 0;
- start_item = btrfs_item_nr(start);
- end_item = btrfs_item_nr(end);
- data_len = btrfs_item_offset(l, start_item) +
- btrfs_item_size(l, start_item);
- data_len = data_len - btrfs_item_offset(l, end_item);
+ data_len = btrfs_item_offset(l, start) + btrfs_item_size(l, start);
+ data_len = data_len - btrfs_item_offset(l, end);
data_len += sizeof(struct btrfs_item) * nr;
WARN_ON(data_len < 0);
return data_len;
@@ -2674,7 +2940,6 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
u32 i;
int push_space = 0;
int push_items = 0;
- struct btrfs_item *item;
u32 nr;
u32 right_nritems;
u32 data_end;
@@ -2691,8 +2956,6 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
slot = path->slots[1];
i = left_nritems - 1;
while (i >= nr) {
- item = btrfs_item_nr(i);
-
if (!empty && push_items > 0) {
if (path->slots[0] > i)
break;
@@ -2707,12 +2970,13 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
if (path->slots[0] == i)
push_space += data_size;
- this_item_size = btrfs_item_size(left, item);
- if (this_item_size + sizeof(*item) + push_space > free_space)
+ this_item_size = btrfs_item_size(left, i);
+ if (this_item_size + sizeof(struct btrfs_item) +
+ push_space > free_space)
break;
push_items++;
- push_space += this_item_size + sizeof(*item);
+ push_space += this_item_size + sizeof(struct btrfs_item);
if (i == 0)
break;
i--;
@@ -2726,7 +2990,7 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
/* push left to right */
right_nritems = btrfs_header_nritems(right);
- push_space = btrfs_item_end_nr(left, left_nritems - push_items);
+ push_space = btrfs_item_data_end(left, left_nritems - push_items);
push_space -= leaf_data_end(left);
/* make room in the right data area */
@@ -2757,9 +3021,8 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(i);
- push_space -= btrfs_token_item_size(&token, item);
- btrfs_set_token_item_offset(&token, item, push_space);
+ push_space -= btrfs_token_item_size(&token, i);
+ btrfs_set_token_item_offset(&token, i, push_space);
}
left_nritems -= push_items;
@@ -2844,16 +3107,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (free_space < data_size)
goto out_unlock;
- /* cow and double check */
ret = btrfs_cow_block(trans, root, right, upper,
slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
if (ret)
goto out_unlock;
- free_space = btrfs_leaf_free_space(right);
- if (free_space < data_size)
- goto out_unlock;
-
left_nritems = btrfs_header_nritems(left);
if (left_nritems == 0)
goto out_unlock;
@@ -2904,7 +3162,6 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
int i;
int push_space = 0;
int push_items = 0;
- struct btrfs_item *item;
u32 old_left_nritems;
u32 nr;
int ret = 0;
@@ -2918,8 +3175,6 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
- item = btrfs_item_nr(i);
-
if (!empty && push_items > 0) {
if (path->slots[0] < i)
break;
@@ -2934,12 +3189,13 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
if (path->slots[0] == i)
push_space += data_size;
- this_item_size = btrfs_item_size(right, item);
- if (this_item_size + sizeof(*item) + push_space > free_space)
+ this_item_size = btrfs_item_size(right, i);
+ if (this_item_size + sizeof(struct btrfs_item) + push_space >
+ free_space)
break;
push_items++;
- push_space += this_item_size + sizeof(*item);
+ push_space += this_item_size + sizeof(struct btrfs_item);
}
if (push_items == 0) {
@@ -2955,25 +3211,23 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
push_items * sizeof(struct btrfs_item));
push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
- btrfs_item_offset_nr(right, push_items - 1);
+ btrfs_item_offset(right, push_items - 1);
copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
leaf_data_end(left) - push_space,
BTRFS_LEAF_DATA_OFFSET +
- btrfs_item_offset_nr(right, push_items - 1),
+ btrfs_item_offset(right, push_items - 1),
push_space);
old_left_nritems = btrfs_header_nritems(left);
BUG_ON(old_left_nritems <= 0);
btrfs_init_map_token(&token, left);
- old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
+ old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1);
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
-
- ioff = btrfs_token_item_offset(&token, item);
- btrfs_set_token_item_offset(&token, item,
+ ioff = btrfs_token_item_offset(&token, i);
+ btrfs_set_token_item_offset(&token, i,
ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
@@ -2984,7 +3238,7 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
right_nritems);
if (push_items < right_nritems) {
- push_space = btrfs_item_offset_nr(right, push_items - 1) -
+ push_space = btrfs_item_offset(right, push_items - 1) -
leaf_data_end(right);
memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
@@ -3002,10 +3256,8 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(i);
-
- push_space = push_space - btrfs_token_item_size(&token, item);
- btrfs_set_token_item_offset(&token, item, push_space);
+ push_space = push_space - btrfs_token_item_size(&token, i);
+ btrfs_set_token_item_offset(&token, i, push_space);
}
btrfs_mark_buffer_dirty(left);
@@ -3084,7 +3336,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- /* cow and double check */
ret = btrfs_cow_block(trans, root, left,
path->nodes[1], slot - 1, &left,
BTRFS_NESTING_LEFT_COW);
@@ -3095,12 +3346,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- free_space = btrfs_leaf_free_space(left);
- if (free_space < data_size) {
- ret = 1;
- goto out;
- }
-
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
goto out;
@@ -3133,7 +3378,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
nritems = nritems - mid;
btrfs_set_header_nritems(right, nritems);
- data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
+ data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l);
copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
btrfs_item_nr_offset(mid),
@@ -3144,15 +3389,14 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
data_copy_size, BTRFS_LEAF_DATA_OFFSET +
leaf_data_end(l), data_copy_size);
- rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
+ rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
btrfs_init_map_token(&token, right);
for (i = 0; i < nritems; i++) {
- struct btrfs_item *item = btrfs_item_nr(i);
u32 ioff;
- ioff = btrfs_token_item_offset(&token, item);
- btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
+ ioff = btrfs_token_item_offset(&token, i);
+ btrfs_set_token_item_offset(&token, i, ioff + rt_data_off);
}
btrfs_set_header_nritems(l, mid);
@@ -3268,7 +3512,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
l = path->nodes[0];
slot = path->slots[0];
- if (extend && data_size + btrfs_item_size_nr(l, slot) +
+ if (extend && data_size + btrfs_item_size(l, slot) +
sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
return -EOVERFLOW;
@@ -3437,7 +3681,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
if (btrfs_leaf_free_space(leaf) >= ins_len)
return 0;
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ item_size = btrfs_item_size(leaf, path->slots[0]);
if (key.type == BTRFS_EXTENT_DATA_KEY) {
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -3457,7 +3701,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
ret = -EAGAIN;
leaf = path->nodes[0];
/* if our item isn't there, return now */
- if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
+ if (item_size != btrfs_item_size(leaf, path->slots[0]))
goto err;
/* the leaf has changed, it now has room. return now */
@@ -3488,9 +3732,7 @@ static noinline int split_item(struct btrfs_path *path,
unsigned long split_offset)
{
struct extent_buffer *leaf;
- struct btrfs_item *item;
- struct btrfs_item *new_item;
- int slot;
+ int orig_slot, slot;
char *buf;
u32 nritems;
u32 item_size;
@@ -3500,9 +3742,9 @@ static noinline int split_item(struct btrfs_path *path,
leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
- item = btrfs_item_nr(path->slots[0]);
- orig_offset = btrfs_item_offset(leaf, item);
- item_size = btrfs_item_size(leaf, item);
+ orig_slot = path->slots[0];
+ orig_offset = btrfs_item_offset(leaf, path->slots[0]);
+ item_size = btrfs_item_size(leaf, path->slots[0]);
buf = kmalloc(item_size, GFP_NOFS);
if (!buf)
@@ -3523,14 +3765,12 @@ static noinline int split_item(struct btrfs_path *path,
btrfs_cpu_key_to_disk(&disk_key, new_key);
btrfs_set_item_key(leaf, &disk_key, slot);
- new_item = btrfs_item_nr(slot);
-
- btrfs_set_item_offset(leaf, new_item, orig_offset);
- btrfs_set_item_size(leaf, new_item, item_size - split_offset);
+ btrfs_set_item_offset(leaf, slot, orig_offset);
+ btrfs_set_item_size(leaf, slot, item_size - split_offset);
- btrfs_set_item_offset(leaf, item,
- orig_offset + item_size - split_offset);
- btrfs_set_item_size(leaf, item, split_offset);
+ btrfs_set_item_offset(leaf, orig_slot,
+ orig_offset + item_size - split_offset);
+ btrfs_set_item_size(leaf, orig_slot, split_offset);
btrfs_set_header_nritems(leaf, nritems + 1);
@@ -3591,7 +3831,6 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
{
int slot;
struct extent_buffer *leaf;
- struct btrfs_item *item;
u32 nritems;
unsigned int data_end;
unsigned int old_data_start;
@@ -3603,14 +3842,14 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
leaf = path->nodes[0];
slot = path->slots[0];
- old_size = btrfs_item_size_nr(leaf, slot);
+ old_size = btrfs_item_size(leaf, slot);
if (old_size == new_size)
return;
nritems = btrfs_header_nritems(leaf);
data_end = leaf_data_end(leaf);
- old_data_start = btrfs_item_offset_nr(leaf, slot);
+ old_data_start = btrfs_item_offset(leaf, slot);
size_diff = old_size - new_size;
@@ -3624,10 +3863,9 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
btrfs_init_map_token(&token, leaf);
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(&token, item);
- btrfs_set_token_item_offset(&token, item, ioff + size_diff);
+ ioff = btrfs_token_item_offset(&token, i);
+ btrfs_set_token_item_offset(&token, i, ioff + size_diff);
}
/* shift the data */
@@ -3670,8 +3908,7 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
fixup_low_keys(path, &disk_key, 1);
}
- item = btrfs_item_nr(slot);
- btrfs_set_item_size(leaf, item, new_size);
+ btrfs_set_item_size(leaf, slot, new_size);
btrfs_mark_buffer_dirty(leaf);
if (btrfs_leaf_free_space(leaf) < 0) {
@@ -3687,7 +3924,6 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
{
int slot;
struct extent_buffer *leaf;
- struct btrfs_item *item;
u32 nritems;
unsigned int data_end;
unsigned int old_data;
@@ -3705,7 +3941,7 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
BUG();
}
slot = path->slots[0];
- old_data = btrfs_item_end_nr(leaf, slot);
+ old_data = btrfs_item_data_end(leaf, slot);
BUG_ON(slot < 0);
if (slot >= nritems) {
@@ -3722,10 +3958,9 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
btrfs_init_map_token(&token, leaf);
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(&token, item);
- btrfs_set_token_item_offset(&token, item, ioff - data_size);
+ ioff = btrfs_token_item_offset(&token, i);
+ btrfs_set_token_item_offset(&token, i, ioff - data_size);
}
/* shift the data */
@@ -3734,9 +3969,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
data_end, old_data - data_end);
data_end = old_data;
- old_size = btrfs_item_size_nr(leaf, slot);
- item = btrfs_item_nr(slot);
- btrfs_set_item_size(leaf, item, old_size + data_size);
+ old_size = btrfs_item_size(leaf, slot);
+ btrfs_set_item_size(leaf, slot, old_size + data_size);
btrfs_mark_buffer_dirty(leaf);
if (btrfs_leaf_free_space(leaf) < 0) {
@@ -3758,7 +3992,6 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
const struct btrfs_item_batch *batch)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_item *item;
int i;
u32 nritems;
unsigned int data_end;
@@ -3795,7 +4028,7 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
btrfs_init_map_token(&token, leaf);
if (slot != nritems) {
- unsigned int old_data = btrfs_item_end_nr(leaf, slot);
+ unsigned int old_data = btrfs_item_data_end(leaf, slot);
if (old_data < data_end) {
btrfs_print_leaf(leaf);
@@ -3811,10 +4044,9 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(&token, item);
- btrfs_set_token_item_offset(&token, item,
- ioff - batch->total_data_size);
+ ioff = btrfs_token_item_offset(&token, i);
+ btrfs_set_token_item_offset(&token, i,
+ ioff - batch->total_data_size);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
@@ -3833,10 +4065,9 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
for (i = 0; i < batch->nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(slot + i);
data_end -= batch->data_sizes[i];
- btrfs_set_token_item_offset(&token, item, data_end);
- btrfs_set_token_item_size(&token, item, batch->data_sizes[i]);
+ btrfs_set_token_item_offset(&token, slot + i, data_end);
+ btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]);
}
btrfs_set_header_nritems(leaf, nritems + batch->nr);
@@ -3943,7 +4174,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
u32 item_size;
leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ item_size = btrfs_item_size(leaf, path->slots[0]);
ret = setup_leaf_for_split(trans, root, path,
item_size + sizeof(struct btrfs_item));
if (ret)
@@ -4032,7 +4263,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
atomic_inc(&leaf->refs);
- btrfs_free_tree_block(trans, root, leaf, 0, 1);
+ btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
free_extent_buffer_stale(leaf);
}
/*
@@ -4044,25 +4275,22 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
- struct btrfs_item *item;
- u32 last_off;
- u32 dsize = 0;
int ret = 0;
int wret;
- int i;
u32 nritems;
leaf = path->nodes[0];
- last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
-
- for (i = 0; i < nr; i++)
- dsize += btrfs_item_size_nr(leaf, slot + i);
-
nritems = btrfs_header_nritems(leaf);
if (slot + nr != nritems) {
- int data_end = leaf_data_end(leaf);
+ const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
+ const int data_end = leaf_data_end(leaf);
struct btrfs_map_token token;
+ u32 dsize = 0;
+ int i;
+
+ for (i = 0; i < nr; i++)
+ dsize += btrfs_item_size(leaf, slot + i);
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
data_end + dsize,
@@ -4073,9 +4301,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(i);
- ioff = btrfs_token_item_offset(&token, item);
- btrfs_set_token_item_offset(&token, item, ioff + dsize);
+ ioff = btrfs_token_item_offset(&token, i);
+ btrfs_set_token_item_offset(&token, i, ioff + dsize);
}
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -4103,24 +4330,50 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
fixup_low_keys(path, &disk_key, 1);
}
- /* delete the leaf if it is mostly empty */
+ /*
+ * Try to delete the leaf if it is mostly empty. We do this by
+ * trying to move all its items into its left and right neighbours.
+ * If we can't move all the items, then we don't delete it - it's
+ * not ideal, but future insertions might fill the leaf with more
+ * items, or items from other leaves might be moved later into our
+ * leaf due to deletions on those leaves.
+ */
if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
+ u32 min_push_space;
+
/* push_leaf_left fixes the path.
* make sure the path still points to our leaf
* for possible call to del_ptr below
*/
slot = path->slots[1];
atomic_inc(&leaf->refs);
-
- wret = push_leaf_left(trans, root, path, 1, 1,
- 1, (u32)-1);
+ /*
+ * We want to be able to at least push one item to the
+ * left neighbour leaf, and that's the first item.
+ */
+ min_push_space = sizeof(struct btrfs_item) +
+ btrfs_item_size(leaf, 0);
+ wret = push_leaf_left(trans, root, path, 0,
+ min_push_space, 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1,
- 1, 1, 0);
+ /*
+ * If we were not able to push all items from our
+ * leaf to its left neighbour, then attempt to
+ * either push all the remaining items to the
+ * right neighbour or none. There's no advantage
+ * in pushing only some items, instead of all, as
+ * it's pointless to end up with a leaf having
+ * too few items while the neighbours can be full
+ * or nearly full.
+ */
+ nritems = btrfs_header_nritems(leaf);
+ min_push_space = leaf_space_used(leaf, 0, nritems);
+ wret = push_leaf_right(trans, root, path, 0,
+ min_push_space, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
@@ -4229,6 +4482,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
int ret = 1;
int keep_locks = path->keep_locks;
+ ASSERT(!path->nowait);
path->keep_locks = 1;
again:
cur = btrfs_read_lock_root_node(root);
@@ -4402,11 +4656,15 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
int level;
struct extent_buffer *c;
struct extent_buffer *next;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
+ bool need_commit_sem = false;
u32 nritems;
int ret;
int i;
+ ASSERT(!path->nowait);
+
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
return 1;
@@ -4419,14 +4677,20 @@ again:
path->keep_locks = 1;
- if (time_seq)
+ if (time_seq) {
ret = btrfs_search_old_slot(root, &key, path, time_seq);
- else
+ } else {
+ if (path->need_commit_sem) {
+ path->need_commit_sem = 0;
+ need_commit_sem = true;
+ down_read(&fs_info->commit_root_sem);
+ }
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ }
path->keep_locks = 0;
if (ret < 0)
- return ret;
+ goto done;
nritems = btrfs_header_nritems(path->nodes[0]);
/*
@@ -4549,6 +4813,15 @@ again:
ret = 0;
done:
unlock_up(path, 0, 1, 0, NULL);
+ if (need_commit_sem) {
+ int ret2;
+
+ path->need_commit_sem = 1;
+ ret2 = finish_need_commit_sem_search(path);
+ up_read(&fs_info->commit_root_sem);
+ if (ret2)
+ ret = ret2;
+ }
return ret;
}