aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c1096
1 files changed, 794 insertions, 302 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..72a2b9c28e9f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
+#include <linux/kthread.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
+static noinline int
+block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+ smp_mb();
+ return cache->cached == BTRFS_CACHE_FINISHED;
+}
+
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
@@ -146,20 +154,70 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
}
/*
+ * We always set EXTENT_LOCKED for the super mirror extents so we don't
+ * overwrite them, so those bits need to be unset. Also, if we are unmounting
+ * with pinned extents still sitting there because we had a block group caching,
+ * we need to clear those now, since we are done.
+ */
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+{
+ u64 start, end, last = 0;
+ int ret;
+
+ while (1) {
+ ret = find_first_extent_bit(&info->pinned_extents, last,
+ &start, &end,
+ EXTENT_LOCKED|EXTENT_DIRTY);
+ if (ret)
+ break;
+
+ clear_extent_bits(&info->pinned_extents, start, end,
+ EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
+ last = end+1;
+ }
+}
+
+static int remove_sb_from_cache(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ u64 bytenr;
+ u64 *logical;
+ int stripe_len;
+ int i, nr, ret;
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+ cache->key.objectid, bytenr,
+ 0, &logical, &nr, &stripe_len);
+ BUG_ON(ret);
+ while (nr--) {
+ try_lock_extent(&fs_info->pinned_extents,
+ logical[nr],
+ logical[nr] + stripe_len - 1, GFP_NOFS);
+ }
+ kfree(logical);
+ }
+
+ return 0;
+}
+
+/*
* this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet
* since their free space will be released as soon as the transaction commits.
*/
-static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end)
{
- u64 extent_start, extent_end, size;
+ u64 extent_start, extent_end, size, total_added = 0;
int ret;
while (start < end) {
ret = find_first_extent_bit(&info->pinned_extents, start,
&extent_start, &extent_end,
- EXTENT_DIRTY);
+ EXTENT_DIRTY|EXTENT_LOCKED);
if (ret)
break;
@@ -167,6 +225,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
+ total_added += size;
ret = btrfs_add_free_space(block_group, start,
size);
BUG_ON(ret);
@@ -178,84 +237,93 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
if (start < end) {
size = end - start;
+ total_added += size;
ret = btrfs_add_free_space(block_group, start, size);
BUG_ON(ret);
}
- return 0;
+ return total_added;
}
-static int remove_sb_from_cache(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
-{
- u64 bytenr;
- u64 *logical;
- int stripe_len;
- int i, nr, ret;
-
- for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
- ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
- cache->key.objectid, bytenr, 0,
- &logical, &nr, &stripe_len);
- BUG_ON(ret);
- while (nr--) {
- btrfs_remove_free_space(cache, logical[nr],
- stripe_len);
- }
- kfree(logical);
- }
- return 0;
-}
-
-static int cache_block_group(struct btrfs_root *root,
- struct btrfs_block_group_cache *block_group)
+static int caching_kthread(void *data)
{
+ struct btrfs_block_group_cache *block_group = data;
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ u64 last = 0;
struct btrfs_path *path;
int ret = 0;
struct btrfs_key key;
struct extent_buffer *leaf;
int slot;
- u64 last;
+ u64 total_found = 0;
- if (!block_group)
- return 0;
-
- root = root->fs_info->extent_root;
-
- if (block_group->cached)
- return 0;
+ BUG_ON(!fs_info);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 2;
+ atomic_inc(&block_group->space_info->caching_threads);
+ last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
/*
- * we get into deadlocks with paths held by callers of this function.
- * since the alloc_mutex is protecting things right now, just
- * skip the locking here
+ * We don't want to deadlock with somebody trying to allocate a new
+ * extent for the extent root while also trying to search the extent
+ * root to add free space. So we skip locking and search the commit
+ * root, since its read-only
*/
path->skip_locking = 1;
- last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+ path->search_commit_root = 1;
+ path->reada = 2;
+
key.objectid = last;
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+again:
+ /* need to make sure the commit_root doesn't disappear */
+ down_read(&fs_info->extent_commit_sem);
+
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
while (1) {
+ smp_mb();
+ if (block_group->fs_info->closing > 1) {
+ last = (u64)-1;
+ break;
+ }
+
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
+ ret = btrfs_next_leaf(fs_info->extent_root, path);
if (ret < 0)
goto err;
- if (ret == 0)
- continue;
- else
+ else if (ret)
break;
+
+ if (need_resched() ||
+ btrfs_transaction_in_commit(fs_info)) {
+ leaf = path->nodes[0];
+
+ /* this shouldn't happen, but if the
+ * leaf is empty just move on.
+ */
+ if (btrfs_header_nritems(leaf) == 0)
+ break;
+ /*
+ * we need to copy the key out so that
+ * we are sure the next search advances
+ * us forward in the btree.
+ */
+ btrfs_item_key_to_cpu(leaf, &key, 0);
+ btrfs_release_path(fs_info->extent_root, path);
+ up_read(&fs_info->extent_commit_sem);
+ schedule_timeout(1);
+ goto again;
+ }
+
+ continue;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid < block_group->key.objectid)
@@ -266,24 +334,59 @@ static int cache_block_group(struct btrfs_root *root,
break;
if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
- add_new_free_space(block_group, root->fs_info, last,
- key.objectid);
-
+ total_found += add_new_free_space(block_group,
+ fs_info, last,
+ key.objectid);
last = key.objectid + key.offset;
}
+
+ if (total_found > (1024 * 1024 * 2)) {
+ total_found = 0;
+ wake_up(&block_group->caching_q);
+ }
next:
path->slots[0]++;
}
+ ret = 0;
- add_new_free_space(block_group, root->fs_info, last,
- block_group->key.objectid +
- block_group->key.offset);
+ total_found += add_new_free_space(block_group, fs_info, last,
+ block_group->key.objectid +
+ block_group->key.offset);
+
+ spin_lock(&block_group->lock);
+ block_group->cached = BTRFS_CACHE_FINISHED;
+ spin_unlock(&block_group->lock);
- block_group->cached = 1;
- remove_sb_from_cache(root, block_group);
- ret = 0;
err:
btrfs_free_path(path);
+ up_read(&fs_info->extent_commit_sem);
+ atomic_dec(&block_group->space_info->caching_threads);
+ wake_up(&block_group->caching_q);
+
+ return 0;
+}
+
+static int cache_block_group(struct btrfs_block_group_cache *cache)
+{
+ struct task_struct *tsk;
+ int ret = 0;
+
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_NO) {
+ spin_unlock(&cache->lock);
+ return ret;
+ }
+ cache->cached = BTRFS_CACHE_STARTED;
+ spin_unlock(&cache->lock);
+
+ tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
+ cache->key.objectid);
+ if (IS_ERR(tsk)) {
+ ret = PTR_ERR(tsk);
+ printk(KERN_ERR "error running thread %d\n", ret);
+ BUG();
+ }
+
return ret;
}
@@ -990,15 +1093,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
return type;
}
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key)
{
- int level;
- BUG_ON(!path->keep_locks);
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ for (; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
break;
- btrfs_assert_tree_locked(path->nodes[level]);
if (path->slots[level] + 1 >=
btrfs_header_nritems(path->nodes[level]))
continue;
@@ -1158,7 +1259,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
* For simplicity, we just do not add new inline back
* ref if there is any kind of item for this block
*/
- if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+ if (find_next_key(path, 0, &key) == 0 &&
+ key.objectid == bytenr &&
key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
err = -EAGAIN;
goto out;
@@ -2388,13 +2490,29 @@ fail:
}
+static struct btrfs_block_group_cache *
+next_block_group(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ struct rb_node *node;
+ spin_lock(&root->fs_info->block_group_cache_lock);
+ node = rb_next(&cache->cache_node);
+ btrfs_put_block_group(cache);
+ if (node) {
+ cache = rb_entry(node, struct btrfs_block_group_cache,
+ cache_node);
+ atomic_inc(&cache->count);
+ } else
+ cache = NULL;
+ spin_unlock(&root->fs_info->block_group_cache_lock);
+ return cache;
+}
+
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct btrfs_block_group_cache *cache, *entry;
- struct rb_node *n;
+ struct btrfs_block_group_cache *cache;
int err = 0;
- int werr = 0;
struct btrfs_path *path;
u64 last = 0;
@@ -2403,39 +2521,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
return -ENOMEM;
while (1) {
- cache = NULL;
- spin_lock(&root->fs_info->block_group_cache_lock);
- for (n = rb_first(&root->fs_info->block_group_cache_tree);
- n; n = rb_next(n)) {
- entry = rb_entry(n, struct btrfs_block_group_cache,
- cache_node);
- if (entry->dirty) {
- cache = entry;
- break;
- }
+ if (last == 0) {
+ err = btrfs_run_delayed_refs(trans, root,
+ (unsigned long)-1);
+ BUG_ON(err);
}
- spin_unlock(&root->fs_info->block_group_cache_lock);
- if (!cache)
- break;
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ if (cache->dirty)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
cache->dirty = 0;
- last += cache->key.offset;
+ last = cache->key.objectid + cache->key.offset;
- err = write_one_cache_group(trans, root,
- path, cache);
- /*
- * if we fail to write the cache group, we want
- * to keep it marked dirty in hopes that a later
- * write will work
- */
- if (err) {
- werr = err;
- continue;
- }
+ err = write_one_cache_group(trans, root, path, cache);
+ BUG_ON(err);
+ btrfs_put_block_group(cache);
}
+
btrfs_free_path(path);
- return werr;
+ return 0;
}
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -2485,6 +2599,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->force_alloc = 0;
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
+ atomic_set(&found->caching_threads, 0);
return 0;
}
@@ -2697,7 +2812,7 @@ again:
printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
", %llu bytes_used, %llu bytes_reserved, "
- "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+ "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
"%llu total\n", (unsigned long long)bytes,
(unsigned long long)data_sinfo->bytes_delalloc,
(unsigned long long)data_sinfo->bytes_used,
@@ -2948,13 +3063,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
- if (pin) {
+ if (pin)
set_extent_dirty(&fs_info->pinned_extents,
bytenr, bytenr + num - 1, GFP_NOFS);
- } else {
- clear_extent_dirty(&fs_info->pinned_extents,
- bytenr, bytenr + num - 1, GFP_NOFS);
- }
while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2970,14 +3081,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
spin_unlock(&cache->space_info->lock);
fs_info->total_pinned += len;
} else {
+ int unpin = 0;
+
+ /*
+ * in order to not race with the block group caching, we
+ * only want to unpin the extent if we are cached. If
+ * we aren't cached, we want to start async caching this
+ * block group so we can free the extent the next time
+ * around.
+ */
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
- cache->pinned -= len;
- cache->space_info->bytes_pinned -= len;
+ unpin = (cache->cached == BTRFS_CACHE_FINISHED);
+ if (likely(unpin)) {
+ cache->pinned -= len;
+ cache->space_info->bytes_pinned -= len;
+ fs_info->total_pinned -= len;
+ }
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
- fs_info->total_pinned -= len;
- if (cache->cached)
+
+ if (likely(unpin))
+ clear_extent_dirty(&fs_info->pinned_extents,
+ bytenr, bytenr + len -1,
+ GFP_NOFS);
+ else
+ cache_block_group(cache);
+
+ if (unpin)
btrfs_add_free_space(cache, bytenr, len);
}
btrfs_put_block_group(cache);
@@ -3031,6 +3162,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
&start, &end, EXTENT_DIRTY);
if (ret)
break;
+
set_extent_dirty(copy, start, end, GFP_NOFS);
last = end + 1;
}
@@ -3059,6 +3191,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched();
}
+
return ret;
}
@@ -3437,6 +3570,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
}
/*
+ * when we wait for progress in the block group caching, its because
+ * our allocation attempt failed at least once. So, we must sleep
+ * and let some progress happen before we try again.
+ *
+ * This function will sleep at least once waiting for new free space to
+ * show up, and then it will check the block group free space numbers
+ * for our min num_bytes. Another option is to have it go ahead
+ * and look in the rbtree for a free extent of a given size, but this
+ * is a good start.
+ */
+static noinline int
+wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
+ u64 num_bytes)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
+
+ if (block_group_cache_done(cache)) {
+ finish_wait(&cache->caching_q, &wait);
+ return 0;
+ }
+ schedule();
+ finish_wait(&cache->caching_q, &wait);
+
+ wait_event(cache->caching_q, block_group_cache_done(cache) ||
+ (cache->free_space >= num_bytes));
+ return 0;
+}
+
+enum btrfs_loop_type {
+ LOOP_CACHED_ONLY = 0,
+ LOOP_CACHING_NOWAIT = 1,
+ LOOP_CACHING_WAIT = 2,
+ LOOP_ALLOC_CHUNK = 3,
+ LOOP_NO_EMPTY_SIZE = 4,
+};
+
+/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
* ins->objectid == block start
@@ -3461,6 +3633,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_space_info *space_info;
int last_ptr_loop = 0;
int loop = 0;
+ bool found_uncached_bg = false;
WARN_ON(num_bytes < root->sectorsize);
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3492,15 +3665,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
search_start = max(search_start, first_logical_byte(root, 0));
search_start = max(search_start, hint_byte);
- if (!last_ptr) {
+ if (!last_ptr)
empty_cluster = 0;
- loop = 1;
- }
if (search_start == hint_byte) {
block_group = btrfs_lookup_block_group(root->fs_info,
search_start);
- if (block_group && block_group_bits(block_group, data)) {
+ /*
+ * we don't want to use the block group if it doesn't match our
+ * allocation bits, or if its not cached.
+ */
+ if (block_group && block_group_bits(block_group, data) &&
+ block_group_cache_done(block_group)) {
down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
block_group->ro) {
@@ -3523,21 +3699,35 @@ search:
down_read(&space_info->groups_sem);
list_for_each_entry(block_group, &space_info->block_groups, list) {
u64 offset;
+ int cached;
atomic_inc(&block_group->count);
search_start = block_group->key.objectid;
have_block_group:
- if (unlikely(!block_group->cached)) {
- mutex_lock(&block_group->cache_mutex);
- ret = cache_block_group(root, block_group);
- mutex_unlock(&block_group->cache_mutex);
- if (ret) {
- btrfs_put_block_group(block_group);
- break;
+ if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+ /*
+ * we want to start caching kthreads, but not too many
+ * right off the bat so we don't overwhelm the system,
+ * so only start them if there are less than 2 and we're
+ * in the initial allocation phase.
+ */
+ if (loop > LOOP_CACHING_NOWAIT ||
+ atomic_read(&space_info->caching_threads) < 2) {
+ ret = cache_block_group(block_group);
+ BUG_ON(ret);
}
}
+ cached = block_group_cache_done(block_group);
+ if (unlikely(!cached)) {
+ found_uncached_bg = true;
+
+ /* if we only want cached bgs, loop */
+ if (loop == LOOP_CACHED_ONLY)
+ goto loop;
+ }
+
if (unlikely(block_group->ro))
goto loop;
@@ -3616,14 +3806,21 @@ refill_cluster:
spin_unlock(&last_ptr->refill_lock);
goto checks;
}
+ } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+ spin_unlock(&last_ptr->refill_lock);
+
+ wait_block_group_cache_progress(block_group,
+ num_bytes + empty_cluster + empty_size);
+ goto have_block_group;
}
+
/*
* at this point we either didn't find a cluster
* or we weren't able to allocate a block from our
* cluster. Free the cluster we've been trying
* to use, and go to the next block group
*/
- if (loop < 2) {
+ if (loop < LOOP_NO_EMPTY_SIZE) {
btrfs_return_cluster_to_free_space(NULL,
last_ptr);
spin_unlock(&last_ptr->refill_lock);
@@ -3634,11 +3831,17 @@ refill_cluster:
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size);
- if (!offset)
+ if (!offset && (cached || (!cached &&
+ loop == LOOP_CACHING_NOWAIT))) {
goto loop;
+ } else if (!offset && (!cached &&
+ loop > LOOP_CACHING_NOWAIT)) {
+ wait_block_group_cache_progress(block_group,
+ num_bytes + empty_size);
+ goto have_block_group;
+ }
checks:
search_start = stripe_align(root, offset);
-
/* move on to the next group */
if (search_start + num_bytes >= search_end) {
btrfs_add_free_space(block_group, offset, num_bytes);
@@ -3684,13 +3887,26 @@ loop:
}
up_read(&space_info->groups_sem);
- /* loop == 0, try to find a clustered alloc in every block group
- * loop == 1, try again after forcing a chunk allocation
- * loop == 2, set empty_size and empty_cluster to 0 and try again
+ /* LOOP_CACHED_ONLY, only search fully cached block groups
+ * LOOP_CACHING_NOWAIT, search partially cached block groups, but
+ * dont wait foR them to finish caching
+ * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+ * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
+ * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
+ * again
*/
- if (!ins->objectid && loop < 3 &&
- (empty_size || empty_cluster || allowed_chunk_alloc)) {
- if (loop >= 2) {
+ if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
+ (found_uncached_bg || empty_size || empty_cluster ||
+ allowed_chunk_alloc)) {
+ if (found_uncached_bg) {
+ found_uncached_bg = false;
+ if (loop < LOOP_CACHING_WAIT) {
+ loop++;
+ goto search;
+ }
+ }
+
+ if (loop == LOOP_ALLOC_CHUNK) {
empty_size = 0;
empty_cluster = 0;
}
@@ -3703,7 +3919,7 @@ loop:
space_info->force_alloc = 1;
}
- if (loop < 3) {
+ if (loop < LOOP_NO_EMPTY_SIZE) {
loop++;
goto search;
}
@@ -3799,7 +4015,7 @@ again:
num_bytes, data, 1);
goto again;
}
- if (ret) {
+ if (ret == -ENOSPC) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
@@ -3807,7 +4023,6 @@ again:
"wanted %llu\n", (unsigned long long)data,
(unsigned long long)num_bytes);
dump_space_info(sinfo, num_bytes);
- BUG();
}
return ret;
@@ -3845,7 +4060,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
empty_size, hint_byte, search_end, ins,
data);
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
+ if (!ret)
+ update_reserved_extents(root, ins->objectid, ins->offset, 1);
+
return ret;
}
@@ -4007,9 +4224,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
- mutex_lock(&block_group->cache_mutex);
- cache_block_group(root, block_group);
- mutex_unlock(&block_group->cache_mutex);
+ cache_block_group(block_group);
+ wait_event(block_group->caching_q,
+ block_group_cache_done(block_group));
ret = btrfs_remove_free_space(block_group, ins->objectid,
ins->offset);
@@ -4040,7 +4257,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
empty_size, hint_byte, search_end,
ins, 0);
- BUG_ON(ret);
+ if (ret)
+ return ret;
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
@@ -4128,6 +4346,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return buf;
}
+#if 0
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf)
{
@@ -4171,8 +4390,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
return 0;
}
-#if 0
-
static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_leaf_ref *ref)
@@ -4553,262 +4770,471 @@ out:
}
#endif
+struct walk_control {
+ u64 refs[BTRFS_MAX_LEVEL];
+ u64 flags[BTRFS_MAX_LEVEL];
+ struct btrfs_key update_progress;
+ int stage;
+ int level;
+ int shared_level;
+ int update_ref;
+ int keep_locks;
+};
+
+#define DROP_REFERENCE 1
+#define UPDATE_BACKREF 2
+
/*
- * helper function for drop_subtree, this function is similar to
- * walk_down_tree. The main difference is that it checks reference
- * counts while tree blocks are locked.
+ * hepler to process tree block while walking down the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks
+ * reference count of the block. if the block is shared and
+ * we need update back refs for the subtree rooted at the
+ * block, this function changes wc->stage to UPDATE_BACKREF
+ *
+ * when wc->stage == UPDATE_BACKREF, this function updates
+ * back refs for pointers in the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
*/
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_path *path, int *level)
+ struct btrfs_path *path,
+ struct walk_control *wc)
{
- struct extent_buffer *next;
- struct extent_buffer *cur;
- struct extent_buffer *parent;
- u64 bytenr;
- u64 ptr_gen;
- u64 refs;
- u64 flags;
- u32 blocksize;
+ int level = wc->level;
+ struct extent_buffer *eb = path->nodes[level];
+ struct btrfs_key key;
+ u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
int ret;
- cur = path->nodes[*level];
- ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
- &refs, &flags);
- BUG_ON(ret);
- if (refs > 1)
- goto out;
+ if (wc->stage == UPDATE_BACKREF &&
+ btrfs_header_owner(eb) != root->root_key.objectid)
+ return 1;
- BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+ /*
+ * when reference count of tree block is 1, it won't increase
+ * again. once full backref flag is set, we never clear it.
+ */
+ if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+ (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+ BUG_ON(!path->locks[level]);
+ ret = btrfs_lookup_extent_info(trans, root,
+ eb->start, eb->len,
+ &wc->refs[level],
+ &wc->flags[level]);
+ BUG_ON(ret);
+ BUG_ON(wc->refs[level] == 0);
+ }
- while (*level >= 0) {
- cur = path->nodes[*level];
- if (*level == 0) {
- ret = btrfs_drop_leaf_ref(trans, root, cur);
- BUG_ON(ret);
- clean_tree_block(trans, root, cur);
- break;
- }
- if (path->slots[*level] >= btrfs_header_nritems(cur)) {
- clean_tree_block(trans, root, cur);
- break;
+ if (wc->stage == DROP_REFERENCE &&
+ wc->update_ref && wc->refs[level] > 1) {
+ BUG_ON(eb == root->node);
+ BUG_ON(path->slots[level] > 0);
+ if (level == 0)
+ btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
+ else
+ btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
+ if (btrfs_header_owner(eb) == root->root_key.objectid &&
+ btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
+ wc->stage = UPDATE_BACKREF;
+ wc->shared_level = level;
}
+ }
- bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
- blocksize = btrfs_level_size(root, *level - 1);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+ if (wc->stage == DROP_REFERENCE) {
+ if (wc->refs[level] > 1)
+ return 1;
- next = read_tree_block(root, bytenr, blocksize, ptr_gen);
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
+ if (path->locks[level] && !wc->keep_locks) {
+ btrfs_tree_unlock(eb);
+ path->locks[level] = 0;
+ }
+ return 0;
+ }
- ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
- &refs, &flags);
+ /* wc->stage == UPDATE_BACKREF */
+ if (!(wc->flags[level] & flag)) {
+ BUG_ON(!path->locks[level]);
+ ret = btrfs_inc_ref(trans, root, eb, 1);
BUG_ON(ret);
- if (refs > 1) {
- parent = path->nodes[*level];
- ret = btrfs_free_extent(trans, root, bytenr,
- blocksize, parent->start,
- btrfs_header_owner(parent),
- *level - 1, 0);
+ ret = btrfs_dec_ref(trans, root, eb, 0);
+ BUG_ON(ret);
+ ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
+ eb->len, flag, 0);
+ BUG_ON(ret);
+ wc->flags[level] |= flag;
+ }
+
+ /*
+ * the block is shared by multiple trees, so it's not good to
+ * keep the tree lock
+ */
+ if (path->locks[level] && level > 0) {
+ btrfs_tree_unlock(eb);
+ path->locks[level] = 0;
+ }
+ return 0;
+}
+
+/*
+ * hepler to process tree block while walking up the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function drops
+ * reference count on the block.
+ *
+ * when wc->stage == UPDATE_BACKREF, this function changes
+ * wc->stage back to DROP_REFERENCE if we changed wc->stage
+ * to UPDATE_BACKREF previously while processing the block.
+ *
+ * NOTE: return value 1 means we should stop walking up.
+ */
+static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct walk_control *wc)
+{
+ int ret = 0;
+ int level = wc->level;
+ struct extent_buffer *eb = path->nodes[level];
+ u64 parent = 0;
+
+ if (wc->stage == UPDATE_BACKREF) {
+ BUG_ON(wc->shared_level < level);
+ if (level < wc->shared_level)
+ goto out;
+
+ BUG_ON(wc->refs[level] <= 1);
+ ret = find_next_key(path, level + 1, &wc->update_progress);
+ if (ret > 0)
+ wc->update_ref = 0;
+
+ wc->stage = DROP_REFERENCE;
+ wc->shared_level = -1;
+ path->slots[level] = 0;
+
+ /*
+ * check reference count again if the block isn't locked.
+ * we should start walking down the tree again if reference
+ * count is one.
+ */
+ if (!path->locks[level]) {
+ BUG_ON(level == 0);
+ btrfs_tree_lock(eb);
+ btrfs_set_lock_blocking(eb);
+ path->locks[level] = 1;
+
+ ret = btrfs_lookup_extent_info(trans, root,
+ eb->start, eb->len,
+ &wc->refs[level],
+ &wc->flags[level]);
BUG_ON(ret);
- path->slots[*level]++;
- btrfs_tree_unlock(next);
- free_extent_buffer(next);
- continue;
+ BUG_ON(wc->refs[level] == 0);
+ if (wc->refs[level] == 1) {
+ btrfs_tree_unlock(eb);
+ path->locks[level] = 0;
+ return 1;
+ }
+ } else {
+ BUG_ON(level != 0);
}
+ }
- BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+ /* wc->stage == DROP_REFERENCE */
+ BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
- *level = btrfs_header_level(next);
- path->nodes[*level] = next;
- path->slots[*level] = 0;
- path->locks[*level] = 1;
- cond_resched();
+ if (wc->refs[level] == 1) {
+ if (level == 0) {
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ ret = btrfs_dec_ref(trans, root, eb, 1);
+ else
+ ret = btrfs_dec_ref(trans, root, eb, 0);
+ BUG_ON(ret);
+ }
+ /* make block locked assertion in clean_tree_block happy */
+ if (!path->locks[level] &&
+ btrfs_header_generation(eb) == trans->transid) {
+ btrfs_tree_lock(eb);
+ btrfs_set_lock_blocking(eb);
+ path->locks[level] = 1;
+ }
+ clean_tree_block(trans, root, eb);
}
-out:
- if (path->nodes[*level] == root->node)
- parent = path->nodes[*level];
- else
- parent = path->nodes[*level + 1];
- bytenr = path->nodes[*level]->start;
- blocksize = path->nodes[*level]->len;
- ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
- btrfs_header_owner(parent), *level, 0);
+ if (eb == root->node) {
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ parent = eb->start;
+ else
+ BUG_ON(root->root_key.objectid !=
+ btrfs_header_owner(eb));
+ } else {
+ if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ parent = path->nodes[level + 1]->start;
+ else
+ BUG_ON(root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level + 1]));
+ }
+
+ ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
+ root->root_key.objectid, level, 0);
BUG_ON(ret);
+out:
+ wc->refs[level] = 0;
+ wc->flags[level] = 0;
+ return ret;
+}
+
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct walk_control *wc)
+{
+ struct extent_buffer *next;
+ struct extent_buffer *cur;
+ u64 bytenr;
+ u64 ptr_gen;
+ u32 blocksize;
+ int level = wc->level;
+ int ret;
+
+ while (level >= 0) {
+ cur = path->nodes[level];
+ BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
+
+ ret = walk_down_proc(trans, root, path, wc);
+ if (ret > 0)
+ break;
+
+ if (level == 0)
+ break;
+
+ bytenr = btrfs_node_blockptr(cur, path->slots[level]);
+ blocksize = btrfs_level_size(root, level - 1);
+ ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
+
+ next = read_tree_block(root, bytenr, blocksize, ptr_gen);
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
- if (path->locks[*level]) {
- btrfs_tree_unlock(path->nodes[*level]);
- path->locks[*level] = 0;
+ level--;
+ BUG_ON(level != btrfs_header_level(next));
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ path->locks[level] = 1;
+ wc->level = level;
}
- free_extent_buffer(path->nodes[*level]);
- path->nodes[*level] = NULL;
- *level += 1;
- cond_resched();
return 0;
}
-/*
- * helper for dropping snapshots. This walks back up the tree in the path
- * to find the first node higher up where we haven't yet gone through
- * all the slots
- */
static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- int *level, int max_level)
+ struct walk_control *wc, int max_level)
{
- struct btrfs_root_item *root_item = &root->root_item;
- int i;
- int slot;
+ int level = wc->level;
int ret;
- for (i = *level; i < max_level && path->nodes[i]; i++) {
- slot = path->slots[i];
- if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
- /*
- * there is more work to do in this level.
- * Update the drop_progress marker to reflect
- * the work we've done so far, and then bump
- * the slot number
- */
- path->slots[i]++;
- WARN_ON(*level == 0);
- if (max_level == BTRFS_MAX_LEVEL) {
- btrfs_node_key(path->nodes[i],
- &root_item->drop_progress,
- path->slots[i]);
- root_item->drop_level = i;
- }
- *level = i;
+ path->slots[level] = btrfs_header_nritems(path->nodes[level]);
+ while (level < max_level && path->nodes[level]) {
+ wc->level = level;
+ if (path->slots[level] + 1 <
+ btrfs_header_nritems(path->nodes[level])) {
+ path->slots[level]++;
return 0;
} else {
- struct extent_buffer *parent;
-
- /*
- * this whole node is done, free our reference
- * on it and go up one level
- */
- if (path->nodes[*level] == root->node)
- parent = path->nodes[*level];
- else
- parent = path->nodes[*level + 1];
+ ret = walk_up_proc(trans, root, path, wc);
+ if (ret > 0)
+ return 0;
- clean_tree_block(trans, root, path->nodes[i]);
- ret = btrfs_free_extent(trans, root,
- path->nodes[i]->start,
- path->nodes[i]->len,
- parent->start,
- btrfs_header_owner(parent),
- *level, 0);
- BUG_ON(ret);
- if (path->locks[*level]) {
- btrfs_tree_unlock(path->nodes[i]);
- path->locks[i] = 0;
+ if (path->locks[level]) {
+ btrfs_tree_unlock(path->nodes[level]);
+ path->locks[level] = 0;
}
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- *level = i + 1;
+ free_extent_buffer(path->nodes[level]);
+ path->nodes[level] = NULL;
+ level++;
}
}
return 1;
}
/*
- * drop the reference count on the tree rooted at 'snap'. This traverses
- * the tree freeing any blocks that have a ref count of zero after being
- * decremented.
+ * drop a subvolume tree.
+ *
+ * this function traverses the tree freeing any blocks that only
+ * referenced by the tree.
+ *
+ * when a shared tree block is found. this function decreases its
+ * reference count by one. if update_ref is true, this function
+ * also make sure backrefs for the shared block and all lower level
+ * blocks are properly updated.
*/
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root)
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
{
- int ret = 0;
- int wret;
- int level;
struct btrfs_path *path;
- int update_count;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *tree_root = root->fs_info->tree_root;
struct btrfs_root_item *root_item = &root->root_item;
+ struct walk_control *wc;
+ struct btrfs_key key;
+ int err = 0;
+ int ret;
+ int level;
path = btrfs_alloc_path();
BUG_ON(!path);
- level = btrfs_header_level(root->node);
+ wc = kzalloc(sizeof(*wc), GFP_NOFS);
+ BUG_ON(!wc);
+
+ trans = btrfs_start_transaction(tree_root, 1);
+
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+ level = btrfs_header_level(root->node);
path->nodes[level] = btrfs_lock_root_node(root);
btrfs_set_lock_blocking(path->nodes[level]);
path->slots[level] = 0;
path->locks[level] = 1;
+ memset(&wc->update_progress, 0,
+ sizeof(wc->update_progress));
} else {
- struct btrfs_key key;
- struct btrfs_disk_key found_key;
- struct extent_buffer *node;
-
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+ memcpy(&wc->update_progress, &key,
+ sizeof(wc->update_progress));
+
level = root_item->drop_level;
+ BUG_ON(level == 0);
path->lowest_level = level;
- wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (wret < 0) {
- ret = wret;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ path->lowest_level = 0;
+ if (ret < 0) {
+ err = ret;
goto out;
}
- node = path->nodes[level];
- btrfs_node_key(node, &found_key, path->slots[level]);
- WARN_ON(memcmp(&found_key, &root_item->drop_progress,
- sizeof(found_key)));
+ btrfs_node_key_to_cpu(path->nodes[level], &key,
+ path->slots[level]);
+ WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+
/*
* unlock our path, this is safe because only this
* function is allowed to delete this snapshot
*/
btrfs_unlock_up_safe(path, 0);
+
+ level = btrfs_header_level(root->node);
+ while (1) {
+ btrfs_tree_lock(path->nodes[level]);
+ btrfs_set_lock_blocking(path->nodes[level]);
+
+ ret = btrfs_lookup_extent_info(trans, root,
+ path->nodes[level]->start,
+ path->nodes[level]->len,
+ &wc->refs[level],
+ &wc->flags[level]);
+ BUG_ON(ret);
+ BUG_ON(wc->refs[level] == 0);
+
+ if (level == root_item->drop_level)
+ break;
+
+ btrfs_tree_unlock(path->nodes[level]);
+ WARN_ON(wc->refs[level] != 1);
+ level--;
+ }
}
+
+ wc->level = level;
+ wc->shared_level = -1;
+ wc->stage = DROP_REFERENCE;
+ wc->update_ref = update_ref;
+ wc->keep_locks = 0;
+
while (1) {
- unsigned long update;
- wret = walk_down_tree(trans, root, path, &level);
- if (wret > 0)
+ ret = walk_down_tree(trans, root, path, wc);
+ if (ret < 0) {
+ err = ret;
break;
- if (wret < 0)
- ret = wret;
+ }
- wret = walk_up_tree(trans, root, path, &level,
- BTRFS_MAX_LEVEL);
- if (wret > 0)
+ ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
+ if (ret < 0) {
+ err = ret;
break;
- if (wret < 0)
- ret = wret;
- if (trans->transaction->in_commit ||
- trans->transaction->delayed_refs.flushing) {
- ret = -EAGAIN;
+ }
+
+ if (ret > 0) {
+ BUG_ON(wc->stage != DROP_REFERENCE);
break;
}
- for (update_count = 0; update_count < 16; update_count++) {
+
+ if (wc->stage == DROP_REFERENCE) {
+ level = wc->level;
+ btrfs_node_key(path->nodes[level],
+ &root_item->drop_progress,
+ path->slots[level]);
+ root_item->drop_level = level;
+ }
+
+ BUG_ON(wc->level == 0);
+ if (trans->transaction->in_commit ||
+ trans->transaction->delayed_refs.flushing) {
+ ret = btrfs_update_root(trans, tree_root,
+ &root->root_key,
+ root_item);
+ BUG_ON(ret);
+
+ btrfs_end_transaction(trans, tree_root);
+ trans = btrfs_start_transaction(tree_root, 1);
+ } else {
+ unsigned long update;
update = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (update)
- btrfs_run_delayed_refs(trans, root, update);
- else
- break;
+ btrfs_run_delayed_refs(trans, tree_root,
+ update);
}
}
+ btrfs_release_path(root, path);
+ BUG_ON(err);
+
+ ret = btrfs_del_root(trans, tree_root, &root->root_key);
+ BUG_ON(ret);
+
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
+ kfree(root);
out:
+ btrfs_end_transaction(trans, tree_root);
+ kfree(wc);
btrfs_free_path(path);
- return ret;
+ return err;
}
+/*
+ * drop subtree rooted at tree block 'node'.
+ *
+ * NOTE: this function will unlock and release tree block 'node'
+ */
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
struct extent_buffer *parent)
{
struct btrfs_path *path;
+ struct walk_control *wc;
int level;
int parent_level;
int ret = 0;
int wret;
+ BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+
path = btrfs_alloc_path();
BUG_ON(!path);
+ wc = kzalloc(sizeof(*wc), GFP_NOFS);
+ BUG_ON(!wc);
+
btrfs_assert_tree_locked(parent);
parent_level = btrfs_header_level(parent);
extent_buffer_get(parent);
@@ -4817,24 +5243,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(node);
level = btrfs_header_level(node);
- extent_buffer_get(node);
path->nodes[level] = node;
path->slots[level] = 0;
+ path->locks[level] = 1;
+
+ wc->refs[parent_level] = 1;
+ wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ wc->level = level;
+ wc->shared_level = -1;
+ wc->stage = DROP_REFERENCE;
+ wc->update_ref = 0;
+ wc->keep_locks = 1;
while (1) {
- wret = walk_down_tree(trans, root, path, &level);
- if (wret < 0)
+ wret = walk_down_tree(trans, root, path, wc);
+ if (wret < 0) {
ret = wret;
- if (wret != 0)
break;
+ }
- wret = walk_up_tree(trans, root, path, &level, parent_level);
+ wret = walk_up_tree(trans, root, path, wc, parent_level);
if (wret < 0)
ret = wret;
if (wret != 0)
break;
}
+ kfree(wc);
btrfs_free_path(path);
return ret;
}
@@ -6739,11 +7174,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
&info->block_group_cache_tree);
spin_unlock(&info->block_group_cache_lock);
- btrfs_remove_free_space_cache(block_group);
down_write(&block_group->space_info->groups_sem);
list_del(&block_group->list);
up_write(&block_group->space_info->groups_sem);
+ if (block_group->cached == BTRFS_CACHE_STARTED)
+ wait_event(block_group->caching_q,
+ block_group_cache_done(block_group));
+
+ btrfs_remove_free_space_cache(block_group);
+
WARN_ON(atomic_read(&block_group->count) != 1);
kfree(block_group);
@@ -6809,9 +7249,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
- mutex_init(&cache->cache_mutex);
+ cache->fs_info = info;
+ init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
+
+ /*
+ * we only want to have 32k of ram per block group for keeping
+ * track of free space, and if we pass 1/2 of that we want to
+ * start converting things over to using bitmaps
+ */
+ cache->extents_thresh = ((1024 * 32) / 2) /
+ sizeof(struct btrfs_free_space);
+
read_extent_buffer(leaf, &cache->item,
btrfs_item_ptr_offset(leaf, path->slots[0]),
sizeof(cache->item));
@@ -6820,6 +7270,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(root, path);
cache->flags = btrfs_block_group_flags(&cache->item);
+ cache->sectorsize = root->sectorsize;
+
+ remove_sb_from_cache(root, cache);
+
+ /*
+ * check for two cases, either we are full, and therefore
+ * don't need to bother with the caching work since we won't
+ * find any space, or we are empty, and we can just add all
+ * the space in and be done with it. This saves us _alot_ of
+ * time, particularly in the full case.
+ */
+ if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+ cache->cached = BTRFS_CACHE_FINISHED;
+ } else if (btrfs_block_group_used(&cache->item) == 0) {
+ cache->cached = BTRFS_CACHE_FINISHED;
+ add_new_free_space(cache, root->fs_info,
+ found_key.objectid,
+ found_key.objectid +
+ found_key.offset);
+ }
ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item),
@@ -6863,10 +7333,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->key.objectid = chunk_offset;
cache->key.offset = size;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ cache->sectorsize = root->sectorsize;
+
+ /*
+ * we only want to have 32k of ram per block group for keeping track
+ * of free space, and if we pass 1/2 of that we want to start
+ * converting things over to using bitmaps
+ */
+ cache->extents_thresh = ((1024 * 32) / 2) /
+ sizeof(struct btrfs_free_space);
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
- mutex_init(&cache->cache_mutex);
+ init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
@@ -6875,6 +7354,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->flags = type;
btrfs_set_block_group_flags(&cache->item, type);
+ cache->cached = BTRFS_CACHE_FINISHED;
+ remove_sb_from_cache(root, cache);
+
+ add_new_free_space(cache, root->fs_info, chunk_offset,
+ chunk_offset + size);
+
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
@@ -6933,7 +7418,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
rb_erase(&block_group->cache_node,
&root->fs_info->block_group_cache_tree);
spin_unlock(&root->fs_info->block_group_cache_lock);
- btrfs_remove_free_space_cache(block_group);
+
down_write(&block_group->space_info->groups_sem);
/*
* we must use list_del_init so people can check to see if they
@@ -6942,11 +7427,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
list_del_init(&block_group->list);
up_write(&block_group->space_info->groups_sem);
+ if (block_group->cached == BTRFS_CACHE_STARTED)
+ wait_event(block_group->caching_q,
+ block_group_cache_done(block_group));
+
+ btrfs_remove_free_space_cache(block_group);
+
spin_lock(&block_group->space_info->lock);
block_group->space_info->total_bytes -= block_group->key.offset;
block_group->space_info->bytes_readonly -= block_group->key.offset;
spin_unlock(&block_group->space_info->lock);
- block_group->space_info->full = 0;
+
+ btrfs_clear_space_info_full(root->fs_info);
btrfs_put_block_group(block_group);
btrfs_put_block_group(block_group);