aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c364
1 files changed, 253 insertions, 111 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d81035b7ea7d..1d49694e6ae3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2492,9 +2492,6 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
}
}
- /* Also free its reserved qgroup space */
- btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
- head->qgroup_reserved);
btrfs_delayed_refs_rsv_release(fs_info, nr_items);
}
@@ -3013,8 +3010,7 @@ again:
}
if (run_all) {
- if (!list_empty(&trans->new_bgs))
- btrfs_create_pending_block_groups(trans);
+ btrfs_create_pending_block_groups(trans);
spin_lock(&delayed_refs->lock);
node = rb_first_cached(&delayed_refs->href_root);
@@ -4280,10 +4276,14 @@ commit_trans:
/*
* The cleaner kthread might still be doing iput
* operations. Wait for it to finish so that
- * more space is released.
+ * more space is released. We don't need to
+ * explicitly run the delayed iputs here because
+ * the commit_transaction would have woken up
+ * the cleaner.
*/
- mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
- mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+ ret = btrfs_wait_on_delayed_iputs(fs_info);
+ if (ret)
+ return ret;
goto again;
} else {
btrfs_end_transaction(trans);
@@ -4396,7 +4396,6 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
@@ -4404,14 +4403,6 @@ static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
return 1;
/*
- * We need to take into account the global rsv because for all intents
- * and purposes it's used space. Don't worry about locking the
- * global_rsv, it doesn't change except when the transaction commits.
- */
- if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
- bytes_used += calc_global_rsv_need_space(global_rsv);
-
- /*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
*/
@@ -4741,7 +4732,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
- u64 max_reclaim;
+ u64 async_pages;
u64 items;
long time_left;
unsigned long nr_pages;
@@ -4766,25 +4757,36 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
loops = 0;
while (delalloc_bytes && loops < 3) {
- max_reclaim = min(delalloc_bytes, to_reclaim);
- nr_pages = max_reclaim >> PAGE_SHIFT;
+ nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
+
+ /*
+ * Triggers inode writeback for up to nr_pages. This will invoke
+ * ->writepages callback and trigger delalloc filling
+ * (btrfs_run_delalloc_range()).
+ */
btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
+
/*
- * We need to wait for the async pages to actually start before
- * we do anything.
+ * We need to wait for the compressed pages to start before
+ * we continue.
*/
- max_reclaim = atomic_read(&fs_info->async_delalloc_pages);
- if (!max_reclaim)
+ async_pages = atomic_read(&fs_info->async_delalloc_pages);
+ if (!async_pages)
goto skip_async;
- if (max_reclaim <= nr_pages)
- max_reclaim = 0;
+ /*
+ * Calculate how many compressed pages we want to be written
+ * before we continue. I.e if there are more async pages than we
+ * require wait_event will wait until nr_pages are written.
+ */
+ if (async_pages <= nr_pages)
+ async_pages = 0;
else
- max_reclaim -= nr_pages;
+ async_pages -= nr_pages;
wait_event(fs_info->async_submit_wait,
atomic_read(&fs_info->async_delalloc_pages) <=
- (int)max_reclaim);
+ (int)async_pages);
skip_async:
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets) &&
@@ -4808,6 +4810,7 @@ skip_async:
}
struct reserve_ticket {
+ u64 orig_bytes;
u64 bytes;
int error;
struct list_head list;
@@ -4851,10 +4854,19 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
if (!bytes_needed)
return 0;
- /* See if there is enough pinned space to make this reservation */
- if (__percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
+ trans = btrfs_join_transaction(fs_info->extent_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ /*
+ * See if there is enough pinned space to make this reservation, or if
+ * we have block groups that are going to be freed, allowing us to
+ * possibly do a chunk allocation the next loop through.
+ */
+ if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
+ __percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes_needed,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
goto commit;
/*
@@ -4862,7 +4874,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
* this reservation.
*/
if (space_info != delayed_rsv->space_info)
- return -ENOSPC;
+ goto enospc;
spin_lock(&delayed_rsv->lock);
reclaim_bytes += delayed_rsv->reserved;
@@ -4877,16 +4889,14 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
if (__percpu_counter_compare(&space_info->total_bytes_pinned,
bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
- return -ENOSPC;
- }
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
+ goto enospc;
commit:
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return -ENOSPC;
-
return btrfs_commit_transaction(trans);
+enospc:
+ btrfs_end_transaction(trans);
+ return -ENOSPC;
}
/*
@@ -4939,6 +4949,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
btrfs_end_transaction(trans);
break;
case ALLOC_CHUNK:
+ case ALLOC_CHUNK_FORCE:
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -4946,7 +4957,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
}
ret = do_chunk_alloc(trans,
btrfs_metadata_alloc_profile(fs_info),
- CHUNK_ALLOC_NO_FORCE);
+ (state == ALLOC_CHUNK) ?
+ CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);
if (ret > 0 || ret == -ENOSPC)
ret = 0;
@@ -4957,9 +4969,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
* bunch of pinned space, so make sure we run the iputs before
* we do our pinned bytes check below.
*/
- mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
btrfs_run_delayed_iputs(fs_info);
- mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+ btrfs_wait_on_delayed_iputs(fs_info);
ret = may_commit_transaction(fs_info, space_info);
break;
@@ -5030,7 +5041,7 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
-static void wake_all_tickets(struct list_head *head)
+static bool wake_all_tickets(struct list_head *head)
{
struct reserve_ticket *ticket;
@@ -5039,7 +5050,10 @@ static void wake_all_tickets(struct list_head *head)
list_del_init(&ticket->list);
ticket->error = -ENOSPC;
wake_up(&ticket->wait);
+ if (ticket->bytes != ticket->orig_bytes)
+ return true;
}
+ return false;
}
/*
@@ -5091,11 +5105,28 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
commit_cycles--;
}
+ /*
+ * We don't want to force a chunk allocation until we've tried
+ * pretty hard to reclaim space. Think of the case where we
+ * freed up a bunch of space and so have a lot of pinned space
+ * to reclaim. We would rather use that than possibly create a
+ * underutilized metadata chunk. So if this is our first run
+ * through the flushing state machine skip ALLOC_CHUNK_FORCE and
+ * commit the transaction. If nothing has changed the next go
+ * around then we can force a chunk allocation.
+ */
+ if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+ flush_state++;
+
if (flush_state > COMMIT_TRANS) {
commit_cycles++;
if (commit_cycles > 2) {
- wake_all_tickets(&space_info->tickets);
- space_info->flush = 0;
+ if (wake_all_tickets(&space_info->tickets)) {
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ commit_cycles--;
+ } else {
+ space_info->flush = 0;
+ }
} else {
flush_state = FLUSH_DELAYED_ITEMS_NR;
}
@@ -5109,12 +5140,18 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}
+static const enum btrfs_flush_state priority_flush_states[] = {
+ FLUSH_DELAYED_ITEMS_NR,
+ FLUSH_DELAYED_ITEMS,
+ ALLOC_CHUNK,
+};
+
static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
u64 to_reclaim;
- int flush_state = FLUSH_DELAYED_ITEMS_NR;
+ int flush_state;
spin_lock(&space_info->lock);
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
@@ -5125,8 +5162,10 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
}
spin_unlock(&space_info->lock);
+ flush_state = 0;
do {
- flush_space(fs_info, space_info, to_reclaim, flush_state);
+ flush_space(fs_info, space_info, to_reclaim,
+ priority_flush_states[flush_state]);
flush_state++;
spin_lock(&space_info->lock);
if (ticket->bytes == 0) {
@@ -5134,23 +5173,16 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
return;
}
spin_unlock(&space_info->lock);
-
- /*
- * Priority flushers can't wait on delalloc without
- * deadlocking.
- */
- if (flush_state == FLUSH_DELALLOC ||
- flush_state == FLUSH_DELALLOC_WAIT)
- flush_state = ALLOC_CHUNK;
- } while (flush_state < COMMIT_TRANS);
+ } while (flush_state < ARRAY_SIZE(priority_flush_states));
}
static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket, u64 orig_bytes)
+ struct reserve_ticket *ticket)
{
DEFINE_WAIT(wait);
+ u64 reclaim_bytes = 0;
int ret = 0;
spin_lock(&space_info->lock);
@@ -5171,14 +5203,12 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
ret = ticket->error;
if (!list_empty(&ticket->list))
list_del_init(&ticket->list);
- if (ticket->bytes && ticket->bytes < orig_bytes) {
- u64 num_bytes = orig_bytes - ticket->bytes;
- update_bytes_may_use(space_info, -num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, num_bytes, 0);
- }
+ if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
+ reclaim_bytes = ticket->orig_bytes - ticket->bytes;
spin_unlock(&space_info->lock);
+ if (reclaim_bytes)
+ space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
return ret;
}
@@ -5204,6 +5234,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
{
struct reserve_ticket ticket;
u64 used;
+ u64 reclaim_bytes = 0;
int ret = 0;
ASSERT(orig_bytes);
@@ -5239,6 +5270,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* the list and we will do our own flushing further down.
*/
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
+ ticket.orig_bytes = orig_bytes;
ticket.bytes = orig_bytes;
ticket.error = 0;
init_waitqueue_head(&ticket.wait);
@@ -5279,25 +5311,21 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
return ret;
if (flush == BTRFS_RESERVE_FLUSH_ALL)
- return wait_reserve_ticket(fs_info, space_info, &ticket,
- orig_bytes);
+ return wait_reserve_ticket(fs_info, space_info, &ticket);
ret = 0;
priority_reclaim_metadata_space(fs_info, space_info, &ticket);
spin_lock(&space_info->lock);
if (ticket.bytes) {
- if (ticket.bytes < orig_bytes) {
- u64 num_bytes = orig_bytes - ticket.bytes;
- update_bytes_may_use(space_info, -num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags,
- num_bytes, 0);
-
- }
+ if (ticket.bytes < orig_bytes)
+ reclaim_bytes = orig_bytes - ticket.bytes;
list_del_init(&ticket.list);
ret = -ENOSPC;
}
spin_unlock(&space_info->lock);
+
+ if (reclaim_bytes)
+ space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
ASSERT(list_empty(&ticket.list));
return ret;
}
@@ -5775,6 +5803,21 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
+static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
+ u64 *metadata_bytes, u64 *qgroup_bytes)
+{
+ *metadata_bytes = 0;
+ *qgroup_bytes = 0;
+
+ spin_lock(&block_rsv->lock);
+ if (block_rsv->reserved < block_rsv->size)
+ *metadata_bytes = block_rsv->size - block_rsv->reserved;
+ if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
+ *qgroup_bytes = block_rsv->qgroup_rsv_size -
+ block_rsv->qgroup_rsv_reserved;
+ spin_unlock(&block_rsv->lock);
+}
+
/**
* btrfs_inode_rsv_refill - refill the inode block rsv.
* @inode - the inode we are refilling.
@@ -5790,25 +5833,42 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
{
struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 num_bytes = 0;
- u64 qgroup_num_bytes = 0;
+ u64 num_bytes, last = 0;
+ u64 qgroup_num_bytes;
int ret = -ENOSPC;
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved < block_rsv->size)
- num_bytes = block_rsv->size - block_rsv->reserved;
- if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
- qgroup_num_bytes = block_rsv->qgroup_rsv_size -
- block_rsv->qgroup_rsv_reserved;
- spin_unlock(&block_rsv->lock);
-
+ calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
if (num_bytes == 0)
return 0;
- ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
- if (ret)
- return ret;
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
+ do {
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
+ true);
+ if (ret)
+ return ret;
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
+ if (ret) {
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
+ last = num_bytes;
+ /*
+ * If we are fragmented we can end up with a lot of
+ * outstanding extents which will make our size be much
+ * larger than our reserved amount.
+ *
+ * If the reservation happens here, it might be very
+ * big though not needed in the end, if the delalloc
+ * flushing happens.
+ *
+ * If this is the case try and do the reserve again.
+ */
+ if (flush == BTRFS_RESERVE_FLUSH_ALL)
+ calc_refill_bytes(block_rsv, &num_bytes,
+ &qgroup_num_bytes);
+ if (num_bytes == 0)
+ return 0;
+ }
+ } while (ret && last != num_bytes);
+
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, false);
trace_btrfs_space_reservation(root->fs_info, "delalloc",
@@ -5818,8 +5878,7 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
spin_lock(&block_rsv->lock);
block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
spin_unlock(&block_rsv->lock);
- } else
- btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
+ }
return ret;
}
@@ -8066,6 +8125,15 @@ loop:
return ret;
}
+#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
+do { \
+ struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
+ spin_lock(&__rsv->lock); \
+ btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
+ __rsv->size, __rsv->reserved); \
+ spin_unlock(&__rsv->lock); \
+} while (0)
+
static void dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info, u64 bytes,
int dump_block_groups)
@@ -8085,6 +8153,12 @@ static void dump_space_info(struct btrfs_fs_info *fs_info,
info->bytes_readonly);
spin_unlock(&info->lock);
+ DUMP_BLOCK_RSV(fs_info, global_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
+
if (!dump_block_groups)
return;
@@ -8492,7 +8566,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
clean_tree_block(fs_info, buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
- btrfs_set_lock_blocking(buf);
+ btrfs_set_lock_blocking_write(buf);
set_extent_buffer_uptodate(buf);
memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
@@ -8690,6 +8764,8 @@ struct walk_control {
u64 refs[BTRFS_MAX_LEVEL];
u64 flags[BTRFS_MAX_LEVEL];
struct btrfs_key update_progress;
+ struct btrfs_key drop_progress;
+ int drop_level;
int stage;
int level;
int shared_level;
@@ -8697,6 +8773,7 @@ struct walk_control {
int keep_locks;
int reada_slot;
int reada_count;
+ int restarted;
};
#define DROP_REFERENCE 1
@@ -8860,6 +8937,33 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
}
/*
+ * This is used to verify a ref exists for this root to deal with a bug where we
+ * would have a drop_progress key that hadn't been updated properly.
+ */
+static int check_ref_exists(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr, u64 parent,
+ int level)
+{
+ struct btrfs_path *path;
+ struct btrfs_extent_inline_ref *iref;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = lookup_extent_backref(trans, path, &iref, bytenr,
+ root->fs_info->nodesize, parent,
+ root->root_key.objectid, level, 0);
+ btrfs_free_path(path);
+ if (ret == -ENOENT)
+ return 0;
+ if (ret < 0)
+ return ret;
+ return 1;
+}
+
+/*
* helper to process tree block pointer.
*
* when wc->stage == DROP_REFERENCE, this function checks
@@ -8917,7 +9021,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
reada = 1;
}
btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
+ btrfs_set_lock_blocking_write(next);
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
&wc->refs[level - 1],
@@ -8977,7 +9081,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
return -EIO;
}
btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
+ btrfs_set_lock_blocking_write(next);
}
level--;
@@ -9014,6 +9118,23 @@ skip:
}
/*
+ * If we had a drop_progress we need to verify the refs are set
+ * as expected. If we find our ref then we know that from here
+ * on out everything should be correct, and we can clear the
+ * ->restarted flag.
+ */
+ if (wc->restarted) {
+ ret = check_ref_exists(trans, root, bytenr, parent,
+ level - 1);
+ if (ret < 0)
+ goto out_unlock;
+ if (ret == 0)
+ goto no_delete;
+ ret = 0;
+ wc->restarted = 0;
+ }
+
+ /*
* Reloc tree doesn't contribute to qgroup numbers, and we have
* already accounted them at merge time (replace_path),
* thus we could skip expensive subtree trace here.
@@ -9028,13 +9149,23 @@ skip:
ret);
}
}
+
+ /*
+ * We need to update the next key in our walk control so we can
+ * update the drop_progress key accordingly. We don't care if
+ * find_next_key doesn't find a key because that means we're at
+ * the end and are going to clean up now.
+ */
+ wc->drop_level = level;
+ find_next_key(path, level, &wc->drop_progress);
+
ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize,
parent, root->root_key.objectid,
level - 1, 0);
if (ret)
goto out_unlock;
}
-
+no_delete:
*lookup_info = 1;
ret = 1;
@@ -9089,7 +9220,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (!path->locks[level]) {
BUG_ON(level == 0);
btrfs_tree_lock(eb);
- btrfs_set_lock_blocking(eb);
+ btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, fs_info,
@@ -9131,7 +9262,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (!path->locks[level] &&
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
- btrfs_set_lock_blocking(eb);
+ btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
clean_tree_block(fs_info, eb);
@@ -9298,7 +9429,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_header_level(root->node);
path->nodes[level] = btrfs_lock_root_node(root);
- btrfs_set_lock_blocking(path->nodes[level]);
+ btrfs_set_lock_blocking_write(path->nodes[level]);
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
memset(&wc->update_progress, 0,
@@ -9328,7 +9459,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
level = btrfs_header_level(root->node);
while (1) {
btrfs_tree_lock(path->nodes[level]);
- btrfs_set_lock_blocking(path->nodes[level]);
+ btrfs_set_lock_blocking_write(path->nodes[level]);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, fs_info,
@@ -9351,6 +9482,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
}
+ wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
wc->level = level;
wc->shared_level = -1;
wc->stage = DROP_REFERENCE;
@@ -9378,12 +9510,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
if (wc->stage == DROP_REFERENCE) {
- level = wc->level;
- btrfs_node_key(path->nodes[level],
- &root_item->drop_progress,
- path->slots[level]);
- root_item->drop_level = level;
+ wc->drop_level = wc->level;
+ btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
+ &wc->drop_progress,
+ path->slots[wc->drop_level]);
}
+ btrfs_cpu_key_to_disk(&root_item->drop_progress,
+ &wc->drop_progress);
+ root_item->drop_level = wc->drop_level;
BUG_ON(wc->level == 0);
if (btrfs_should_end_transaction(trans) ||
@@ -9595,6 +9729,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
+ u64 sinfo_used;
u64 min_allocable_bytes;
int ret = -ENOSPC;
@@ -9621,9 +9756,10 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
+ sinfo_used = btrfs_space_info_used(sinfo, true);
- if (btrfs_space_info_used(sinfo, true) + num_bytes +
- min_allocable_bytes <= sinfo->total_bytes) {
+ if (sinfo_used + num_bytes + min_allocable_bytes <=
+ sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
cache->ro++;
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
@@ -9632,6 +9768,15 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
out:
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
+ if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
+ btrfs_info(cache->fs_info,
+ "unable to make block group %llu ro",
+ cache->key.objectid);
+ btrfs_info(cache->fs_info,
+ "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
+ sinfo_used, num_bytes, min_allocable_bytes);
+ dump_space_info(cache->fs_info, cache->space_info, 0, 0);
+ }
return ret;
}
@@ -10781,13 +10926,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
}
spin_lock(&trans->transaction->dirty_bgs_lock);
- if (!list_empty(&block_group->dirty_list)) {
- WARN_ON(1);
- }
- if (!list_empty(&block_group->io_list)) {
- WARN_ON(1);
- }
+ WARN_ON(!list_empty(&block_group->dirty_list));
+ WARN_ON(!list_empty(&block_group->io_list));
spin_unlock(&trans->transaction->dirty_bgs_lock);
+
btrfs_remove_free_space_cache(block_group);
spin_lock(&block_group->space_info->lock);