-rw-r--r--  fs/btrfs/block-group.c        47
-rw-r--r--  fs/btrfs/ctree.h               9
-rw-r--r--  fs/btrfs/discard.c           115
-rw-r--r--  fs/btrfs/discard.h             3
-rw-r--r--  fs/btrfs/free-space-cache.c   33
-rw-r--r--  fs/btrfs/free-space-cache.h    1
-rw-r--r--  fs/btrfs/scrub.c               7
7 files changed, 206 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6ba15c45e779..ba71a84fd1ee 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1246,6 +1246,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
+ const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC);
int ret = 0;
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
@@ -1275,6 +1276,22 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
+
+ /*
+ * Async discard moves the final block group discard to be prior
+ * to the unused_bgs code path. Therefore, if it's not fully
+ * trimmed, punt it back to the async discard lists.
+ */
+ if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
+ !btrfs_is_free_space_trimmed(block_group)) {
+ trace_btrfs_skip_unused_block_group(block_group);
+ up_write(&space_info->groups_sem);
+ /* Requeue if we failed because of async discard */
+ btrfs_discard_queue_work(&fs_info->discard_ctl,
+ block_group);
+ goto next;
+ }
+
spin_lock(&block_group->lock);
if (block_group->reserved || block_group->pinned ||
block_group->used || block_group->ro ||
@@ -1378,6 +1395,16 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
+ /*
+ * The normal path is that an unused block group is passed here and
+ * trimming is then handled in the transaction commit path. Async
+ * discard interposes before this point to do the trimming before
+ * coming down the unused block group path, as trimming will no
+ * longer be done later in the transaction commit path.
+ */
+ if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
+ goto flip_async;
+
/* DISCARD can flip during remount */
trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
@@ -1422,6 +1449,13 @@ next:
spin_lock(&fs_info->unused_bgs_lock);
}
spin_unlock(&fs_info->unused_bgs_lock);
+ return;
+
+flip_async:
+ btrfs_end_transaction(trans);
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ btrfs_put_block_group(block_group);
+ btrfs_discard_punt_unused_bgs_list(fs_info);
}
void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
@@ -1626,6 +1660,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
set_free_space_tree_thresholds(cache);
+ cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
+
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);
@@ -1792,7 +1828,10 @@ static int read_one_block_group(struct btrfs_fs_info *info,
inc_block_group_ro(cache, 1);
} else if (cache->used == 0) {
ASSERT(list_empty(&cache->bg_list));
- btrfs_mark_bg_unused(cache);
+ if (btrfs_test_opt(info, DISCARD_ASYNC))
+ btrfs_discard_queue_work(&info->discard_ctl, cache);
+ else
+ btrfs_mark_bg_unused(cache);
}
return 0;
error:
@@ -2755,8 +2794,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* dirty list to avoid races between cleaner kthread and space
* cache writeout.
*/
- if (!alloc && old_val == 0)
- btrfs_mark_bg_unused(cache);
+ if (!alloc && old_val == 0) {
+ if (!btrfs_test_opt(info, DISCARD_ASYNC))
+ btrfs_mark_bg_unused(cache);
+ }
btrfs_put_block_group(cache);
total -= num_bytes;
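
Taken together, the block-group.c hunks change where an empty block group goes:
with DISCARD_ASYNC set it is queued for async discard first and only reaches the
unused_bgs deletion path once its free space is fully trimmed. A rough standalone
model of that routing decision (the struct, enum and helper names below are
illustrative stand-ins, not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-ins for the state the patch consults. */
    struct bg_model {
        bool discard_async;             /* mount option DISCARD_ASYNC */
        bool fully_trimmed;             /* what btrfs_is_free_space_trimmed() reports */
        unsigned long long used;
    };

    enum bg_action { BG_DELETE_NOW, BG_QUEUE_ASYNC_DISCARD, BG_LEAVE_ALONE };

    /* Async discard trims first; deletion only proceeds once trimming is done. */
    static enum bg_action classify_empty_bg(const struct bg_model *bg)
    {
        if (bg->used != 0)
            return BG_LEAVE_ALONE;
        if (bg->discard_async && !bg->fully_trimmed)
            return BG_QUEUE_ASYNC_DISCARD;  /* punt back to the discard lists */
        return BG_DELETE_NOW;               /* continue down the unused_bgs path */
    }

    int main(void)
    {
        struct bg_model bg = { .discard_async = true, .used = 0 };

        printf("untrimmed: action=%d\n", classify_empty_bg(&bg));  /* 1: queue discard */
        bg.fully_trimmed = true;
        printf("trimmed:   action=%d\n", classify_empty_bg(&bg));  /* 0: delete now */
        return 0;
    }
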
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f7b429277089..d15a4aa721aa 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -443,9 +443,14 @@ struct btrfs_full_stripe_locks_tree {
/* Discard control. */
/*
* Async discard uses multiple lists to differentiate the discard filter
- * parameters.
+ * parameters. Index 0 is for completely free block groups where we need to
+ * ensure the entire block group is trimmed without being lossy. Indices
+ * afterwards represent monotonically decreasing discard filter sizes to
+ * prioritize what should be discarded next.
*/
-#define BTRFS_NR_DISCARD_LISTS 1
+#define BTRFS_NR_DISCARD_LISTS 2
+#define BTRFS_DISCARD_INDEX_UNUSED 0
+#define BTRFS_DISCARD_INDEX_START 1
struct btrfs_discard_ctl {
struct workqueue_struct *discard_workers;
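
With two lists, index 0 is reserved for completely free block groups and index 1
onward holds the size-filtered work. Assuming, as the rest of the series suggests
(it is not visible in this hunk), that the lists are simply indexed by a block
group's discard_index, picking a list reduces to a check like this illustrative
sketch:

    #include <stdio.h>

    #define BTRFS_DISCARD_INDEX_UNUSED 0
    #define BTRFS_DISCARD_INDEX_START  1

    /* Illustrative only: choose a discard list index from a block group's usage. */
    static int pick_discard_index(unsigned long long used)
    {
        return used == 0 ? BTRFS_DISCARD_INDEX_UNUSED : BTRFS_DISCARD_INDEX_START;
    }

    int main(void)
    {
        printf("empty block group    -> list %d\n", pick_discard_index(0));
        printf("partially used group -> list %d\n", pick_discard_index(1 << 20));
        return 0;
    }
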
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 5924e757471b..0f1c6d01aab0 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -13,6 +13,7 @@
/* This is an initial delay to give some chance for block reuse */
#define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC)
+#define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC)
static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
@@ -30,9 +31,13 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
return;
}
- if (list_empty(&block_group->discard_list))
+ if (list_empty(&block_group->discard_list) ||
+ block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
+ if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
+ block_group->discard_index = BTRFS_DISCARD_INDEX_START;
block_group->discard_eligible_time = (ktime_get_ns() +
BTRFS_DISCARD_DELAY);
+ }
list_move_tail(&block_group->discard_list,
get_discard_list(discard_ctl, block_group));
@@ -40,6 +45,27 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
spin_unlock(&discard_ctl->lock);
}
+static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
+ struct btrfs_block_group *block_group)
+{
+ spin_lock(&discard_ctl->lock);
+
+ if (!btrfs_run_discard_work(discard_ctl)) {
+ spin_unlock(&discard_ctl->lock);
+ return;
+ }
+
+ list_del_init(&block_group->discard_list);
+
+ block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
+ block_group->discard_eligible_time = (ktime_get_ns() +
+ BTRFS_DISCARD_UNUSED_DELAY);
+ list_add_tail(&block_group->discard_list,
+ &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
+
+ spin_unlock(&discard_ctl->lock);
+}
+
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
@@ -154,7 +180,10 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
return;
- add_to_discard_list(discard_ctl, block_group);
+ if (block_group->used == 0)
+ add_to_discard_unused_list(discard_ctl, block_group);
+ else
+ add_to_discard_list(discard_ctl, block_group);
if (!delayed_work_pending(&discard_ctl->work))
btrfs_discard_schedule_work(discard_ctl, false);
@@ -199,6 +228,29 @@ out:
}
/**
+ * btrfs_finish_discard_pass - determine next step of a block_group
+ * @discard_ctl: discard control
+ * @block_group: block_group of interest
+ *
+ * This determines the next step for a block group after it's finished going
+ * through a pass on a discard list. If it is unused and fully trimmed, we can
+ * mark it unused and send it to the unused_bgs path. Otherwise, pass it onto
+ * the appropriate filter list or let it fall off.
+ */
+static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
+ struct btrfs_block_group *block_group)
+{
+ remove_from_discard_list(discard_ctl, block_group);
+
+ if (block_group->used == 0) {
+ if (btrfs_is_free_space_trimmed(block_group))
+ btrfs_mark_bg_unused(block_group);
+ else
+ add_to_discard_unused_list(discard_ctl, block_group);
+ }
+}
+
+/**
* btrfs_discard_workfn - discard work function
* @work: work
*
@@ -219,7 +271,7 @@ static void btrfs_discard_workfn(struct work_struct *work)
btrfs_trim_block_group(block_group, &trimmed, block_group->start,
btrfs_block_group_end(block_group), 0);
- remove_from_discard_list(discard_ctl, block_group);
+ btrfs_finish_discard_pass(discard_ctl, block_group);
btrfs_discard_schedule_work(discard_ctl, false);
}
@@ -239,6 +291,60 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}
+/**
+ * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
+ * @fs_info: fs_info of interest
+ *
+ * The unused_bgs list needs to be punted to the discard lists because the
+ * order of operations is changed. In the normal sychronous discard path, the
+ * block groups are trimmed via a single large trim in transaction commit. This
+ * is ultimately what we are trying to avoid with asynchronous discard. Thus,
+ * it must be done before going down the unused_bgs path.
+ */
+void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_block_group *block_group, *next;
+
+ spin_lock(&fs_info->unused_bgs_lock);
+ /* We enabled async discard, so punt all to the queue */
+ list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
+ bg_list) {
+ list_del_init(&block_group->bg_list);
+ btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+}
+
+/**
+ * btrfs_discard_purge_list - purge discard lists
+ * @discard_ctl: discard control
+ *
+ * If we are disabling async discard, we may have intercepted block groups that
+ * are completely free and ready for the unused_bgs path. As discarding will
+ * now happen in transaction commit or not at all, we can safely mark the
+ * corresponding block groups as unused and they will be sent on their merry
+ * way to the unused_bgs list.
+ */
+static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
+{
+ struct btrfs_block_group *block_group, *next;
+ int i;
+
+ spin_lock(&discard_ctl->lock);
+ for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
+ list_for_each_entry_safe(block_group, next,
+ &discard_ctl->discard_list[i],
+ discard_list) {
+ list_del_init(&block_group->discard_list);
+ spin_unlock(&discard_ctl->lock);
+ if (block_group->used == 0)
+ btrfs_mark_bg_unused(block_group);
+ spin_lock(&discard_ctl->lock);
+ }
+ }
+ spin_unlock(&discard_ctl->lock);
+}
+
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
@@ -246,6 +352,8 @@ void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
return;
}
+ btrfs_discard_punt_unused_bgs_list(fs_info);
+
set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}
@@ -270,4 +378,5 @@ void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
btrfs_discard_stop(fs_info);
cancel_delayed_work_sync(&fs_info->discard_ctl.work);
+ btrfs_discard_purge_list(&fs_info->discard_ctl);
}
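
The two queueing paths above differ mainly in their delay: a completely free block
group parked on the unused list becomes eligible after the short
BTRFS_DISCARD_UNUSED_DELAY, while a block group that sees activity again is bumped
off the unused list to BTRFS_DISCARD_INDEX_START and waits the regular
BTRFS_DISCARD_DELAY. A simplified userspace model of that timing (the clock helper
and struct are stand-ins, not kernel interfaces):

    #include <stdio.h>
    #include <time.h>

    #define NSEC_PER_SEC 1000000000ULL
    #define DISCARD_DELAY        (120ULL * NSEC_PER_SEC)
    #define DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC)

    #define INDEX_UNUSED 0
    #define INDEX_START  1

    struct bg_model {                  /* illustrative, not the kernel struct */
        int discard_index;
        unsigned long long eligible_time;
    };

    static unsigned long long now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long long)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
    }

    /* Like add_to_discard_unused_list(): park on list 0 with the short delay. */
    static void queue_unused(struct bg_model *bg)
    {
        bg->discard_index = INDEX_UNUSED;
        bg->eligible_time = now_ns() + DISCARD_UNUSED_DELAY;
    }

    /*
     * Simplified from add_to_discard_list(): a block group reused off list 0
     * moves to the filtered lists and waits the full delay again.
     */
    static void queue_regular(struct bg_model *bg)
    {
        if (bg->discard_index == INDEX_UNUSED)
            bg->discard_index = INDEX_START;
        bg->eligible_time = now_ns() + DISCARD_DELAY;
    }

    int main(void)
    {
        struct bg_model bg = { .discard_index = INDEX_UNUSED };

        queue_unused(&bg);
        printf("unused: list=%d, eligible in ~%llus\n", bg.discard_index,
               (bg.eligible_time - now_ns()) / NSEC_PER_SEC);

        queue_regular(&bg);            /* the block group saw new extents freed */
        printf("reused: list=%d, eligible in ~%llus\n", bg.discard_index,
               (bg.eligible_time - now_ns()) / NSEC_PER_SEC);
        return 0;
    }
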
diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h
index f3775e84d35a..3c5a04f8714f 100644
--- a/fs/btrfs/discard.h
+++ b/fs/btrfs/discard.h
@@ -7,6 +7,7 @@ struct btrfs_fs_info;
struct btrfs_discard_ctl;
struct btrfs_block_group;
+/* Work operations */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group);
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
@@ -15,6 +16,8 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
bool override);
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);
+/* Setup/cleanup operations */
+void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
void btrfs_discard_resume(struct btrfs_fs_info *fs_info);
void btrfs_discard_stop(struct btrfs_fs_info *fs_info);
void btrfs_discard_init(struct btrfs_fs_info *fs_info);
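
The new setup/cleanup declaration reflects the symmetric transitions: enabling
async discard punts whatever is waiting on unused_bgs over to the discard lists
(btrfs_discard_punt_unused_bgs_list), and disabling it purges the discard lists,
handing empty block groups back to the unused_bgs path. A toy model of those two
handoffs using plain arrays in place of the kernel's locked lists (all names here
are illustrative):

    #include <stdio.h>

    #define MAX_BGS 8

    struct bg_model {                  /* illustrative, not the kernel struct */
        unsigned long long used;
    };

    /* Toy queues standing in for fs_info->unused_bgs and the discard lists. */
    static struct bg_model *unused_bgs[MAX_BGS];
    static struct bg_model *discard_bgs[MAX_BGS];
    static int nr_unused, nr_discard;

    /* Enabling async discard: move everything from unused_bgs to the discard side. */
    static void punt_unused_bgs(void)
    {
        while (nr_unused > 0)
            discard_bgs[nr_discard++] = unused_bgs[--nr_unused];
    }

    /* Disabling async discard: empty block groups go back to the unused_bgs path. */
    static void purge_discard_lists(void)
    {
        while (nr_discard > 0) {
            struct bg_model *bg = discard_bgs[--nr_discard];

            if (bg->used == 0)
                unused_bgs[nr_unused++] = bg;
        }
    }

    int main(void)
    {
        struct bg_model a = { .used = 0 }, b = { .used = 4096 };

        unused_bgs[nr_unused++] = &a;
        punt_unused_bgs();                 /* mount/remount with async discard on */
        discard_bgs[nr_discard++] = &b;    /* a partially used group also queued */

        purge_discard_lists();             /* cleanup when async discard goes away */
        printf("unused after purge: %d, still queued: %d\n", nr_unused, nr_discard);
        return 0;
    }
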
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index fdc5401f3877..3c2796bb6498 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2707,6 +2707,37 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
}
+/**
+ * btrfs_is_free_space_trimmed - see if everything is trimmed
+ * @block_group: block_group of interest
+ *
+ * Walk @block_group's free space rb_tree to determine if everything is trimmed.
+ */
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
+{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct btrfs_free_space *info;
+ struct rb_node *node;
+ bool ret = true;
+
+ spin_lock(&ctl->tree_lock);
+ node = rb_first(&ctl->free_space_offset);
+
+ while (node) {
+ info = rb_entry(node, struct btrfs_free_space, offset_index);
+
+ if (!btrfs_free_space_trimmed(info)) {
+ ret = false;
+ break;
+ }
+
+ node = rb_next(node);
+ }
+
+ spin_unlock(&ctl->tree_lock);
+ return ret;
+}
+
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size)
@@ -2793,6 +2824,8 @@ int btrfs_return_cluster_to_free_space(
ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
spin_unlock(&ctl->tree_lock);
+ btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
+
/* finally drop our ref */
btrfs_put_block_group(block_group);
return ret;
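
btrfs_is_free_space_trimmed() boils down to scanning the block group's free-space
entries for one that has not been discarded yet. Outside the kernel the same check
can be modeled over a plain array of entries (a standalone analogue, not the
rb-tree walk above):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct fs_entry {                  /* analogue of struct btrfs_free_space */
        unsigned long long offset;
        unsigned long long bytes;
        bool trimmed;
    };

    /* True only if every free-space entry has already been discarded. */
    static bool all_trimmed(const struct fs_entry *entries, size_t n)
    {
        for (size_t i = 0; i < n; i++)
            if (!entries[i].trimmed)
                return false;
        return true;
    }

    int main(void)
    {
        struct fs_entry space[] = {
            { .offset = 0,       .bytes = 1 << 20, .trimmed = true },
            { .offset = 1 << 20, .bytes = 1 << 20, .trimmed = false },
        };

        printf("fully trimmed: %s\n",
               all_trimmed(space, sizeof(space) / sizeof(space[0])) ? "yes" : "no");
        return 0;
    }
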
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 6a85a5d16343..f799eb491410 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -119,6 +119,7 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 21de630b0730..22cf69e6e5bc 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -8,6 +8,7 @@
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
+#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
@@ -3659,7 +3660,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache->removed && !cache->ro && cache->reserved == 0 &&
cache->used == 0) {
spin_unlock(&cache->lock);
- btrfs_mark_bg_unused(cache);
+ if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
+ btrfs_discard_queue_work(&fs_info->discard_ctl,
+ cache);
+ else
+ btrfs_mark_bg_unused(cache);
} else {
spin_unlock(&cache->lock);
}