aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/free-space-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r--fs/btrfs/free-space-cache.c619
1 files changed, 540 insertions, 79 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3283da419200..0598fd3c6e3f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -21,9 +21,11 @@
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"
+#include "discard.h"
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
-#define MAX_CACHE_BYTES_PER_GIG SZ_32K
+#define MAX_CACHE_BYTES_PER_GIG SZ_64K
+#define FORCE_EXTENT_THRESHOLD SZ_1M
struct btrfs_trim_range {
u64 start;
@@ -31,6 +33,8 @@ struct btrfs_trim_range {
struct list_head list;
};
+static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *bitmap_info);
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -752,6 +756,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto free_cache;
}
+ /*
+ * Sync discard ensures that the free space cache is always
+ * trimmed. So when reading this in, the state should reflect
+ * that. We also do this for async as a stop gap for lack of
+ * persistence.
+ */
+ if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
+ btrfs_test_opt(fs_info, DISCARD_ASYNC))
+ e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
+
if (!e->bytes) {
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
@@ -805,12 +819,19 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
ret = io_ctl_read_bitmap(&io_ctl, e);
if (ret)
goto free_cache;
+ e->bitmap_extents = count_bitmap_extents(ctl, e);
+ if (!btrfs_free_space_trimmed(e)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR] +=
+ e->bitmap_extents;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
+ }
}
io_ctl_drop_pages(&io_ctl);
merge_space_tree(ctl);
ret = 1;
out:
+ btrfs_discard_update_discardable(ctl->private, ctl);
io_ctl_free(&io_ctl);
return ret;
free_cache:
@@ -1624,6 +1645,11 @@ __unlink_free_space(struct btrfs_free_space_ctl *ctl,
{
rb_erase(&info->offset_index, &ctl->free_space_offset);
ctl->free_extents--;
+
+ if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR]--;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
+ }
}
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -1644,6 +1670,11 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
if (ret)
return ret;
+ if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR]++;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
+ }
+
ctl->free_space += info->bytes;
ctl->free_extents++;
return ret;
@@ -1664,26 +1695,17 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
ASSERT(ctl->total_bitmaps <= max_bitmaps);
/*
- * The goal is to keep the total amount of memory used per 1gb of space
- * at or below 32k, so we need to adjust how much memory we allow to be
- * used by extent based free space tracking
+ * We are trying to keep the total amount of memory used per 1GiB of
+ * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
+ * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
+ * bitmaps, we may end up using more memory than this.
*/
if (size < SZ_1G)
max_bytes = MAX_CACHE_BYTES_PER_GIG;
else
max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
- /*
- * we want to account for 1 more bitmap than what we have so we can make
- * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
- * we add more bitmaps.
- */
- bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;
-
- if (bitmap_bytes >= max_bytes) {
- ctl->extents_thresh = 0;
- return;
- }
+ bitmap_bytes = ctl->total_bitmaps * ctl->unit;
/*
* we want the extent entry threshold to always be at most 1/2 the max
@@ -1700,17 +1722,31 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info,
u64 offset, u64 bytes)
{
- unsigned long start, count;
+ unsigned long start, count, end;
+ int extent_delta = -1;
start = offset_to_bit(info->offset, ctl->unit, offset);
count = bytes_to_bits(bytes, ctl->unit);
- ASSERT(start + count <= BITS_PER_BITMAP);
+ end = start + count;
+ ASSERT(end <= BITS_PER_BITMAP);
bitmap_clear(info->bitmap, start, count);
info->bytes -= bytes;
if (info->max_extent_size > ctl->unit)
info->max_extent_size = 0;
+
+ if (start && test_bit(start - 1, info->bitmap))
+ extent_delta++;
+
+ if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
+ extent_delta++;
+
+ info->bitmap_extents += extent_delta;
+ if (!btrfs_free_space_trimmed(info)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
+ }
}
static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
@@ -1725,16 +1761,30 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset,
u64 bytes)
{
- unsigned long start, count;
+ unsigned long start, count, end;
+ int extent_delta = 1;
start = offset_to_bit(info->offset, ctl->unit, offset);
count = bytes_to_bits(bytes, ctl->unit);
- ASSERT(start + count <= BITS_PER_BITMAP);
+ end = start + count;
+ ASSERT(end <= BITS_PER_BITMAP);
bitmap_set(info->bitmap, start, count);
info->bytes += bytes;
ctl->free_space += bytes;
+
+ if (start && test_bit(start - 1, info->bitmap))
+ extent_delta--;
+
+ if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
+ extent_delta--;
+
+ info->bitmap_extents += extent_delta;
+ if (!btrfs_free_space_trimmed(info)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
+ }
}
/*
@@ -1870,11 +1920,35 @@ out:
return NULL;
}
+static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *bitmap_info)
+{
+ struct btrfs_block_group *block_group = ctl->private;
+ u64 bytes = bitmap_info->bytes;
+ unsigned int rs, re;
+ int count = 0;
+
+ if (!block_group || !bytes)
+ return count;
+
+ bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
+ BITS_PER_BITMAP) {
+ bytes -= (rs - re) * ctl->unit;
+ count++;
+
+ if (!bytes)
+ break;
+ }
+
+ return count;
+}
+
static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset)
{
info->offset = offset_to_bitmap(ctl, offset);
info->bytes = 0;
+ info->bitmap_extents = 0;
INIT_LIST_HEAD(&info->list);
link_free_space(ctl, info);
ctl->total_bitmaps++;
@@ -1885,6 +1959,18 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info)
{
+ /*
+ * Normally when this is called, the bitmap is completely empty. However,
+ * if we are blowing up the free space cache for one reason or another
+ * via __btrfs_remove_free_space_cache(), then it may not be freed and
+ * we may leave stats on the table.
+ */
+ if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR] -=
+ bitmap_info->bitmap_extents;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
+
+ }
unlink_free_space(ctl, bitmap_info);
kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
@@ -1971,11 +2057,24 @@ again:
static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset,
- u64 bytes)
+ u64 bytes, enum btrfs_trim_state trim_state)
{
u64 bytes_to_set = 0;
u64 end;
+ /*
+ * This is a tradeoff to make bitmap trim state minimal. We mark the
+ * whole bitmap untrimmed if at any point we add untrimmed regions.
+ */
+ if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
+ if (btrfs_free_space_trimmed(info)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR] +=
+ info->bitmap_extents;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
+ }
+ info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+ }
+
end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
bytes_to_set = min(end - offset, bytes);
@@ -2004,6 +2103,10 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
forced = true;
#endif
+ /* This is a way to reclaim large regions from the bitmaps. */
+ if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
+ return false;
+
/*
* If we are below the extents threshold then we can add this as an
* extent, and don't have to deal with the bitmap
@@ -2016,8 +2119,8 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
* of cache left then go ahead an dadd them, no sense in adding
* the overhead of a bitmap if we don't have to.
*/
- if (info->bytes <= fs_info->sectorsize * 4) {
- if (ctl->free_extents * 2 <= ctl->extents_thresh)
+ if (info->bytes <= fs_info->sectorsize * 8) {
+ if (ctl->free_extents * 3 <= ctl->extents_thresh)
return false;
} else {
return false;
@@ -2050,10 +2153,12 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_block_group *block_group = NULL;
int added = 0;
u64 bytes, offset, bytes_added;
+ enum btrfs_trim_state trim_state;
int ret;
bytes = info->bytes;
offset = info->offset;
+ trim_state = info->trim_state;
if (!ctl->op->use_bitmap(ctl, info))
return 0;
@@ -2088,8 +2193,8 @@ again:
}
if (entry->offset == offset_to_bitmap(ctl, offset)) {
- bytes_added = add_bytes_to_bitmap(ctl, entry,
- offset, bytes);
+ bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
+ bytes, trim_state);
bytes -= bytes_added;
offset += bytes_added;
}
@@ -2108,7 +2213,8 @@ no_cluster_bitmap:
goto new_bitmap;
}
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
+ trim_state);
bytes -= bytes_added;
offset += bytes_added;
added = 0;
@@ -2142,6 +2248,7 @@ new_bitmap:
/* allocate the bitmap */
info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
GFP_NOFS);
+ info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
spin_lock(&ctl->tree_lock);
if (!info->bitmap) {
ret = -ENOMEM;
@@ -2161,6 +2268,22 @@ out:
return ret;
}
+/*
+ * Free space merging rules:
+ * 1) Merge trimmed areas together
+ * 2) Let untrimmed areas coalesce with trimmed areas
+ * 3) Always pull neighboring regions from bitmaps
+ *
+ * The above rules are for when we merge free space based on btrfs_trim_state.
+ * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
+ * same reason: to promote larger extent regions which makes life easier for
+ * find_free_extent(). Rule 2 enables coalescing based on the common path
+ * being returning free space from btrfs_finish_extent_commit(). So when free
+ * space is trimmed, it will prevent aggregating trimmed new region and
+ * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents
+ * and provide find_free_extent() with the largest extents possible hoping for
+ * the reuse path.
+ */
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, bool update_stat)
{
@@ -2169,6 +2292,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
bool merged = false;
u64 offset = info->offset;
u64 bytes = info->bytes;
+ const bool is_trimmed = btrfs_free_space_trimmed(info);
/*
* first we want to see if there is free space adjacent to the range we
@@ -2182,7 +2306,9 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
else
left_info = tree_search_offset(ctl, offset - 1, 0, 0);
- if (right_info && !right_info->bitmap) {
+ /* See try_merge_free_space() comment. */
+ if (right_info && !right_info->bitmap &&
+ (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
if (update_stat)
unlink_free_space(ctl, right_info);
else
@@ -2192,8 +2318,10 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
merged = true;
}
+ /* See try_merge_free_space() comment. */
if (left_info && !left_info->bitmap &&
- left_info->offset + left_info->bytes == offset) {
+ left_info->offset + left_info->bytes == offset &&
+ (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
if (update_stat)
unlink_free_space(ctl, left_info);
else
@@ -2229,6 +2357,10 @@ static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
bytes = (j - i) * ctl->unit;
info->bytes += bytes;
+ /* See try_merge_free_space() comment. */
+ if (!btrfs_free_space_trimmed(bitmap))
+ info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
if (update_stat)
bitmap_clear_bits(ctl, bitmap, end, bytes);
else
@@ -2282,6 +2414,10 @@ static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
info->offset -= bytes;
info->bytes += bytes;
+ /* See try_merge_free_space() comment. */
+ if (!btrfs_free_space_trimmed(bitmap))
+ info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
if (update_stat)
bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
else
@@ -2331,10 +2467,13 @@ static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_free_space_ctl *ctl,
- u64 offset, u64 bytes)
+ u64 offset, u64 bytes,
+ enum btrfs_trim_state trim_state)
{
+ struct btrfs_block_group *block_group = ctl->private;
struct btrfs_free_space *info;
int ret = 0;
+ u64 filter_bytes = bytes;
info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
if (!info)
@@ -2342,6 +2481,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
info->offset = offset;
info->bytes = bytes;
+ info->trim_state = trim_state;
RB_CLEAR_NODE(&info->offset_index);
spin_lock(&ctl->tree_lock);
@@ -2370,10 +2510,13 @@ link:
*/
steal_from_bitmap(ctl, info, true);
+ filter_bytes = max(filter_bytes, info->bytes);
+
ret = link_free_space(ctl, info);
if (ret)
kmem_cache_free(btrfs_free_space_cachep, info);
out:
+ btrfs_discard_update_discardable(block_group, ctl);
spin_unlock(&ctl->tree_lock);
if (ret) {
@@ -2381,15 +2524,44 @@ out:
ASSERT(ret != -EEXIST);
}
+ if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
+ btrfs_discard_check_filter(block_group, filter_bytes);
+ btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
+ }
+
return ret;
}
int btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size)
{
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
+ if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
+ trim_state = BTRFS_TRIM_STATE_TRIMMED;
+
+ return __btrfs_add_free_space(block_group->fs_info,
+ block_group->free_space_ctl,
+ bytenr, size, trim_state);
+}
+
+/*
+ * This is a subtle distinction because when adding free space back in general,
+ * we want it to be added as untrimmed for async. But in the case where we add
+ * it on loading of a block group, we want to consider it trimmed.
+ */
+int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
+ u64 bytenr, u64 size)
+{
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
+ if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
+ btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
+ trim_state = BTRFS_TRIM_STATE_TRIMMED;
+
return __btrfs_add_free_space(block_group->fs_info,
block_group->free_space_ctl,
- bytenr, size);
+ bytenr, size, trim_state);
}
int btrfs_remove_free_space(struct btrfs_block_group *block_group,
@@ -2464,8 +2636,10 @@ again:
}
spin_unlock(&ctl->tree_lock);
- ret = btrfs_add_free_space(block_group, offset + bytes,
- old_end - (offset + bytes));
+ ret = __btrfs_add_free_space(block_group->fs_info, ctl,
+ offset + bytes,
+ old_end - (offset + bytes),
+ info->trim_state);
WARN_ON(ret);
goto out;
}
@@ -2477,6 +2651,7 @@ again:
goto again;
}
out_lock:
+ btrfs_discard_update_discardable(block_group, ctl);
spin_unlock(&ctl->tree_lock);
out:
return ret;
@@ -2562,8 +2737,22 @@ __btrfs_return_cluster_to_free_space(
bitmap = (entry->bitmap != NULL);
if (!bitmap) {
+ /* Merging treats extents as if they were new */
+ if (!btrfs_free_space_trimmed(entry)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR]--;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -=
+ entry->bytes;
+ }
+
try_merge_free_space(ctl, entry, false);
steal_from_bitmap(ctl, entry, false);
+
+ /* As we insert directly, update these statistics */
+ if (!btrfs_free_space_trimmed(entry)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR]++;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] +=
+ entry->bytes;
+ }
}
tree_insert_offset(&ctl->free_space_offset,
entry->offset, &entry->offset_index, bitmap);
@@ -2599,6 +2788,8 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
spin_lock(&ctl->tree_lock);
__btrfs_remove_free_space_cache_locked(ctl);
+ if (ctl->private)
+ btrfs_discard_update_discardable(ctl->private, ctl);
spin_unlock(&ctl->tree_lock);
}
@@ -2620,20 +2811,55 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
cond_resched_lock(&ctl->tree_lock);
}
__btrfs_remove_free_space_cache_locked(ctl);
+ btrfs_discard_update_discardable(block_group, ctl);
spin_unlock(&ctl->tree_lock);
}
+/**
+ * btrfs_is_free_space_trimmed - see if everything is trimmed
+ * @block_group: block_group of interest
+ *
+ * Walk @block_group's free space rb_tree to determine if everything is trimmed.
+ */
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
+{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct btrfs_free_space *info;
+ struct rb_node *node;
+ bool ret = true;
+
+ spin_lock(&ctl->tree_lock);
+ node = rb_first(&ctl->free_space_offset);
+
+ while (node) {
+ info = rb_entry(node, struct btrfs_free_space, offset_index);
+
+ if (!btrfs_free_space_trimmed(info)) {
+ ret = false;
+ break;
+ }
+
+ node = rb_next(node);
+ }
+
+ spin_unlock(&ctl->tree_lock);
+ return ret;
+}
+
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct btrfs_discard_ctl *discard_ctl =
+ &block_group->fs_info->discard_ctl;
struct btrfs_free_space *entry = NULL;
u64 bytes_search = bytes + empty_size;
u64 ret = 0;
u64 align_gap = 0;
u64 align_gap_len = 0;
+ enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
spin_lock(&ctl->tree_lock);
entry = find_free_space(ctl, &offset, &bytes_search,
@@ -2644,12 +2870,20 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
ret = offset;
if (entry->bitmap) {
bitmap_clear_bits(ctl, entry, offset, bytes);
+
+ if (!btrfs_free_space_trimmed(entry))
+ atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
+
if (!entry->bytes)
free_bitmap(ctl, entry);
} else {
unlink_free_space(ctl, entry);
align_gap_len = offset - entry->offset;
align_gap = entry->offset;
+ align_gap_trim_state = entry->trim_state;
+
+ if (!btrfs_free_space_trimmed(entry))
+ atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
entry->offset = offset + bytes;
WARN_ON(entry->bytes < bytes + align_gap_len);
@@ -2661,11 +2895,13 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
link_free_space(ctl, entry);
}
out:
+ btrfs_discard_update_discardable(block_group, ctl);
spin_unlock(&ctl->tree_lock);
if (align_gap_len)
__btrfs_add_free_space(block_group->fs_info, ctl,
- align_gap, align_gap_len);
+ align_gap, align_gap_len,
+ align_gap_trim_state);
return ret;
}
@@ -2707,6 +2943,8 @@ int btrfs_return_cluster_to_free_space(
ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
spin_unlock(&ctl->tree_lock);
+ btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
+
/* finally drop our ref */
btrfs_put_block_group(block_group);
return ret;
@@ -2750,6 +2988,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
u64 min_start, u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct btrfs_discard_ctl *discard_ctl =
+ &block_group->fs_info->discard_ctl;
struct btrfs_free_space *entry = NULL;
struct rb_node *node;
u64 ret = 0;
@@ -2814,7 +3054,12 @@ out:
spin_lock(&ctl->tree_lock);
+ if (!btrfs_free_space_trimmed(entry))
+ atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
+
ctl->free_space -= bytes;
+ if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
if (entry->bytes == 0) {
ctl->free_extents--;
if (entry->bitmap) {
@@ -2822,6 +3067,8 @@ out:
entry->bitmap);
ctl->total_bitmaps--;
ctl->op->recalc_thresholds(ctl);
+ } else if (!btrfs_free_space_trimmed(entry)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR]--;
}
kmem_cache_free(btrfs_free_space_cachep, entry);
}
@@ -3148,6 +3395,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
static int do_trimming(struct btrfs_block_group *block_group,
u64 *total_trimmed, u64 start, u64 bytes,
u64 reserved_start, u64 reserved_bytes,
+ enum btrfs_trim_state reserved_trim_state,
struct btrfs_trim_range *trim_entry)
{
struct btrfs_space_info *space_info = block_group->space_info;
@@ -3155,6 +3403,9 @@ static int do_trimming(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
int ret;
int update = 0;
+ const u64 end = start + bytes;
+ const u64 reserved_end = reserved_start + reserved_bytes;
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
u64 trimmed = 0;
spin_lock(&space_info->lock);
@@ -3168,11 +3419,20 @@ static int do_trimming(struct btrfs_block_group *block_group,
spin_unlock(&space_info->lock);
ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
- if (!ret)
+ if (!ret) {
*total_trimmed += trimmed;
+ trim_state = BTRFS_TRIM_STATE_TRIMMED;
+ }
mutex_lock(&ctl->cache_writeout_mutex);
- btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
+ if (reserved_start < start)
+ __btrfs_add_free_space(fs_info, ctl, reserved_start,
+ start - reserved_start,
+ reserved_trim_state);
+ if (start + bytes < reserved_start + reserved_bytes)
+ __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
+ reserved_trim_state);
+ __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
list_del(&trim_entry->list);
mutex_unlock(&ctl->cache_writeout_mutex);
@@ -3190,16 +3450,24 @@ static int do_trimming(struct btrfs_block_group *block_group,
return ret;
}
+/*
+ * If @async is set, then we will trim 1 region and return.
+ */
static int trim_no_bitmap(struct btrfs_block_group *block_group,
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen,
+ bool async)
{
+ struct btrfs_discard_ctl *discard_ctl =
+ &block_group->fs_info->discard_ctl;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
struct rb_node *node;
int ret = 0;
u64 extent_start;
u64 extent_bytes;
+ enum btrfs_trim_state extent_trim_state;
u64 bytes;
+ const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
while (start < end) {
struct btrfs_trim_range trim_entry;
@@ -3207,49 +3475,66 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group,
mutex_lock(&ctl->cache_writeout_mutex);
spin_lock(&ctl->tree_lock);
- if (ctl->free_space < minlen) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- break;
- }
+ if (ctl->free_space < minlen)
+ goto out_unlock;
entry = tree_search_offset(ctl, start, 0, 1);
- if (!entry) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- break;
- }
+ if (!entry)
+ goto out_unlock;
- /* skip bitmaps */
- while (entry->bitmap) {
+ /* Skip bitmaps and if async, already trimmed entries */
+ while (entry->bitmap ||
+ (async && btrfs_free_space_trimmed(entry))) {
node = rb_next(&entry->offset_index);
- if (!node) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- goto out;
- }
+ if (!node)
+ goto out_unlock;
entry = rb_entry(node, struct btrfs_free_space,
offset_index);
}
- if (entry->offset >= end) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- break;
- }
+ if (entry->offset >= end)
+ goto out_unlock;
extent_start = entry->offset;
extent_bytes = entry->bytes;
- start = max(start, extent_start);
- bytes = min(extent_start + extent_bytes, end) - start;
- if (bytes < minlen) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- goto next;
- }
+ extent_trim_state = entry->trim_state;
+ if (async) {
+ start = entry->offset;
+ bytes = entry->bytes;
+ if (bytes < minlen) {
+ spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
+ goto next;
+ }
+ unlink_free_space(ctl, entry);
+ /*
+ * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
+ * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
+ * X when we come back around. So trim it now.
+ */
+ if (max_discard_size &&
+ bytes >= (max_discard_size +
+ BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
+ bytes = max_discard_size;
+ extent_bytes = max_discard_size;
+ entry->offset += max_discard_size;
+ entry->bytes -= max_discard_size;
+ link_free_space(ctl, entry);
+ } else {
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ }
+ } else {
+ start = max(start, extent_start);
+ bytes = min(extent_start + extent_bytes, end) - start;
+ if (bytes < minlen) {
+ spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
+ goto next;
+ }
- unlink_free_space(ctl, entry);
- kmem_cache_free(btrfs_free_space_cachep, entry);
+ unlink_free_space(ctl, entry);
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ }
spin_unlock(&ctl->tree_lock);
trim_entry.start = extent_start;
@@ -3258,11 +3543,17 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group,
mutex_unlock(&ctl->cache_writeout_mutex);
ret = do_trimming(block_group, total_trimmed, start, bytes,
- extent_start, extent_bytes, &trim_entry);
- if (ret)
+ extent_start, extent_bytes, extent_trim_state,
+ &trim_entry);
+ if (ret) {
+ block_group->discard_cursor = start + bytes;
break;
+ }
next:
start += bytes;
+ block_group->discard_cursor = start;
+ if (async && *total_trimmed)
+ break;
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
@@ -3271,19 +3562,76 @@ next:
cond_resched();
}
-out:
+
+ return ret;
+
+out_unlock:
+ block_group->discard_cursor = btrfs_block_group_end(block_group);
+ spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
+
return ret;
}
+/*
+ * If we break out of trimming a bitmap prematurely, we should reset the
+ * trimming bit. In a rather contrieved case, it's possible to race here so
+ * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
+ *
+ * start = start of bitmap
+ * end = near end of bitmap
+ *
+ * Thread 1: Thread 2:
+ * trim_bitmaps(start)
+ * trim_bitmaps(end)
+ * end_trimming_bitmap()
+ * reset_trimming_bitmap()
+ */
+static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
+{
+ struct btrfs_free_space *entry;
+
+ spin_lock(&ctl->tree_lock);
+ entry = tree_search_offset(ctl, offset, 1, 0);
+ if (entry) {
+ if (btrfs_free_space_trimmed(entry)) {
+ ctl->discardable_extents[BTRFS_STAT_CURR] +=
+ entry->bitmap_extents;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
+ }
+ entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+ }
+
+ spin_unlock(&ctl->tree_lock);
+}
+
+static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *entry)
+{
+ if (btrfs_free_space_trimming_bitmap(entry)) {
+ entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
+ ctl->discardable_extents[BTRFS_STAT_CURR] -=
+ entry->bitmap_extents;
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
+ }
+}
+
+/*
+ * If @async is set, then we will trim 1 region and return.
+ */
static int trim_bitmaps(struct btrfs_block_group *block_group,
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen,
+ u64 maxlen, bool async)
{
+ struct btrfs_discard_ctl *discard_ctl =
+ &block_group->fs_info->discard_ctl;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
int ret = 0;
int ret2;
u64 bytes;
u64 offset = offset_to_bitmap(ctl, start);
+ const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
while (offset < end) {
bool next_bitmap = false;
@@ -3293,35 +3641,84 @@ static int trim_bitmaps(struct btrfs_block_group *block_group,
spin_lock(&ctl->tree_lock);
if (ctl->free_space < minlen) {
+ block_group->discard_cursor =
+ btrfs_block_group_end(block_group);
spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
break;
}
entry = tree_search_offset(ctl, offset, 1, 0);
- if (!entry) {
+ /*
+ * Bitmaps are marked trimmed lossily now to prevent constant
+ * discarding of the same bitmap (the reason why we are bound
+ * by the filters). So, retrim the block group bitmaps when we
+ * are preparing to punt to the unused_bgs list. This uses
+ * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
+ * which is the only discard index which sets minlen to 0.
+ */
+ if (!entry || (async && minlen && start == offset &&
+ btrfs_free_space_trimmed(entry))) {
spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
next_bitmap = true;
goto next;
}
+ /*
+ * Async discard bitmap trimming begins at by setting the start
+ * to be key.objectid and the offset_to_bitmap() aligns to the
+ * start of the bitmap. This lets us know we are fully
+ * scanning the bitmap rather than only some portion of it.
+ */
+ if (start == offset)
+ entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
+
bytes = minlen;
ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
if (ret2 || start >= end) {
+ /*
+ * We lossily consider a bitmap trimmed if we only skip
+ * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
+ */
+ if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
+ end_trimming_bitmap(ctl, entry);
+ else
+ entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
next_bitmap = true;
goto next;
}
+ /*
+ * We already trimmed a region, but are using the locking above
+ * to reset the trim_state.
+ */
+ if (async && *total_trimmed) {
+ spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
+ goto out;
+ }
+
bytes = min(bytes, end - start);
- if (bytes < minlen) {
+ if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
goto next;
}
+ /*
+ * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
+ * If X < @minlen, we won't trim X when we come back around.
+ * So trim it now. We differ here from trimming extents as we
+ * don't keep individual state per bit.
+ */
+ if (async &&
+ max_discard_size &&
+ bytes > (max_discard_size + minlen))
+ bytes = max_discard_size;
+
bitmap_clear_bits(ctl, entry, start, bytes);
if (entry->bytes == 0)
free_bitmap(ctl, entry);
@@ -3333,19 +3730,25 @@ static int trim_bitmaps(struct btrfs_block_group *block_group,
mutex_unlock(&ctl->cache_writeout_mutex);
ret = do_trimming(block_group, total_trimmed, start, bytes,
- start, bytes, &trim_entry);
- if (ret)
+ start, bytes, 0, &trim_entry);
+ if (ret) {
+ reset_trimming_bitmap(ctl, offset);
+ block_group->discard_cursor =
+ btrfs_block_group_end(block_group);
break;
+ }
next:
if (next_bitmap) {
offset += BITS_PER_BITMAP * ctl->unit;
+ start = offset;
} else {
start += bytes;
- if (start >= offset + BITS_PER_BITMAP * ctl->unit)
- offset += BITS_PER_BITMAP * ctl->unit;
}
+ block_group->discard_cursor = start;
if (fatal_signal_pending(current)) {
+ if (start != offset)
+ reset_trimming_bitmap(ctl, offset);
ret = -ERESTARTSYS;
break;
}
@@ -3353,6 +3756,10 @@ next:
cond_resched();
}
+ if (offset >= end)
+ block_group->discard_cursor = end;
+
+out:
return ret;
}
@@ -3399,7 +3806,9 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group *block_group)
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
int ret;
+ u64 rem = 0;
*trimmed = 0;
@@ -3411,16 +3820,66 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
btrfs_get_block_group_trimming(block_group);
spin_unlock(&block_group->lock);
- ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
if (ret)
goto out;
- ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
+ div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
+ /* If we ended in the middle of a bitmap, reset the trimming flag */
+ if (rem)
+ reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
out:
btrfs_put_block_group_trimming(block_group);
return ret;
}
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
+ bool async)
+{
+ int ret;
+
+ *trimmed = 0;
+
+ spin_lock(&block_group->lock);
+ if (block_group->removed) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ btrfs_get_block_group_trimming(block_group);
+ spin_unlock(&block_group->lock);
+
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
+ btrfs_put_block_group_trimming(block_group);
+
+ return ret;
+}
+
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
+ u64 maxlen, bool async)
+{
+ int ret;
+
+ *trimmed = 0;
+
+ spin_lock(&block_group->lock);
+ if (block_group->removed) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ btrfs_get_block_group_trimming(block_group);
+ spin_unlock(&block_group->lock);
+
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
+ async);
+
+ btrfs_put_block_group_trimming(block_group);
+
+ return ret;
+}
+
/*
* Find the left-most item in the cache tree, and then return the
* smallest inode number in the item.
@@ -3600,6 +4059,7 @@ int test_add_free_space_entry(struct btrfs_block_group *cache,
struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
struct btrfs_free_space *info = NULL, *bitmap_info;
void *map = NULL;
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
u64 bytes_added;
int ret;
@@ -3641,7 +4101,8 @@ again:
info = NULL;
}
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
+ trim_state);
bytes -= bytes_added;
offset += bytes_added;