about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 651
1 files changed, 243 insertions, 408 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c5880329ae37..f79e477a378e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -643,7 +643,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
mutex_lock(&caching_ctl->mutex);
- ret = load_free_space_cache(fs_info, cache);
+ ret = load_free_space_cache(cache);
spin_lock(&cache->lock);
if (ret == 1) {
@@ -756,14 +756,15 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
return NULL;
}
-static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
- bool metadata, u64 root_objectid)
+static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_ref *ref)
{
struct btrfs_space_info *space_info;
+ s64 num_bytes = -ref->len;
u64 flags;
- if (metadata) {
- if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ if (ref->type == BTRFS_REF_METADATA) {
+ if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
flags = BTRFS_BLOCK_GROUP_METADATA;
@@ -1704,7 +1705,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
type = extent_ref_type(parent, owner);
size = btrfs_extent_inline_ref_size(type);
- btrfs_extend_item(fs_info, path, size);
+ btrfs_extend_item(path, size);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
@@ -1779,7 +1780,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
int *last_ref)
{
struct extent_buffer *leaf = path->nodes[0];
- struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
struct btrfs_extent_data_ref *dref = NULL;
struct btrfs_shared_data_ref *sref = NULL;
@@ -1834,7 +1834,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
memmove_extent_buffer(leaf, ptr, ptr + size,
end - ptr - size);
item_size -= size;
- btrfs_truncate_item(fs_info, path, item_size, 1);
+ btrfs_truncate_item(path, item_size, 1);
}
btrfs_mark_buffer_dirty(leaf);
}
@@ -1905,7 +1905,6 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
return ret;
}
-#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 *discarded_bytes)
{
@@ -2043,39 +2042,28 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset)
+ struct btrfs_ref *generic_ref)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int old_ref_mod, new_ref_mod;
int ret;
- BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
- root_objectid == BTRFS_TREE_LOG_OBJECTID);
+ ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
+ generic_ref->action);
+ BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
+ generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
- btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
- owner, offset, BTRFS_ADD_DELAYED_REF);
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL,
- &old_ref_mod, &new_ref_mod);
- } else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, owner, offset,
- 0, BTRFS_ADD_DELAYED_REF,
+ if (generic_ref->type == BTRFS_REF_METADATA)
+ ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
+ NULL, &old_ref_mod, &new_ref_mod);
+ else
+ ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
&old_ref_mod, &new_ref_mod);
- }
- if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
- bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+ btrfs_ref_tree_mod(fs_info, generic_ref);
- add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
- }
+ if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
+ add_pinned_bytes(fs_info, generic_ref);
return ret;
}
@@ -2877,97 +2865,6 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
return btrfs_check_space_for_delayed_refs(trans->fs_info);
}
-struct async_delayed_refs {
- struct btrfs_root *root;
- u64 transid;
- int count;
- int error;
- int sync;
- struct completion wait;
- struct btrfs_work work;
-};
-
-static inline struct async_delayed_refs *
-to_async_delayed_refs(struct btrfs_work *work)
-{
- return container_of(work, struct async_delayed_refs, work);
-}
-
-static void delayed_ref_async_start(struct btrfs_work *work)
-{
- struct async_delayed_refs *async = to_async_delayed_refs(work);
- struct btrfs_trans_handle *trans;
- struct btrfs_fs_info *fs_info = async->root->fs_info;
- int ret;
-
- /* if the commit is already started, we don't need to wait here */
- if (btrfs_transaction_blocked(fs_info))
- goto done;
-
- trans = btrfs_join_transaction(async->root);
- if (IS_ERR(trans)) {
- async->error = PTR_ERR(trans);
- goto done;
- }
-
- /*
- * trans->sync means that when we call end_transaction, we won't
- * wait on delayed refs
- */
- trans->sync = true;
-
- /* Don't bother flushing if we got into a different transaction */
- if (trans->transid > async->transid)
- goto end;
-
- ret = btrfs_run_delayed_refs(trans, async->count);
- if (ret)
- async->error = ret;
-end:
- ret = btrfs_end_transaction(trans);
- if (ret && !async->error)
- async->error = ret;
-done:
- if (async->sync)
- complete(&async->wait);
- else
- kfree(async);
-}
-
-int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
- unsigned long count, u64 transid, int wait)
-{
- struct async_delayed_refs *async;
- int ret;
-
- async = kmalloc(sizeof(*async), GFP_NOFS);
- if (!async)
- return -ENOMEM;
-
- async->root = fs_info->tree_root;
- async->count = count;
- async->error = 0;
- async->transid = transid;
- if (wait)
- async->sync = 1;
- else
- async->sync = 0;
- init_completion(&async->wait);
-
- btrfs_init_work(&async->work, btrfs_extent_refs_helper,
- delayed_ref_async_start, NULL, NULL);
-
- btrfs_queue_work(fs_info->extent_workers, &async->work);
-
- if (wait) {
- wait_for_completion(&async->wait);
- ret = async->error;
- kfree(async);
- return ret;
- }
- return 0;
-}
-
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -3036,7 +2933,6 @@ out:
}
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data)
{
@@ -3053,8 +2949,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->is_data = is_data ? true : false;
extent_op->level = level;
- ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
- num_bytes, extent_op);
+ ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
if (ret)
btrfs_free_delayed_extent_op(extent_op);
return ret;
@@ -3246,13 +3141,12 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
u32 nritems;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
+ struct btrfs_ref generic_ref = { 0 };
+ bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
int i;
+ int action;
int level;
int ret = 0;
- int (*process_func)(struct btrfs_trans_handle *,
- struct btrfs_root *,
- u64, u64, u64, u64, u64, u64);
-
if (btrfs_is_testing(fs_info))
return 0;
@@ -3264,15 +3158,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
return 0;
- if (inc)
- process_func = btrfs_inc_extent_ref;
- else
- process_func = btrfs_free_extent;
-
if (full_backref)
parent = buf->start;
else
parent = 0;
+ if (inc)
+ action = BTRFS_ADD_DELAYED_REF;
+ else
+ action = BTRFS_DROP_DELAYED_REF;
for (i = 0; i < nritems; i++) {
if (level == 0) {
@@ -3290,16 +3183,30 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
key.offset -= btrfs_file_extent_offset(buf, fi);
- ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, key.objectid,
- key.offset);
+ btrfs_init_generic_ref(&generic_ref, action, bytenr,
+ num_bytes, parent);
+ generic_ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
+ key.offset);
+ generic_ref.skip_qgroup = for_reloc;
+ if (inc)
+ ret = btrfs_inc_extent_ref(trans, &generic_ref);
+ else
+ ret = btrfs_free_extent(trans, &generic_ref);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = fs_info->nodesize;
- ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, level - 1, 0);
+ btrfs_init_generic_ref(&generic_ref, action, bytenr,
+ num_bytes, parent);
+ generic_ref.real_root = root->root_key.objectid;
+ btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
+ generic_ref.skip_qgroup = for_reloc;
+ if (inc)
+ ret = btrfs_inc_extent_ref(trans, &generic_ref);
+ else
+ ret = btrfs_free_extent(trans, &generic_ref);
if (ret)
goto fail;
}
@@ -3322,10 +3229,10 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_root *extent_root = fs_info->extent_root;
unsigned long bi;
@@ -3348,10 +3255,10 @@ fail:
}
-static struct btrfs_block_group_cache *
-next_block_group(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+static struct btrfs_block_group_cache *next_block_group(
+ struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
struct rb_node *node;
spin_lock(&fs_info->block_group_cache_lock);
@@ -3404,7 +3311,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
if (trans->aborted)
return 0;
again:
- inode = lookup_free_space_inode(fs_info, block_group, path);
+ inode = lookup_free_space_inode(block_group, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
ret = PTR_ERR(inode);
btrfs_release_path(path);
@@ -3418,8 +3325,7 @@ again:
if (block_group->ro)
goto out_free;
- ret = create_free_space_inode(fs_info, trans, block_group,
- path);
+ ret = create_free_space_inode(trans, block_group, path);
if (ret)
goto out_free;
goto again;
@@ -3538,9 +3444,9 @@ out:
return ret;
}
-int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *cache, *tmp;
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_path *path;
@@ -3652,8 +3558,7 @@ again:
if (cache->disk_cache_state == BTRFS_DC_SETUP) {
cache->io_ctl.inode = NULL;
- ret = btrfs_write_out_cache(fs_info, trans,
- cache, path);
+ ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
num_started++;
should_put = 0;
@@ -3673,8 +3578,7 @@ again:
}
}
if (!ret) {
- ret = write_one_cache_group(trans, fs_info,
- path, cache);
+ ret = write_one_cache_group(trans, path, cache);
/*
* Our block group might still be attached to the list
* of new block groups in the transaction handle of some
@@ -3744,9 +3648,9 @@ again:
return ret;
}
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *cache;
struct btrfs_transaction *cur_trans = trans->transaction;
int ret = 0;
@@ -3809,8 +3713,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
cache->io_ctl.inode = NULL;
- ret = btrfs_write_out_cache(fs_info, trans,
- cache, path);
+ ret = btrfs_write_out_cache(trans, cache, path);
if (ret == 0 && cache->io_ctl.inode) {
num_started++;
should_put = 0;
@@ -3824,8 +3727,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
}
}
if (!ret) {
- ret = write_one_cache_group(trans, fs_info,
- path, cache);
+ ret = write_one_cache_group(trans, path, cache);
/*
* One of the free space endio workers might have
* created a new block group while updating a free space
@@ -3842,8 +3744,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (ret == -ENOENT) {
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
- ret = write_one_cache_group(trans, fs_info,
- path, cache);
+ ret = write_one_cache_group(trans, path, cache);
}
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -4732,6 +4633,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
+ u64 dio_bytes;
u64 async_pages;
u64 items;
long time_left;
@@ -4747,7 +4649,8 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
delalloc_bytes = percpu_counter_sum_positive(
&fs_info->delalloc_bytes);
- if (delalloc_bytes == 0) {
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
+ if (delalloc_bytes == 0 && dio_bytes == 0) {
if (trans)
return;
if (wait_ordered)
@@ -4755,8 +4658,16 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
return;
}
+ /*
+ * If we are doing more ordered than delalloc we need to just wait on
+ * ordered extents, otherwise we'll waste time trying to flush delalloc
+ * that likely won't give us the space back we need.
+ */
+ if (dio_bytes > delalloc_bytes)
+ wait_ordered = true;
+
loops = 0;
- while (delalloc_bytes && loops < 3) {
+ while ((delalloc_bytes || dio_bytes) && loops < 3) {
nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
/*
@@ -4806,6 +4717,7 @@ skip_async:
}
delalloc_bytes = percpu_counter_sum_positive(
&fs_info->delalloc_bytes);
+ dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
}
}
@@ -5803,85 +5715,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
-static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
- u64 *metadata_bytes, u64 *qgroup_bytes)
-{
- *metadata_bytes = 0;
- *qgroup_bytes = 0;
-
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved < block_rsv->size)
- *metadata_bytes = block_rsv->size - block_rsv->reserved;
- if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
- *qgroup_bytes = block_rsv->qgroup_rsv_size -
- block_rsv->qgroup_rsv_reserved;
- spin_unlock(&block_rsv->lock);
-}
-
-/**
- * btrfs_inode_rsv_refill - refill the inode block rsv.
- * @inode - the inode we are refilling.
- * @flush - the flushing restriction.
- *
- * Essentially the same as btrfs_block_rsv_refill, except it uses the
- * block_rsv->size as the minimum size. We'll either refill the missing amount
- * or return if we already have enough space. This will also handle the reserve
- * tracepoint for the reserved amount.
- */
-static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
- enum btrfs_reserve_flush_enum flush)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 num_bytes, last = 0;
- u64 qgroup_num_bytes;
- int ret = -ENOSPC;
-
- calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
- if (num_bytes == 0)
- return 0;
-
- do {
- ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
- true);
- if (ret)
- return ret;
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (ret) {
- btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
- last = num_bytes;
- /*
- * If we are fragmented we can end up with a lot of
- * outstanding extents which will make our size be much
- * larger than our reserved amount.
- *
- * If the reservation happens here, it might be very
- * big though not needed in the end, if the delalloc
- * flushing happens.
- *
- * If this is the case try and do the reserve again.
- */
- if (flush == BTRFS_RESERVE_FLUSH_ALL)
- calc_refill_bytes(block_rsv, &num_bytes,
- &qgroup_num_bytes);
- if (num_bytes == 0)
- return 0;
- }
- } while (ret && last != num_bytes);
-
- if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, false);
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), num_bytes, 1);
-
- /* Don't forget to increase qgroup_rsv_reserved */
- spin_lock(&block_rsv->lock);
- block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
- spin_unlock(&block_rsv->lock);
- }
- return ret;
-}
-
static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, u64 *qgroup_to_release)
@@ -6182,9 +6015,25 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
spin_unlock(&block_rsv->lock);
}
+static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+ u64 num_bytes, u64 *meta_reserve,
+ u64 *qgroup_reserve)
+{
+ u64 nr_extents = count_max_extents(num_bytes);
+ u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+
+ /* We add one for the inode update at finish ordered time */
+ *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
+ nr_extents + csum_leaves + 1);
+ *qgroup_reserve = nr_extents * fs_info->nodesize;
+}
+
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+ u64 meta_reserve, qgroup_reserve;
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
@@ -6214,7 +6063,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
- /* Add our new extents and calculate the new rsv size. */
+ /*
+ * We always want to do it this way, every other way is wrong and ends
+ * in tears. Pre-reserving the amount we are going to add will always
+ * be the right way, because otherwise if we have enough parallelism we
+ * could end up with thousands of inodes all holding little bits of
+ * reservations they were able to make previously and the only way to
+ * reclaim that space is to ENOSPC out the operations and clear
+ * everything out and try again, which is bad. This way we just
+ * over-reserve slightly, and clean up the mess when we are done.
+ */
+ calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
+ &qgroup_reserve);
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
+ if (ret)
+ goto out_fail;
+ ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
+ if (ret)
+ goto out_qgroup;
+
+ /*
+ * Now we need to update our outstanding extents and csum bytes _first_
+ * and then add the reservation to the block_rsv. This keeps us from
+ * racing with an ordered completion or some such that would think it
+ * needs to free the reservation we just made.
+ */
spin_lock(&inode->lock);
nr_extents = count_max_extents(num_bytes);
btrfs_mod_outstanding_extents(inode, nr_extents);
@@ -6222,22 +6095,21 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
- ret = btrfs_inode_rsv_refill(inode, flush);
- if (unlikely(ret))
- goto out_fail;
+ /* Now we can safely add our space to our block rsv */
+ block_rsv_add_bytes(block_rsv, meta_reserve, false);
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
+ btrfs_ino(inode), meta_reserve, 1);
+
+ spin_lock(&block_rsv->lock);
+ block_rsv->qgroup_rsv_reserved += qgroup_reserve;
+ spin_unlock(&block_rsv->lock);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return 0;
-
+out_qgroup:
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
out_fail:
- spin_lock(&inode->lock);
- nr_extents = count_max_extents(num_bytes);
- btrfs_mod_outstanding_extents(inode, -nr_extents);
- inode->csum_bytes -= num_bytes;
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
btrfs_inode_rsv_release(inode, true);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
@@ -6361,9 +6233,9 @@ void btrfs_delalloc_release_space(struct inode *inode,
}
static int update_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *info, u64 bytenr,
- u64 num_bytes, int alloc)
+ u64 bytenr, u64 num_bytes, int alloc)
{
+ struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_block_group_cache *cache = NULL;
u64 total = num_bytes;
u64 old_val;
@@ -6444,7 +6316,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
if (list_empty(&cache->dirty_list)) {
list_add_tail(&cache->dirty_list,
&trans->transaction->dirty_bgs);
- trans->transaction->num_dirty_bgs++;
trans->delayed_ref_updates++;
btrfs_get_block_group(cache);
}
@@ -6491,10 +6362,11 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
return bytenr;
}
-static int pin_down_extent(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+static int pin_down_extent(struct btrfs_block_group_cache *cache,
u64 bytenr, u64 num_bytes, int reserved)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
@@ -6526,7 +6398,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache); /* Logic error */
- pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved);
+ pin_down_extent(cache, bytenr, num_bytes, reserved);
btrfs_put_block_group(cache);
return 0;
@@ -6553,7 +6425,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
*/
cache_block_group(cache, 1);
- pin_down_extent(fs_info, cache, bytenr, num_bytes, 0);
+ pin_down_extent(cache, bytenr, num_bytes, 0);
/* remove us from the free space cache (if we're there at all) */
ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
@@ -6607,9 +6479,9 @@ out_lock:
return ret;
}
-int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb)
+int btrfs_exclude_logged_extents(struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_file_extent_item *item;
struct btrfs_key key;
int found_type;
@@ -7198,7 +7070,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
- ret = update_block_group(trans, info, bytenr, num_bytes, 0);
+ ret = update_block_group(trans, bytenr, num_bytes, 0);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7272,21 +7144,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
u64 parent, int last_ref)
{
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_ref generic_ref = { 0 };
int pin = 1;
int ret;
+ btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
+ buf->start, buf->len, parent);
+ btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
+ root->root_key.objectid);
+
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
int old_ref_mod, new_ref_mod;
- btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
- root->root_key.objectid,
- btrfs_header_level(buf), 0,
- BTRFS_DROP_DELAYED_REF);
- ret = btrfs_add_delayed_tree_ref(trans, buf->start,
- buf->len, parent,
- root->root_key.objectid,
- btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL,
+ btrfs_ref_tree_mod(fs_info, &generic_ref);
+ ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
&old_ref_mod, &new_ref_mod);
BUG_ON(ret); /* -ENOMEM */
pin = old_ref_mod >= 0 && new_ref_mod < 0;
@@ -7305,8 +7176,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_block_group(fs_info, buf->start);
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
- pin_down_extent(fs_info, cache, buf->start,
- buf->len, 1);
+ pin_down_extent(cache, buf->start, buf->len, 1);
btrfs_put_block_group(cache);
goto out;
}
@@ -7320,8 +7190,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
out:
if (pin)
- add_pinned_bytes(fs_info, buf->len, true,
- root->root_key.objectid);
+ add_pinned_bytes(fs_info, &generic_ref);
if (last_ref) {
/*
@@ -7333,52 +7202,43 @@ out:
}
/* Can return -ENOMEM */
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset)
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int old_ref_mod, new_ref_mod;
int ret;
if (btrfs_is_testing(fs_info))
return 0;
- if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
- btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
- root_objectid, owner, offset,
- BTRFS_DROP_DELAYED_REF);
-
/*
* tree log blocks never actually go into the extent allocation
* tree, just update pinning info and exit early.
*/
- if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
- WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
+ if ((ref->type == BTRFS_REF_METADATA &&
+ ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+ (ref->type == BTRFS_REF_DATA &&
+ ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
/* unlocks the pinned mutex */
- btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
+ btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
old_ref_mod = new_ref_mod = 0;
ret = 0;
- } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL,
+ } else if (ref->type == BTRFS_REF_METADATA) {
+ ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
&old_ref_mod, &new_ref_mod);
} else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr,
- num_bytes, parent,
- root_objectid, owner, offset,
- 0, BTRFS_DROP_DELAYED_REF,
+ ret = btrfs_add_delayed_data_ref(trans, ref, 0,
&old_ref_mod, &new_ref_mod);
}
- if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
- bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+ if (!((ref->type == BTRFS_REF_METADATA &&
+ ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+ (ref->type == BTRFS_REF_DATA &&
+ ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
+ btrfs_ref_tree_mod(fs_info, ref);
- add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
- }
+ if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
+ add_pinned_bytes(fs_info, ref);
return ret;
}
@@ -7569,7 +7429,6 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_block_group_cache **cluster_bg_ret)
{
- struct btrfs_fs_info *fs_info = bg->fs_info;
struct btrfs_block_group_cache *cluster_bg;
u64 aligned_cluster;
u64 offset;
@@ -7629,9 +7488,8 @@ refill_cluster:
aligned_cluster = max_t(u64,
ffe_ctl->empty_cluster + ffe_ctl->empty_size,
bg->full_stripe_len);
- ret = btrfs_find_space_cluster(fs_info, bg, last_ptr,
- ffe_ctl->search_start, ffe_ctl->num_bytes,
- aligned_cluster);
+ ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
+ ffe_ctl->num_bytes, aligned_cluster);
if (ret == 0) {
/* Now pull our allocation out of this cluster */
offset = btrfs_alloc_from_cluster(bg, last_ptr,
@@ -8281,7 +8139,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
}
if (pin)
- pin_down_extent(fs_info, cache, start, len, 1);
+ pin_down_extent(cache, start, len, 1);
else {
if (btrfs_test_opt(fs_info, DISCARD))
ret = btrfs_discard_extent(fs_info, start, len, NULL);
@@ -8370,7 +8228,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1);
+ ret = update_block_group(trans, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
ins->objectid, ins->offset);
@@ -8460,7 +8318,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = update_block_group(trans, fs_info, extent_key.objectid,
+ ret = update_block_group(trans, extent_key.objectid,
fs_info->nodesize, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -8478,19 +8336,17 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins)
{
+ struct btrfs_ref generic_ref = { 0 };
int ret;
BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
- btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
- root->root_key.objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT);
-
- ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
- ins->offset, 0,
- root->root_key.objectid, owner,
- offset, ram_bytes,
- BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
+ btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+ ins->objectid, ins->offset, 0);
+ btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
+ btrfs_ref_tree_mod(root->fs_info, &generic_ref);
+ ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
+ ram_bytes, NULL, NULL);
return ret;
}
@@ -8563,7 +8419,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
- clean_tree_block(fs_info, buf);
+ btrfs_clean_tree_block(buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking_write(buf);
@@ -8682,6 +8538,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf;
struct btrfs_delayed_extent_op *extent_op;
+ struct btrfs_ref generic_ref = { 0 };
u64 flags = 0;
int ret;
u32 blocksize = fs_info->nodesize;
@@ -8736,13 +8593,12 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op->is_data = false;
extent_op->level = level;
- btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
- root_objectid, level, 0,
- BTRFS_ADD_DELAYED_EXTENT);
- ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
- ins.offset, parent,
- root_objectid, level,
- BTRFS_ADD_DELAYED_EXTENT,
+ btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+ ins.objectid, ins.offset, parent);
+ generic_ref.real_root = root->root_key.objectid;
+ btrfs_init_tree_ref(&generic_ref, level, root_objectid);
+ btrfs_ref_tree_mod(fs_info, &generic_ref);
+ ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
extent_op, NULL, NULL);
if (ret)
goto out_free_delayed;
@@ -8918,7 +8774,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start,
+ ret = btrfs_set_disk_extent_flags(trans, eb->start,
eb->len, flag,
btrfs_header_level(eb), 0);
BUG_ON(ret); /* -ENOMEM */
@@ -8987,6 +8843,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
u64 parent;
struct btrfs_key key;
struct btrfs_key first_key;
+ struct btrfs_ref ref = { 0 };
struct extent_buffer *next;
int level = wc->level;
int reada = 0;
@@ -9159,9 +9016,10 @@ skip:
wc->drop_level = level;
find_next_key(path, level, &wc->drop_progress);
- ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize,
- parent, root->root_key.objectid,
- level - 1, 0);
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+ fs_info->nodesize, parent);
+ btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
+ ret = btrfs_free_extent(trans, &ref);
if (ret)
goto out_unlock;
}
@@ -9251,21 +9109,23 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
else
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_qgroup_trace_leaf_items(trans, eb);
- if (ret) {
- btrfs_err_rl(fs_info,
- "error %d accounting leaf items. Quota is out of sync, rescan required.",
+ if (is_fstree(root->root_key.objectid)) {
+ ret = btrfs_qgroup_trace_leaf_items(trans, eb);
+ if (ret) {
+ btrfs_err_rl(fs_info,
+ "error %d accounting leaf items, quota is out of sync, rescan required",
ret);
+ }
}
}
- /* make block locked assertion in clean_tree_block happy */
+ /* make block locked assertion in btrfs_clean_tree_block happy */
if (!path->locks[level] &&
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
- clean_tree_block(fs_info, eb);
+ btrfs_clean_tree_block(eb);
}
if (eb == root->node) {
@@ -9921,12 +9781,10 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
*/
int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
{
- struct btrfs_root *root = fs_info->extent_root;
struct btrfs_block_group_cache *block_group;
struct btrfs_space_info *space_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
- struct btrfs_trans_handle *trans;
u64 min_free;
u64 dev_min = 1;
u64 dev_nr = 0;
@@ -10025,13 +9883,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
min_free = div64_u64(min_free, dev_min);
}
- /* We need to do this so that we can look at pending chunks */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
-
mutex_lock(&fs_info->chunk_mutex);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
u64 dev_offset;
@@ -10042,7 +9893,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
*/
if (device->total_bytes > device->bytes_used + min_free &&
!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
- ret = find_free_dev_extent(trans, device, min_free,
+ ret = find_free_dev_extent(device, min_free,
&dev_offset, NULL);
if (!ret)
dev_nr++;
@@ -10058,7 +9909,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
"no space to allocate a new chunk for block group %llu",
block_group->key.objectid);
mutex_unlock(&fs_info->chunk_mutex);
- btrfs_end_transaction(trans);
out:
btrfs_put_block_group(block_group);
return ret;
@@ -10159,7 +10009,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
if (block_group->iref)
break;
spin_unlock(&block_group->lock);
- block_group = next_block_group(info, block_group);
+ block_group = next_block_group(block_group);
}
if (!block_group) {
if (last == 0)
@@ -10660,7 +10510,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
struct btrfs_block_group_cache *cache;
int ret;
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
if (!cache)
@@ -10808,7 +10658,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* get the inode first so any iput calls done for the io_list
* aren't the final iput (no unlinks allowed now)
*/
- inode = lookup_free_space_inode(fs_info, block_group, path);
+ inode = lookup_free_space_inode(block_group, path);
mutex_lock(&trans->transaction->cache_write_mutex);
/*
@@ -10952,10 +10802,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
memcpy(&key, &block_group->key, sizeof(key));
mutex_lock(&fs_info->chunk_mutex);
- if (!list_empty(&em->list)) {
- /* We're in the transaction->pending_chunks list. */
- free_extent_map(em);
- }
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
@@ -10982,25 +10828,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* the transaction commit has completed.
*/
remove_em = (atomic_read(&block_group->trimming) == 0);
- /*
- * Make sure a trimmer task always sees the em in the pinned_chunks list
- * if it sees block_group->removed == 1 (needs to lock block_group->lock
- * before checking block_group->removed).
- */
- if (!remove_em) {
- /*
- * Our em might be in trans->transaction->pending_chunks which
- * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
- * and so is the fs_info->pinned_chunks list.
- *
- * So at this point we must be holding the chunk_mutex to avoid
- * any races with chunk allocation (more specifically at
- * volumes.c:contains_pending_extent()), to ensure it always
- * sees the em, either in the pending_chunks list or in the
- * pinned_chunks list.
- */
- list_move_tail(&em->list, &fs_info->pinned_chunks);
- }
spin_unlock(&block_group->lock);
if (remove_em) {
@@ -11008,11 +10835,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
em_tree = &fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
- /*
- * The em might be in the pending_chunks list, so make sure the
- * chunk mutex is locked, since remove_extent_mapping() will
- * delete us from that list.
- */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
/* once for the tree */
@@ -11315,11 +11137,12 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
* held back allocations.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
- u64 minlen, u64 *trimmed)
+ struct fstrim_range *range, u64 *trimmed)
{
- u64 start = 0, len = 0;
+ u64 start, len = 0, end = 0;
int ret;
+ start = max_t(u64, range->start, SZ_1M);
*trimmed = 0;
/* Discard not supported = nothing to do. */
@@ -11338,43 +11161,52 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
while (1) {
struct btrfs_fs_info *fs_info = device->fs_info;
- struct btrfs_transaction *trans;
u64 bytes;
ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
if (ret)
break;
- ret = down_read_killable(&fs_info->commit_root_sem);
- if (ret) {
+ find_first_clear_extent_bit(&device->alloc_state, start,
+ &start, &end,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ /*
+ * If find_first_clear_extent_bit finds a range that spans the
+ * end of the device it will set end to -1; in this case it's up
+ * to the caller to trim the value to the size of the device.
+ */
+ end = min(end, device->total_bytes - 1);
+ len = end - start + 1;
+
+ /* We didn't find any extents */
+ if (!len) {
mutex_unlock(&fs_info->chunk_mutex);
+ ret = 0;
break;
}
- spin_lock(&fs_info->trans_lock);
- trans = fs_info->running_transaction;
- if (trans)
- refcount_inc(&trans->use_count);
- spin_unlock(&fs_info->trans_lock);
-
- if (!trans)
- up_read(&fs_info->commit_root_sem);
-
- ret = find_free_dev_extent_start(trans, device, minlen, start,
- &start, &len);
- if (trans) {
- up_read(&fs_info->commit_root_sem);
- btrfs_put_transaction(trans);
+ /* Keep going until we satisfy minlen or reach end of space */
+ if (len < range->minlen) {
+ mutex_unlock(&fs_info->chunk_mutex);
+ start += len;
+ continue;
}
- if (ret) {
+ /* If we are out of the passed range break */
+ if (start > range->start + range->len - 1) {
mutex_unlock(&fs_info->chunk_mutex);
- if (ret == -ENOSPC)
- ret = 0;
break;
}
- ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
+ start = max(range->start, start);
+ len = min(range->len, len);
+
+ ret = btrfs_issue_discard(device->bdev, start, len,
+ &bytes);
+ if (!ret)
+ set_extent_bits(&device->alloc_state, start,
+ start + bytes - 1,
+ CHUNK_TRIMMED);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)
@@ -11383,6 +11215,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
start += len;
*trimmed += bytes;
+ /* We've trimmed enough */
+ if (*trimmed >= range->len)
+ break;
+
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -11419,7 +11255,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
int ret = 0;
cache = btrfs_lookup_first_block_group(fs_info, range->start);
- for (; cache; cache = next_block_group(fs_info, cache)) {
+ for (; cache; cache = next_block_group(cache)) {
if (cache->key.objectid >= (range->start + range->len)) {
btrfs_put_block_group(cache);
break;
@@ -11466,8 +11302,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
mutex_lock(&fs_info->fs_devices->device_list_mutex);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
- ret = btrfs_trim_free_extents(device, range->minlen,
- &group_trimmed);
+ ret = btrfs_trim_free_extents(device, range, &group_trimmed);
if (ret) {
dev_failed++;
dev_ret = ret;