aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c2503
1 files changed, 126 insertions, 2377 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5faf057f6f37..d3b58e388535 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -28,46 +28,12 @@
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
+#include "space-info.h"
+#include "block-rsv.h"
+#include "delalloc-space.h"
#undef SCRAMBLE_DELAYED_REFS
-/*
- * control flags for do_chunk_alloc's force field
- * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
- * if we really need one.
- *
- * CHUNK_ALLOC_LIMITED means to only try and allocate one
- * if we have very few chunks already allocated. This is
- * used as part of the clustering code to help make sure
- * we have a good pool of storage to cluster in, without
- * filling the FS with empty chunks
- *
- * CHUNK_ALLOC_FORCE means it must try to allocate one
- *
- */
-enum {
- CHUNK_ALLOC_NO_FORCE = 0,
- CHUNK_ALLOC_LIMITED = 1,
- CHUNK_ALLOC_FORCE = 2,
-};
-
-/*
- * Declare a helper function to detect underflow of various space info members
- */
-#define DECLARE_SPACE_INFO_UPDATE(name) \
-static inline void update_##name(struct btrfs_space_info *sinfo, \
- s64 bytes) \
-{ \
- if (bytes < 0 && sinfo->name < -bytes) { \
- WARN_ON(1); \
- sinfo->name = 0; \
- return; \
- } \
- sinfo->name += bytes; \
-}
-
-DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
-DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node, u64 parent,
@@ -84,21 +50,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
-static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
- int force);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
-static void dump_space_info(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups);
-static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes);
-static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes);
-static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -737,62 +690,39 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
return block_group_cache_tree_search(info, bytenr, 1);
}
-static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
- u64 flags)
+static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
-
- flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
-
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
- if (found->flags & flags) {
- rcu_read_unlock();
- return found;
- }
+ if (ref->type == BTRFS_REF_METADATA) {
+ if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
+ return BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ return BTRFS_BLOCK_GROUP_METADATA;
}
- rcu_read_unlock();
- return NULL;
+ return BTRFS_BLOCK_GROUP_DATA;
}
static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_ref *ref, int sign)
+ struct btrfs_ref *ref)
{
struct btrfs_space_info *space_info;
- s64 num_bytes;
- u64 flags;
-
- ASSERT(sign == 1 || sign == -1);
- num_bytes = sign * ref->len;
- if (ref->type == BTRFS_REF_METADATA) {
- if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- else
- flags = BTRFS_BLOCK_GROUP_METADATA;
- } else {
- flags = BTRFS_BLOCK_GROUP_DATA;
- }
+ u64 flags = generic_ref_to_space_flags(ref);
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
- percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
+ percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
BTRFS_TOTAL_BYTES_PINNED_BATCH);
}
-/*
- * after adding space to the filesystem, we need to clear the full flags
- * on all the space infos.
- */
-void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
+ struct btrfs_ref *ref)
{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
+ struct btrfs_space_info *space_info;
+ u64 flags = generic_ref_to_space_flags(ref);
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list)
- found->full = 0;
- rcu_read_unlock();
+ space_info = btrfs_find_space_info(fs_info, flags);
+ ASSERT(space_info);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
}
/* simple helper to search for an existing data extent at a given offset */
@@ -1121,11 +1051,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
__le64 lenum;
lenum = cpu_to_le64(root_objectid);
- high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
+ high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(owner);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(offset);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
return ((u64)high_crc << 31) ^ (u64)low_crc;
}
@@ -2065,7 +1995,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_ref_tree_mod(fs_info, generic_ref);
if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
- add_pinned_bytes(fs_info, generic_ref, -1);
+ sub_pinned_bytes(fs_info, generic_ref);
return ret;
}
@@ -2462,7 +2392,7 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
flags = BTRFS_BLOCK_GROUP_METADATA;
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
percpu_counter_add_batch(&space_info->total_bytes_pinned,
-head->num_bytes,
@@ -2824,49 +2754,6 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
return num_csums;
}
-bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- bool ret = false;
- u64 reserved;
-
- spin_lock(&global_rsv->lock);
- reserved = global_rsv->reserved;
- spin_unlock(&global_rsv->lock);
-
- /*
- * Since the global reserve is just kind of magic we don't really want
- * to rely on it to save our bacon, so if our size is more than the
- * delayed_refs_rsv and the global rsv then it's time to think about
- * bailing.
- */
- spin_lock(&delayed_refs_rsv->lock);
- reserved += delayed_refs_rsv->reserved;
- if (delayed_refs_rsv->size >= reserved)
- ret = true;
- spin_unlock(&delayed_refs_rsv->lock);
- return ret;
-}
-
-int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
-{
- u64 num_entries =
- atomic_read(&trans->transaction->delayed_refs.num_entries);
- u64 avg_runtime;
- u64 val;
-
- smp_mb();
- avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
- val = num_entries * avg_runtime;
- if (val >= NSEC_PER_SEC)
- return 1;
- if (val >= NSEC_PER_SEC / 2)
- return 2;
-
- return btrfs_check_space_for_delayed_refs(trans->fs_info);
-}
-
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -3834,93 +3721,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
}
-static const char *alloc_name(u64 flags)
-{
- switch (flags) {
- case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
- return "mixed";
- case BTRFS_BLOCK_GROUP_METADATA:
- return "metadata";
- case BTRFS_BLOCK_GROUP_DATA:
- return "data";
- case BTRFS_BLOCK_GROUP_SYSTEM:
- return "system";
- default:
- WARN_ON(1);
- return "invalid-combination";
- };
-}
-
-static int create_space_info(struct btrfs_fs_info *info, u64 flags)
-{
-
- struct btrfs_space_info *space_info;
- int i;
- int ret;
-
- space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
- if (!space_info)
- return -ENOMEM;
-
- ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
- GFP_KERNEL);
- if (ret) {
- kfree(space_info);
- return ret;
- }
-
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
- INIT_LIST_HEAD(&space_info->block_groups[i]);
- init_rwsem(&space_info->groups_sem);
- spin_lock_init(&space_info->lock);
- space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
- space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
- init_waitqueue_head(&space_info->wait);
- INIT_LIST_HEAD(&space_info->ro_bgs);
- INIT_LIST_HEAD(&space_info->tickets);
- INIT_LIST_HEAD(&space_info->priority_tickets);
-
- ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
- info->space_info_kobj, "%s",
- alloc_name(space_info->flags));
- if (ret) {
- kobject_put(&space_info->kobj);
- return ret;
- }
-
- list_add_rcu(&space_info->list, &info->space_info);
- if (flags & BTRFS_BLOCK_GROUP_DATA)
- info->data_sinfo = space_info;
-
- return ret;
-}
-
-static void update_space_info(struct btrfs_fs_info *info, u64 flags,
- u64 total_bytes, u64 bytes_used,
- u64 bytes_readonly,
- struct btrfs_space_info **space_info)
-{
- struct btrfs_space_info *found;
- int factor;
-
- factor = btrfs_bg_type_to_factor(flags);
-
- found = __find_space_info(info, flags);
- ASSERT(found);
- spin_lock(&found->lock);
- found->total_bytes += total_bytes;
- found->disk_total += total_bytes * factor;
- found->bytes_used += bytes_used;
- found->disk_used += bytes_used * factor;
- found->bytes_readonly += bytes_readonly;
- if (total_bytes > 0)
- found->full = 0;
- space_info_add_new_bytes(info, found, total_bytes -
- bytes_used - bytes_readonly);
- spin_unlock(&found->lock);
- *space_info = found;
-}
-
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = chunk_to_extended(flags) &
@@ -4068,215 +3868,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}
-static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
- bool may_use_included)
-{
- ASSERT(s_info);
- return s_info->bytes_used + s_info->bytes_reserved +
- s_info->bytes_pinned + s_info->bytes_readonly +
- (may_use_included ? s_info->bytes_may_use : 0);
-}
-
-int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
- u64 used;
- int ret = 0;
- int need_commit = 2;
- int have_pinned_space;
-
- /* make sure bytes are sectorsize aligned */
- bytes = ALIGN(bytes, fs_info->sectorsize);
-
- if (btrfs_is_free_space_inode(inode)) {
- need_commit = 0;
- ASSERT(current->journal_info);
- }
-
-again:
- /* make sure we have enough space to handle the data first */
- spin_lock(&data_sinfo->lock);
- used = btrfs_space_info_used(data_sinfo, true);
-
- if (used + bytes > data_sinfo->total_bytes) {
- struct btrfs_trans_handle *trans;
-
- /*
- * if we don't have enough free bytes in this space then we need
- * to alloc a new chunk.
- */
- if (!data_sinfo->full) {
- u64 alloc_target;
-
- data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
- spin_unlock(&data_sinfo->lock);
-
- alloc_target = btrfs_data_alloc_profile(fs_info);
- /*
- * It is ugly that we don't call nolock join
- * transaction for the free space inode case here.
- * But it is safe because we only do the data space
- * reservation for the free space cache in the
- * transaction context, the common join transaction
- * just increase the counter of the current transaction
- * handler, doesn't try to acquire the trans_lock of
- * the fs.
- */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = do_chunk_alloc(trans, alloc_target,
- CHUNK_ALLOC_NO_FORCE);
- btrfs_end_transaction(trans);
- if (ret < 0) {
- if (ret != -ENOSPC)
- return ret;
- else {
- have_pinned_space = 1;
- goto commit_trans;
- }
- }
-
- goto again;
- }
-
- /*
- * If we don't have enough pinned space to deal with this
- * allocation, and no removed chunk in current transaction,
- * don't bother committing the transaction.
- */
- have_pinned_space = __percpu_counter_compare(
- &data_sinfo->total_bytes_pinned,
- used + bytes - data_sinfo->total_bytes,
- BTRFS_TOTAL_BYTES_PINNED_BATCH);
- spin_unlock(&data_sinfo->lock);
-
- /* commit the current transaction and try again */
-commit_trans:
- if (need_commit) {
- need_commit--;
-
- if (need_commit > 0) {
- btrfs_start_delalloc_roots(fs_info, -1);
- btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
- (u64)-1);
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- if (have_pinned_space >= 0 ||
- test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
- &trans->transaction->flags) ||
- need_commit > 0) {
- ret = btrfs_commit_transaction(trans);
- if (ret)
- return ret;
- /*
- * The cleaner kthread might still be doing iput
- * operations. Wait for it to finish so that
- * more space is released. We don't need to
- * explicitly run the delayed iputs here because
- * the commit_transaction would have woken up
- * the cleaner.
- */
- ret = btrfs_wait_on_delayed_iputs(fs_info);
- if (ret)
- return ret;
- goto again;
- } else {
- btrfs_end_transaction(trans);
- }
- }
-
- trace_btrfs_space_reservation(fs_info,
- "space_info:enospc",
- data_sinfo->flags, bytes, 1);
- return -ENOSPC;
- }
- update_bytes_may_use(data_sinfo, bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- data_sinfo->flags, bytes, 1);
- spin_unlock(&data_sinfo->lock);
-
- return 0;
-}
-
-int btrfs_check_data_free_space(struct inode *inode,
- struct extent_changeset **reserved, u64 start, u64 len)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int ret;
-
- /* align the range */
- len = round_up(start + len, fs_info->sectorsize) -
- round_down(start, fs_info->sectorsize);
- start = round_down(start, fs_info->sectorsize);
-
- ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
- if (ret < 0)
- return ret;
-
- /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
- ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
- if (ret < 0)
- btrfs_free_reserved_data_space_noquota(inode, start, len);
- else
- ret = 0;
- return ret;
-}
-
-/*
- * Called if we need to clear a data reservation for this inode
- * Normally in a error case.
- *
- * This one will *NOT* use accurate qgroup reserved space API, just for case
- * which we can't sleep and is sure it won't affect qgroup reserved space.
- * Like clear_bit_hook().
- */
-void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
- u64 len)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_space_info *data_sinfo;
-
- /* Make sure the range is aligned to sectorsize */
- len = round_up(start + len, fs_info->sectorsize) -
- round_down(start, fs_info->sectorsize);
- start = round_down(start, fs_info->sectorsize);
-
- data_sinfo = fs_info->data_sinfo;
- spin_lock(&data_sinfo->lock);
- update_bytes_may_use(data_sinfo, -len);
- trace_btrfs_space_reservation(fs_info, "space_info",
- data_sinfo->flags, len, 0);
- spin_unlock(&data_sinfo->lock);
-}
-
-/*
- * Called if we need to clear a data reservation for this inode
- * Normally in a error case.
- *
- * This one will handle the per-inode data rsv map for accurate reserved
- * space framework.
- */
-void btrfs_free_reserved_data_space(struct inode *inode,
- struct extent_changeset *reserved, u64 start, u64 len)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /* Make sure the range is aligned to sectorsize */
- len = round_up(start + len, root->fs_info->sectorsize) -
- round_down(start, root->fs_info->sectorsize);
- start = round_down(start, root->fs_info->sectorsize);
-
- btrfs_free_reserved_data_space_noquota(inode, start, len);
- btrfs_qgroup_free_data(inode, reserved, start, len);
-}
-
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
@@ -4290,11 +3881,6 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_unlock();
}
-static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
-{
- return (global->size << 1);
-}
-
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
{
@@ -4325,15 +3911,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
{
u64 num_dev;
- if (type & (BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6))
+ num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
+ if (!num_dev)
num_dev = fs_info->fs_devices->rw_devices;
- else if (type & BTRFS_BLOCK_GROUP_RAID1)
- num_dev = 2;
- else
- num_dev = 1; /* DUP or single */
return num_dev;
}
@@ -4358,7 +3938,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
*/
lockdep_assert_held(&fs_info->chunk_mutex);
- info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+ info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
@@ -4372,7 +3952,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
- dump_space_info(fs_info, info, 0, 0);
+ btrfs_dump_space_info(fs_info, info, 0, 0);
}
if (left < thresh) {
@@ -4405,8 +3985,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
-static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
- int force)
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ enum btrfs_chunk_alloc_enum force)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
@@ -4418,7 +3998,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
if (trans->allocating_chunk)
return -ENOSPC;
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
do {
@@ -4525,1714 +4105,6 @@ out:
return ret;
}
-static int can_overcommit(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush,
- bool system_chunk)
-{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 profile;
- u64 space_size;
- u64 avail;
- u64 used;
- int factor;
-
- /* Don't overcommit when in mixed mode. */
- if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
- return 0;
-
- if (system_chunk)
- profile = btrfs_system_alloc_profile(fs_info);
- else
- profile = btrfs_metadata_alloc_profile(fs_info);
-
- used = btrfs_space_info_used(space_info, false);
-
- /*
- * We only want to allow over committing if we have lots of actual space
- * free, but if we don't have enough space to handle the global reserve
- * space then we could end up having a real enospc problem when trying
- * to allocate a chunk or some other such important allocation.
- */
- spin_lock(&global_rsv->lock);
- space_size = calc_global_rsv_need_space(global_rsv);
- spin_unlock(&global_rsv->lock);
- if (used + space_size >= space_info->total_bytes)
- return 0;
-
- used += space_info->bytes_may_use;
-
- avail = atomic64_read(&fs_info->free_chunk_space);
-
- /*
- * If we have dup, raid1 or raid10 then only half of the free
- * space is actually usable. For raid56, the space info used
- * doesn't include the parity drive, so we don't have to
- * change the math
- */
- factor = btrfs_bg_type_to_factor(profile);
- avail = div_u64(avail, factor);
-
- /*
- * If we aren't flushing all things, let us overcommit up to
- * 1/2th of the space. If we can flush, don't let us overcommit
- * too much, let it overcommit up to 1/8 of the space.
- */
- if (flush == BTRFS_RESERVE_FLUSH_ALL)
- avail >>= 3;
- else
- avail >>= 1;
-
- if (used + bytes < space_info->total_bytes + avail)
- return 1;
- return 0;
-}
-
-static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
- unsigned long nr_pages, int nr_items)
-{
- struct super_block *sb = fs_info->sb;
-
- if (down_read_trylock(&sb->s_umount)) {
- writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
- up_read(&sb->s_umount);
- } else {
- /*
- * We needn't worry the filesystem going from r/w to r/o though
- * we don't acquire ->s_umount mutex, because the filesystem
- * should guarantee the delalloc inodes list be empty after
- * the filesystem is readonly(all dirty pages are written to
- * the disk).
- */
- btrfs_start_delalloc_roots(fs_info, nr_items);
- if (!current->journal_info)
- btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
- }
-}
-
-static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
- u64 to_reclaim)
-{
- u64 bytes;
- u64 nr;
-
- bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- nr = div64_u64(to_reclaim, bytes);
- if (!nr)
- nr = 1;
- return nr;
-}
-
-#define EXTENT_SIZE_PER_ITEM SZ_256K
-
-/*
- * shrink metadata reservation for delalloc
- */
-static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
- u64 orig, bool wait_ordered)
-{
- struct btrfs_space_info *space_info;
- struct btrfs_trans_handle *trans;
- u64 delalloc_bytes;
- u64 dio_bytes;
- u64 async_pages;
- u64 items;
- long time_left;
- unsigned long nr_pages;
- int loops;
-
- /* Calc the number of the pages we need flush for space reservation */
- items = calc_reclaim_items_nr(fs_info, to_reclaim);
- to_reclaim = items * EXTENT_SIZE_PER_ITEM;
-
- trans = (struct btrfs_trans_handle *)current->journal_info;
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-
- delalloc_bytes = percpu_counter_sum_positive(
- &fs_info->delalloc_bytes);
- dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
- if (delalloc_bytes == 0 && dio_bytes == 0) {
- if (trans)
- return;
- if (wait_ordered)
- btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
- return;
- }
-
- /*
- * If we are doing more ordered than delalloc we need to just wait on
- * ordered extents, otherwise we'll waste time trying to flush delalloc
- * that likely won't give us the space back we need.
- */
- if (dio_bytes > delalloc_bytes)
- wait_ordered = true;
-
- loops = 0;
- while ((delalloc_bytes || dio_bytes) && loops < 3) {
- nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
-
- /*
- * Triggers inode writeback for up to nr_pages. This will invoke
- * ->writepages callback and trigger delalloc filling
- * (btrfs_run_delalloc_range()).
- */
- btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
-
- /*
- * We need to wait for the compressed pages to start before
- * we continue.
- */
- async_pages = atomic_read(&fs_info->async_delalloc_pages);
- if (!async_pages)
- goto skip_async;
-
- /*
- * Calculate how many compressed pages we want to be written
- * before we continue. I.e if there are more async pages than we
- * require wait_event will wait until nr_pages are written.
- */
- if (async_pages <= nr_pages)
- async_pages = 0;
- else
- async_pages -= nr_pages;
-
- wait_event(fs_info->async_submit_wait,
- atomic_read(&fs_info->async_delalloc_pages) <=
- (int)async_pages);
-skip_async:
- spin_lock(&space_info->lock);
- if (list_empty(&space_info->tickets) &&
- list_empty(&space_info->priority_tickets)) {
- spin_unlock(&space_info->lock);
- break;
- }
- spin_unlock(&space_info->lock);
-
- loops++;
- if (wait_ordered && !trans) {
- btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
- } else {
- time_left = schedule_timeout_killable(1);
- if (time_left)
- break;
- }
- delalloc_bytes = percpu_counter_sum_positive(
- &fs_info->delalloc_bytes);
- dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
- }
-}
-
-struct reserve_ticket {
- u64 orig_bytes;
- u64 bytes;
- int error;
- struct list_head list;
- wait_queue_head_t wait;
-};
-
-/**
- * maybe_commit_transaction - possibly commit the transaction if its ok to
- * @root - the root we're allocating for
- * @bytes - the number of bytes we want to reserve
- * @force - force the commit
- *
- * This will check to make sure that committing the transaction will actually
- * get us somewhere and then commit the transaction if it does. Otherwise it
- * will return -ENOSPC.
- */
-static int may_commit_transaction(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
-{
- struct reserve_ticket *ticket = NULL;
- struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_trans_handle *trans;
- u64 bytes_needed;
- u64 reclaim_bytes = 0;
-
- trans = (struct btrfs_trans_handle *)current->journal_info;
- if (trans)
- return -EAGAIN;
-
- spin_lock(&space_info->lock);
- if (!list_empty(&space_info->priority_tickets))
- ticket = list_first_entry(&space_info->priority_tickets,
- struct reserve_ticket, list);
- else if (!list_empty(&space_info->tickets))
- ticket = list_first_entry(&space_info->tickets,
- struct reserve_ticket, list);
- bytes_needed = (ticket) ? ticket->bytes : 0;
- spin_unlock(&space_info->lock);
-
- if (!bytes_needed)
- return 0;
-
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- /*
- * See if there is enough pinned space to make this reservation, or if
- * we have block groups that are going to be freed, allowing us to
- * possibly do a chunk allocation the next loop through.
- */
- if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
- __percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
- goto commit;
-
- /*
- * See if there is some space in the delayed insertion reservation for
- * this reservation.
- */
- if (space_info != delayed_rsv->space_info)
- goto enospc;
-
- spin_lock(&delayed_rsv->lock);
- reclaim_bytes += delayed_rsv->reserved;
- spin_unlock(&delayed_rsv->lock);
-
- spin_lock(&delayed_refs_rsv->lock);
- reclaim_bytes += delayed_refs_rsv->reserved;
- spin_unlock(&delayed_refs_rsv->lock);
- if (reclaim_bytes >= bytes_needed)
- goto commit;
- bytes_needed -= reclaim_bytes;
-
- if (__percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
- goto enospc;
-
-commit:
- return btrfs_commit_transaction(trans);
-enospc:
- btrfs_end_transaction(trans);
- return -ENOSPC;
-}
-
-/*
- * Try to flush some data based on policy set by @state. This is only advisory
- * and may fail for various reasons. The caller is supposed to examine the
- * state of @space_info to detect the outcome.
- */
-static void flush_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 num_bytes,
- int state)
-{
- struct btrfs_root *root = fs_info->extent_root;
- struct btrfs_trans_handle *trans;
- int nr;
- int ret = 0;
-
- switch (state) {
- case FLUSH_DELAYED_ITEMS_NR:
- case FLUSH_DELAYED_ITEMS:
- if (state == FLUSH_DELAYED_ITEMS_NR)
- nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
- else
- nr = -1;
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- ret = btrfs_run_delayed_items_nr(trans, nr);
- btrfs_end_transaction(trans);
- break;
- case FLUSH_DELALLOC:
- case FLUSH_DELALLOC_WAIT:
- shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
- state == FLUSH_DELALLOC_WAIT);
- break;
- case FLUSH_DELAYED_REFS_NR:
- case FLUSH_DELAYED_REFS:
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- if (state == FLUSH_DELAYED_REFS_NR)
- nr = calc_reclaim_items_nr(fs_info, num_bytes);
- else
- nr = 0;
- btrfs_run_delayed_refs(trans, nr);
- btrfs_end_transaction(trans);
- break;
- case ALLOC_CHUNK:
- case ALLOC_CHUNK_FORCE:
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- ret = do_chunk_alloc(trans,
- btrfs_metadata_alloc_profile(fs_info),
- (state == ALLOC_CHUNK) ?
- CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
- btrfs_end_transaction(trans);
- if (ret > 0 || ret == -ENOSPC)
- ret = 0;
- break;
- case COMMIT_TRANS:
- /*
- * If we have pending delayed iputs then we could free up a
- * bunch of pinned space, so make sure we run the iputs before
- * we do our pinned bytes check below.
- */
- btrfs_run_delayed_iputs(fs_info);
- btrfs_wait_on_delayed_iputs(fs_info);
-
- ret = may_commit_transaction(fs_info, space_info);
- break;
- default:
- ret = -ENOSPC;
- break;
- }
-
- trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
- ret);
- return;
-}
-
-static inline u64
-btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- bool system_chunk)
-{
- struct reserve_ticket *ticket;
- u64 used;
- u64 expected;
- u64 to_reclaim = 0;
-
- list_for_each_entry(ticket, &space_info->tickets, list)
- to_reclaim += ticket->bytes;
- list_for_each_entry(ticket, &space_info->priority_tickets, list)
- to_reclaim += ticket->bytes;
- if (to_reclaim)
- return to_reclaim;
-
- to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
- if (can_overcommit(fs_info, space_info, to_reclaim,
- BTRFS_RESERVE_FLUSH_ALL, system_chunk))
- return 0;
-
- used = btrfs_space_info_used(space_info, true);
-
- if (can_overcommit(fs_info, space_info, SZ_1M,
- BTRFS_RESERVE_FLUSH_ALL, system_chunk))
- expected = div_factor_fine(space_info->total_bytes, 95);
- else
- expected = div_factor_fine(space_info->total_bytes, 90);
-
- if (used > expected)
- to_reclaim = used - expected;
- else
- to_reclaim = 0;
- to_reclaim = min(to_reclaim, space_info->bytes_may_use +
- space_info->bytes_reserved);
- return to_reclaim;
-}
-
-static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 used, bool system_chunk)
-{
- u64 thresh = div_factor_fine(space_info->total_bytes, 98);
-
- /* If we're just plain full then async reclaim just slows us down. */
- if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
- return 0;
-
- if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- system_chunk))
- return 0;
-
- return (used >= thresh && !btrfs_fs_closing(fs_info) &&
- !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
-}
-
-static bool wake_all_tickets(struct list_head *head)
-{
- struct reserve_ticket *ticket;
-
- while (!list_empty(head)) {
- ticket = list_first_entry(head, struct reserve_ticket, list);
- list_del_init(&ticket->list);
- ticket->error = -ENOSPC;
- wake_up(&ticket->wait);
- if (ticket->bytes != ticket->orig_bytes)
- return true;
- }
- return false;
-}
-
-/*
- * This is for normal flushers, we can wait all goddamned day if we want to. We
- * will loop and continuously try to flush as long as we are making progress.
- * We count progress as clearing off tickets each time we have to loop.
- */
-static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
-{
- struct btrfs_fs_info *fs_info;
- struct btrfs_space_info *space_info;
- u64 to_reclaim;
- int flush_state;
- int commit_cycles = 0;
- u64 last_tickets_id;
-
- fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-
- spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- false);
- if (!to_reclaim) {
- space_info->flush = 0;
- spin_unlock(&space_info->lock);
- return;
- }
- last_tickets_id = space_info->tickets_id;
- spin_unlock(&space_info->lock);
-
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- do {
- flush_space(fs_info, space_info, to_reclaim, flush_state);
- spin_lock(&space_info->lock);
- if (list_empty(&space_info->tickets)) {
- space_info->flush = 0;
- spin_unlock(&space_info->lock);
- return;
- }
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
- space_info,
- false);
- if (last_tickets_id == space_info->tickets_id) {
- flush_state++;
- } else {
- last_tickets_id = space_info->tickets_id;
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- if (commit_cycles)
- commit_cycles--;
- }
-
- /*
- * We don't want to force a chunk allocation until we've tried
- * pretty hard to reclaim space. Think of the case where we
- * freed up a bunch of space and so have a lot of pinned space
- * to reclaim. We would rather use that than possibly create a
- * underutilized metadata chunk. So if this is our first run
- * through the flushing state machine skip ALLOC_CHUNK_FORCE and
- * commit the transaction. If nothing has changed the next go
- * around then we can force a chunk allocation.
- */
- if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
- flush_state++;
-
- if (flush_state > COMMIT_TRANS) {
- commit_cycles++;
- if (commit_cycles > 2) {
- if (wake_all_tickets(&space_info->tickets)) {
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- commit_cycles--;
- } else {
- space_info->flush = 0;
- }
- } else {
- flush_state = FLUSH_DELAYED_ITEMS_NR;
- }
- }
- spin_unlock(&space_info->lock);
- } while (flush_state <= COMMIT_TRANS);
-}
-
-void btrfs_init_async_reclaim_work(struct work_struct *work)
-{
- INIT_WORK(work, btrfs_async_reclaim_metadata_space);
-}
-
-static const enum btrfs_flush_state priority_flush_states[] = {
- FLUSH_DELAYED_ITEMS_NR,
- FLUSH_DELAYED_ITEMS,
- ALLOC_CHUNK,
-};
-
-static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket)
-{
- u64 to_reclaim;
- int flush_state;
-
- spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
- false);
- if (!to_reclaim) {
- spin_unlock(&space_info->lock);
- return;
- }
- spin_unlock(&space_info->lock);
-
- flush_state = 0;
- do {
- flush_space(fs_info, space_info, to_reclaim,
- priority_flush_states[flush_state]);
- flush_state++;
- spin_lock(&space_info->lock);
- if (ticket->bytes == 0) {
- spin_unlock(&space_info->lock);
- return;
- }
- spin_unlock(&space_info->lock);
- } while (flush_state < ARRAY_SIZE(priority_flush_states));
-}
-
-static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- struct reserve_ticket *ticket)
-
-{
- DEFINE_WAIT(wait);
- u64 reclaim_bytes = 0;
- int ret = 0;
-
- spin_lock(&space_info->lock);
- while (ticket->bytes > 0 && ticket->error == 0) {
- ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
- if (ret) {
- ret = -EINTR;
- break;
- }
- spin_unlock(&space_info->lock);
-
- schedule();
-
- finish_wait(&ticket->wait, &wait);
- spin_lock(&space_info->lock);
- }
- if (!ret)
- ret = ticket->error;
- if (!list_empty(&ticket->list))
- list_del_init(&ticket->list);
- if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
- reclaim_bytes = ticket->orig_bytes - ticket->bytes;
- spin_unlock(&space_info->lock);
-
- if (reclaim_bytes)
- space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
- return ret;
-}
-
-/**
- * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
- * @root - the root we're allocating for
- * @space_info - the space info we want to allocate from
- * @orig_bytes - the number of bytes we want
- * @flush - whether or not we can flush to make our reservation
- *
- * This will reserve orig_bytes number of bytes from the space info associated
- * with the block_rsv. If there is not enough space it will make an attempt to
- * flush out space to make room. It will do this by flushing delalloc if
- * possible or committing the transaction. If flush is 0 then no attempts to
- * regain reservations will be made and this will fail if there is not enough
- * space already.
- */
-static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush,
- bool system_chunk)
-{
- struct reserve_ticket ticket;
- u64 used;
- u64 reclaim_bytes = 0;
- int ret = 0;
-
- ASSERT(orig_bytes);
- ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
-
- spin_lock(&space_info->lock);
- ret = -ENOSPC;
- used = btrfs_space_info_used(space_info, true);
-
- /*
- * If we have enough space then hooray, make our reservation and carry
- * on. If not see if we can overcommit, and if we can, hooray carry on.
- * If not things get more complicated.
- */
- if (used + orig_bytes <= space_info->total_bytes) {
- update_bytes_may_use(space_info, orig_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, orig_bytes, 1);
- ret = 0;
- } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
- system_chunk)) {
- update_bytes_may_use(space_info, orig_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, orig_bytes, 1);
- ret = 0;
- }
-
- /*
- * If we couldn't make a reservation then setup our reservation ticket
- * and kick the async worker if it's not already running.
- *
- * If we are a priority flusher then we just need to add our ticket to
- * the list and we will do our own flushing further down.
- */
- if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
- ticket.orig_bytes = orig_bytes;
- ticket.bytes = orig_bytes;
- ticket.error = 0;
- init_waitqueue_head(&ticket.wait);
- if (flush == BTRFS_RESERVE_FLUSH_ALL) {
- list_add_tail(&ticket.list, &space_info->tickets);
- if (!space_info->flush) {
- space_info->flush = 1;
- trace_btrfs_trigger_flush(fs_info,
- space_info->flags,
- orig_bytes, flush,
- "enospc");
- queue_work(system_unbound_wq,
- &fs_info->async_reclaim_work);
- }
- } else {
- list_add_tail(&ticket.list,
- &space_info->priority_tickets);
- }
- } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
- used += orig_bytes;
- /*
- * We will do the space reservation dance during log replay,
- * which means we won't have fs_info->fs_root set, so don't do
- * the async reclaim as we will panic.
- */
- if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
- need_do_async_reclaim(fs_info, space_info,
- used, system_chunk) &&
- !work_busy(&fs_info->async_reclaim_work)) {
- trace_btrfs_trigger_flush(fs_info, space_info->flags,
- orig_bytes, flush, "preempt");
- queue_work(system_unbound_wq,
- &fs_info->async_reclaim_work);
- }
- }
- spin_unlock(&space_info->lock);
- if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
- return ret;
-
- if (flush == BTRFS_RESERVE_FLUSH_ALL)
- return wait_reserve_ticket(fs_info, space_info, &ticket);
-
- ret = 0;
- priority_reclaim_metadata_space(fs_info, space_info, &ticket);
- spin_lock(&space_info->lock);
- if (ticket.bytes) {
- if (ticket.bytes < orig_bytes)
- reclaim_bytes = orig_bytes - ticket.bytes;
- list_del_init(&ticket.list);
- ret = -ENOSPC;
- }
- spin_unlock(&space_info->lock);
-
- if (reclaim_bytes)
- space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
- ASSERT(list_empty(&ticket.list));
- return ret;
-}
-
-/**
- * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
- * @root - the root we're allocating for
- * @block_rsv - the block_rsv we're allocating for
- * @orig_bytes - the number of bytes we want
- * @flush - whether or not we can flush to make our reservation
- *
- * This will reserve orig_bytes number of bytes from the space info associated
- * with the block_rsv. If there is not enough space it will make an attempt to
- * flush out space to make room. It will do this by flushing delalloc if
- * possible or committing the transaction. If flush is 0 then no attempts to
- * regain reservations will be made and this will fail if there is not enough
- * space already.
- */
-static int reserve_metadata_bytes(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- int ret;
- bool system_chunk = (root == fs_info->chunk_root);
-
- ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
- orig_bytes, flush, system_chunk);
- if (ret == -ENOSPC &&
- unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
- if (block_rsv != global_rsv &&
- !block_rsv_use_bytes(global_rsv, orig_bytes))
- ret = 0;
- }
- if (ret == -ENOSPC) {
- trace_btrfs_space_reservation(fs_info, "space_info:enospc",
- block_rsv->space_info->flags,
- orig_bytes, 1);
-
- if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- dump_space_info(fs_info, block_rsv->space_info,
- orig_bytes, 0);
- }
- return ret;
-}
-
-static struct btrfs_block_rsv *get_block_rsv(
- const struct btrfs_trans_handle *trans,
- const struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv = NULL;
-
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
- (root == fs_info->csum_root && trans->adding_csums) ||
- (root == fs_info->uuid_root))
- block_rsv = trans->block_rsv;
-
- if (!block_rsv)
- block_rsv = root->block_rsv;
-
- if (!block_rsv)
- block_rsv = &fs_info->empty_block_rsv;
-
- return block_rsv;
-}
-
-static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- int ret = -ENOSPC;
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved >= num_bytes) {
- block_rsv->reserved -= num_bytes;
- if (block_rsv->reserved < block_rsv->size)
- block_rsv->full = 0;
- ret = 0;
- }
- spin_unlock(&block_rsv->lock);
- return ret;
-}
-
-static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, bool update_size)
-{
- spin_lock(&block_rsv->lock);
- block_rsv->reserved += num_bytes;
- if (update_size)
- block_rsv->size += num_bytes;
- else if (block_rsv->reserved >= block_rsv->size)
- block_rsv->full = 1;
- spin_unlock(&block_rsv->lock);
-}
-
-int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *dest, u64 num_bytes,
- int min_factor)
-{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 min_bytes;
-
- if (global_rsv->space_info != dest->space_info)
- return -ENOSPC;
-
- spin_lock(&global_rsv->lock);
- min_bytes = div_factor(global_rsv->size, min_factor);
- if (global_rsv->reserved < min_bytes + num_bytes) {
- spin_unlock(&global_rsv->lock);
- return -ENOSPC;
- }
- global_rsv->reserved -= num_bytes;
- if (global_rsv->reserved < global_rsv->size)
- global_rsv->full = 0;
- spin_unlock(&global_rsv->lock);
-
- block_rsv_add_bytes(dest, num_bytes, true);
- return 0;
-}
-
-/**
- * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
- * @fs_info - the fs info for our fs.
- * @src - the source block rsv to transfer from.
- * @num_bytes - the number of bytes to transfer.
- *
- * This transfers up to the num_bytes amount from the src rsv to the
- * delayed_refs_rsv. Any extra bytes are returned to the space info.
- */
-void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *src,
- u64 num_bytes)
-{
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
- u64 to_free = 0;
-
- spin_lock(&src->lock);
- src->reserved -= num_bytes;
- src->size -= num_bytes;
- spin_unlock(&src->lock);
-
- spin_lock(&delayed_refs_rsv->lock);
- if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
- u64 delta = delayed_refs_rsv->size -
- delayed_refs_rsv->reserved;
- if (num_bytes > delta) {
- to_free = num_bytes - delta;
- num_bytes = delta;
- }
- } else {
- to_free = num_bytes;
- num_bytes = 0;
- }
-
- if (num_bytes)
- delayed_refs_rsv->reserved += num_bytes;
- if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
- delayed_refs_rsv->full = 1;
- spin_unlock(&delayed_refs_rsv->lock);
-
- if (num_bytes)
- trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
- 0, num_bytes, 1);
- if (to_free)
- space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
- to_free);
-}
-
-/**
- * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
- * @fs_info - the fs_info for our fs.
- * @flush - control how we can flush for this reservation.
- *
- * This will refill the delayed block_rsv up to 1 items size worth of space and
- * will return -ENOSPC if we can't make the reservation.
- */
-int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
- enum btrfs_reserve_flush_enum flush)
-{
- struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
- u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved < block_rsv->size) {
- num_bytes = block_rsv->size - block_rsv->reserved;
- num_bytes = min(num_bytes, limit);
- }
- spin_unlock(&block_rsv->lock);
-
- if (!num_bytes)
- return 0;
-
- ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
- num_bytes, flush);
- if (ret)
- return ret;
- block_rsv_add_bytes(block_rsv, num_bytes, 0);
- trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
- 0, num_bytes, 1);
- return 0;
-}
-
-/*
- * This is for space we already have accounted in space_info->bytes_may_use, so
- * basically when we're returning space from block_rsv's.
- */
-static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes)
-{
- struct reserve_ticket *ticket;
- struct list_head *head;
- u64 used;
- enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
- bool check_overcommit = false;
-
- spin_lock(&space_info->lock);
- head = &space_info->priority_tickets;
-
- /*
- * If we are over our limit then we need to check and see if we can
- * overcommit, and if we can't then we just need to free up our space
- * and not satisfy any requests.
- */
- used = btrfs_space_info_used(space_info, true);
- if (used - num_bytes >= space_info->total_bytes)
- check_overcommit = true;
-again:
- while (!list_empty(head) && num_bytes) {
- ticket = list_first_entry(head, struct reserve_ticket,
- list);
- /*
- * We use 0 bytes because this space is already reserved, so
- * adding the ticket space would be a double count.
- */
- if (check_overcommit &&
- !can_overcommit(fs_info, space_info, 0, flush, false))
- break;
- if (num_bytes >= ticket->bytes) {
- list_del_init(&ticket->list);
- num_bytes -= ticket->bytes;
- ticket->bytes = 0;
- space_info->tickets_id++;
- wake_up(&ticket->wait);
- } else {
- ticket->bytes -= num_bytes;
- num_bytes = 0;
- }
- }
-
- if (num_bytes && head == &space_info->priority_tickets) {
- head = &space_info->tickets;
- flush = BTRFS_RESERVE_FLUSH_ALL;
- goto again;
- }
- update_bytes_may_use(space_info, -num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, num_bytes, 0);
- spin_unlock(&space_info->lock);
-}
-
-/*
- * This is for newly allocated space that isn't accounted in
- * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
- * we use this helper.
- */
-static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info,
- u64 num_bytes)
-{
- struct reserve_ticket *ticket;
- struct list_head *head = &space_info->priority_tickets;
-
-again:
- while (!list_empty(head) && num_bytes) {
- ticket = list_first_entry(head, struct reserve_ticket,
- list);
- if (num_bytes >= ticket->bytes) {
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags,
- ticket->bytes, 1);
- list_del_init(&ticket->list);
- num_bytes -= ticket->bytes;
- update_bytes_may_use(space_info, ticket->bytes);
- ticket->bytes = 0;
- space_info->tickets_id++;
- wake_up(&ticket->wait);
- } else {
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags,
- num_bytes, 1);
- update_bytes_may_use(space_info, num_bytes);
- ticket->bytes -= num_bytes;
- num_bytes = 0;
- }
- }
-
- if (num_bytes && head == &space_info->priority_tickets) {
- head = &space_info->tickets;
- goto again;
- }
-}
-
-static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- struct btrfs_block_rsv *dest, u64 num_bytes,
- u64 *qgroup_to_release_ret)
-{
- struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 qgroup_to_release = 0;
- u64 ret;
-
- spin_lock(&block_rsv->lock);
- if (num_bytes == (u64)-1) {
- num_bytes = block_rsv->size;
- qgroup_to_release = block_rsv->qgroup_rsv_size;
- }
- block_rsv->size -= num_bytes;
- if (block_rsv->reserved >= block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
- block_rsv->reserved = block_rsv->size;
- block_rsv->full = 1;
- } else {
- num_bytes = 0;
- }
- if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
- qgroup_to_release = block_rsv->qgroup_rsv_reserved -
- block_rsv->qgroup_rsv_size;
- block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
- } else {
- qgroup_to_release = 0;
- }
- spin_unlock(&block_rsv->lock);
-
- ret = num_bytes;
- if (num_bytes > 0) {
- if (dest) {
- spin_lock(&dest->lock);
- if (!dest->full) {
- u64 bytes_to_add;
-
- bytes_to_add = dest->size - dest->reserved;
- bytes_to_add = min(num_bytes, bytes_to_add);
- dest->reserved += bytes_to_add;
- if (dest->reserved >= dest->size)
- dest->full = 1;
- num_bytes -= bytes_to_add;
- }
- spin_unlock(&dest->lock);
- }
- if (num_bytes)
- space_info_add_old_bytes(fs_info, space_info,
- num_bytes);
- }
- if (qgroup_to_release_ret)
- *qgroup_to_release_ret = qgroup_to_release;
- return ret;
-}
-
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
- struct btrfs_block_rsv *dst, u64 num_bytes,
- bool update_size)
-{
- int ret;
-
- ret = block_rsv_use_bytes(src, num_bytes);
- if (ret)
- return ret;
-
- block_rsv_add_bytes(dst, num_bytes, update_size);
- return 0;
-}
-
-void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
-{
- memset(rsv, 0, sizeof(*rsv));
- spin_lock_init(&rsv->lock);
- rsv->type = type;
-}
-
-void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv,
- unsigned short type)
-{
- btrfs_init_block_rsv(rsv, type);
- rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
-}
-
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
- unsigned short type)
-{
- struct btrfs_block_rsv *block_rsv;
-
- block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
- if (!block_rsv)
- return NULL;
-
- btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
- return block_rsv;
-}
-
-void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv)
-{
- if (!rsv)
- return;
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
- kfree(rsv);
-}
-
-int btrfs_block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, u64 num_bytes,
- enum btrfs_reserve_flush_enum flush)
-{
- int ret;
-
- if (num_bytes == 0)
- return 0;
-
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (!ret)
- block_rsv_add_bytes(block_rsv, num_bytes, true);
-
- return ret;
-}
-
-int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
-{
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- if (!block_rsv)
- return 0;
-
- spin_lock(&block_rsv->lock);
- num_bytes = div_factor(block_rsv->size, min_factor);
- if (block_rsv->reserved >= num_bytes)
- ret = 0;
- spin_unlock(&block_rsv->lock);
-
- return ret;
-}
-
-int btrfs_block_rsv_refill(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, u64 min_reserved,
- enum btrfs_reserve_flush_enum flush)
-{
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- if (!block_rsv)
- return 0;
-
- spin_lock(&block_rsv->lock);
- num_bytes = min_reserved;
- if (block_rsv->reserved >= num_bytes)
- ret = 0;
- else
- num_bytes -= block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
-
- if (!ret)
- return 0;
-
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, false);
- return 0;
- }
-
- return ret;
-}
-
-static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, u64 *qgroup_to_release)
-{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_block_rsv *target = delayed_rsv;
-
- if (target->full || target == block_rsv)
- target = global_rsv;
-
- if (block_rsv->space_info != target->space_info)
- target = NULL;
-
- return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
- qgroup_to_release);
-}
-
-void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
-}
-
-/**
- * btrfs_inode_rsv_release - release any excessive reservation.
- * @inode - the inode we need to release from.
- * @qgroup_free - free or convert qgroup meta.
- * Unlike normal operation, qgroup meta reservation needs to know if we are
- * freeing qgroup reservation or just converting it into per-trans. Normally
- * @qgroup_free is true for error handling, and false for normal release.
- *
- * This is the same as btrfs_block_rsv_release, except that it handles the
- * tracepoint for the reservation.
- */
-static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 released = 0;
- u64 qgroup_to_release = 0;
-
- /*
- * Since we statically set the block_rsv->size we just want to say we
- * are releasing 0 bytes, and then we'll just get the reservation over
- * the size free'd.
- */
- released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
- &qgroup_to_release);
- if (released > 0)
- trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), released, 0);
- if (qgroup_free)
- btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
- else
- btrfs_qgroup_convert_reserved_meta(inode->root,
- qgroup_to_release);
-}
-
-/**
- * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
- * @fs_info - the fs_info for our fs.
- * @nr - the number of items to drop.
- *
- * This drops the delayed ref head's count from the delayed refs rsv and frees
- * any excess reservation we had.
- */
-void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
-{
- struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
- u64 released = 0;
-
- released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
- num_bytes, NULL);
- if (released)
- trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
- 0, released, 0);
-}
-
-static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
- struct btrfs_space_info *sinfo = block_rsv->space_info;
- u64 num_bytes;
-
- /*
- * The global block rsv is based on the size of the extent tree, the
- * checksum tree and the root tree. If the fs is empty we want to set
- * it to a minimal amount for safety.
- */
- num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
- btrfs_root_used(&fs_info->csum_root->root_item) +
- btrfs_root_used(&fs_info->tree_root->root_item);
- num_bytes = max_t(u64, num_bytes, SZ_16M);
-
- spin_lock(&sinfo->lock);
- spin_lock(&block_rsv->lock);
-
- block_rsv->size = min_t(u64, num_bytes, SZ_512M);
-
- if (block_rsv->reserved < block_rsv->size) {
- num_bytes = btrfs_space_info_used(sinfo, true);
- if (sinfo->total_bytes > num_bytes) {
- num_bytes = sinfo->total_bytes - num_bytes;
- num_bytes = min(num_bytes,
- block_rsv->size - block_rsv->reserved);
- block_rsv->reserved += num_bytes;
- update_bytes_may_use(sinfo, num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes,
- 1);
- }
- } else if (block_rsv->reserved > block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
- update_bytes_may_use(sinfo, -num_bytes);
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes, 0);
- block_rsv->reserved = block_rsv->size;
- }
-
- if (block_rsv->reserved == block_rsv->size)
- block_rsv->full = 1;
- else
- block_rsv->full = 0;
-
- spin_unlock(&block_rsv->lock);
- spin_unlock(&sinfo->lock);
-}
-
-static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_space_info *space_info;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
- fs_info->chunk_block_rsv.space_info = space_info;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
- fs_info->global_block_rsv.space_info = space_info;
- fs_info->trans_block_rsv.space_info = space_info;
- fs_info->empty_block_rsv.space_info = space_info;
- fs_info->delayed_block_rsv.space_info = space_info;
- fs_info->delayed_refs_rsv.space_info = space_info;
-
- fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
- fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
- fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
- if (fs_info->quota_root)
- fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
-
- update_global_block_rsv(fs_info);
-}
-
-static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
- (u64)-1, NULL);
- WARN_ON(fs_info->trans_block_rsv.size > 0);
- WARN_ON(fs_info->trans_block_rsv.reserved > 0);
- WARN_ON(fs_info->chunk_block_rsv.size > 0);
- WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
- WARN_ON(fs_info->delayed_block_rsv.size > 0);
- WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
- WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
- WARN_ON(fs_info->delayed_refs_rsv.size > 0);
-}
-
-/*
- * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
- * @trans - the trans that may have generated delayed refs
- *
- * This is to be called anytime we may have adjusted trans->delayed_ref_updates,
- * it'll calculate the additional size and add it to the delayed_refs_rsv.
- */
-void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
- u64 num_bytes;
-
- if (!trans->delayed_ref_updates)
- return;
-
- num_bytes = btrfs_calc_trans_metadata_size(fs_info,
- trans->delayed_ref_updates);
- spin_lock(&delayed_rsv->lock);
- delayed_rsv->size += num_bytes;
- delayed_rsv->full = 0;
- spin_unlock(&delayed_rsv->lock);
- trans->delayed_ref_updates = 0;
-}
-
-/*
- * To be called after all the new block groups attached to the transaction
- * handle have been created (btrfs_create_pending_block_groups()).
- */
-void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
-
- if (!trans->chunk_bytes_reserved)
- return;
-
- WARN_ON_ONCE(!list_empty(&trans->new_bgs));
-
- block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
- trans->chunk_bytes_reserved, NULL);
- trans->chunk_bytes_reserved = 0;
-}
-
-/*
- * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
- * root: the root of the parent directory
- * rsv: block reservation
- * items: the number of items that we need do reservation
- * use_global_rsv: allow fallback to the global block reservation
- *
- * This function is used to reserve the space for snapshot/subvolume
- * creation and deletion. Those operations are different with the
- * common file/directory operations, they change two fs/file trees
- * and root tree, the number of items that the qgroup reserves is
- * different with the free space reservation. So we can not use
- * the space reservation mechanism in start_transaction().
- */
-int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv, int items,
- bool use_global_rsv)
-{
- u64 qgroup_num_bytes = 0;
- u64 num_bytes;
- int ret;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
-
- if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
- /* One for parent inode, two for dir entries */
- qgroup_num_bytes = 3 * fs_info->nodesize;
- ret = btrfs_qgroup_reserve_meta_prealloc(root,
- qgroup_num_bytes, true);
- if (ret)
- return ret;
- }
-
- num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
- rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
- ret = btrfs_block_rsv_add(root, rsv, num_bytes,
- BTRFS_RESERVE_FLUSH_ALL);
-
- if (ret == -ENOSPC && use_global_rsv)
- ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
-
- if (ret && qgroup_num_bytes)
- btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
-
- return ret;
-}
-
-void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv)
-{
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
-}
-
-static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
- struct btrfs_inode *inode)
-{
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 reserve_size = 0;
- u64 qgroup_rsv_size = 0;
- u64 csum_leaves;
- unsigned outstanding_extents;
-
- lockdep_assert_held(&inode->lock);
- outstanding_extents = inode->outstanding_extents;
- if (outstanding_extents)
- reserve_size = btrfs_calc_trans_metadata_size(fs_info,
- outstanding_extents + 1);
- csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
- inode->csum_bytes);
- reserve_size += btrfs_calc_trans_metadata_size(fs_info,
- csum_leaves);
- /*
- * For qgroup rsv, the calculation is very simple:
- * account one nodesize for each outstanding extent
- *
- * This is overestimating in most cases.
- */
- qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
-
- spin_lock(&block_rsv->lock);
- block_rsv->size = reserve_size;
- block_rsv->qgroup_rsv_size = qgroup_rsv_size;
- spin_unlock(&block_rsv->lock);
-}
-
-static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
- u64 num_bytes, u64 *meta_reserve,
- u64 *qgroup_reserve)
-{
- u64 nr_extents = count_max_extents(num_bytes);
- u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
-
- /* We add one for the inode update at finish ordered time */
- *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
- nr_extents + csum_leaves + 1);
- *qgroup_reserve = nr_extents * fs_info->nodesize;
-}
-
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
- u64 meta_reserve, qgroup_reserve;
- unsigned nr_extents;
- enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
- int ret = 0;
- bool delalloc_lock = true;
-
- /* If we are a free space inode we need to not flush since we will be in
- * the middle of a transaction commit. We also don't need the delalloc
- * mutex since we won't race with anybody. We need this mostly to make
- * lockdep shut its filthy mouth.
- *
- * If we have a transaction open (can happen if we call truncate_block
- * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
- */
- if (btrfs_is_free_space_inode(inode)) {
- flush = BTRFS_RESERVE_NO_FLUSH;
- delalloc_lock = false;
- } else {
- if (current->journal_info)
- flush = BTRFS_RESERVE_FLUSH_LIMIT;
-
- if (btrfs_transaction_in_commit(fs_info))
- schedule_timeout(1);
- }
-
- if (delalloc_lock)
- mutex_lock(&inode->delalloc_mutex);
-
- num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
-
- /*
- * We always want to do it this way, every other way is wrong and ends
- * in tears. Pre-reserving the amount we are going to add will always
- * be the right way, because otherwise if we have enough parallelism we
- * could end up with thousands of inodes all holding little bits of
- * reservations they were able to make previously and the only way to
- * reclaim that space is to ENOSPC out the operations and clear
- * everything out and try again, which is bad. This way we just
- * over-reserve slightly, and clean up the mess when we are done.
- */
- calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
- &qgroup_reserve);
- ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
- if (ret)
- goto out_fail;
- ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
- if (ret)
- goto out_qgroup;
-
- /*
- * Now we need to update our outstanding extents and csum bytes _first_
- * and then add the reservation to the block_rsv. This keeps us from
- * racing with an ordered completion or some such that would think it
- * needs to free the reservation we just made.
- */
- spin_lock(&inode->lock);
- nr_extents = count_max_extents(num_bytes);
- btrfs_mod_outstanding_extents(inode, nr_extents);
- inode->csum_bytes += num_bytes;
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
- /* Now we can safely add our space to our block rsv */
- block_rsv_add_bytes(block_rsv, meta_reserve, false);
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), meta_reserve, 1);
-
- spin_lock(&block_rsv->lock);
- block_rsv->qgroup_rsv_reserved += qgroup_reserve;
- spin_unlock(&block_rsv->lock);
-
- if (delalloc_lock)
- mutex_unlock(&inode->delalloc_mutex);
- return 0;
-out_qgroup:
- btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
-out_fail:
- btrfs_inode_rsv_release(inode, true);
- if (delalloc_lock)
- mutex_unlock(&inode->delalloc_mutex);
- return ret;
-}
-
-/**
- * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
- * @inode: the inode to release the reservation for.
- * @num_bytes: the number of bytes we are releasing.
- * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
- *
- * This will release the metadata reservation for an inode. This can be called
- * once we complete IO for a given set of bytes to release their metadata
- * reservations, or on error for the same reason.
- */
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
-
- num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
- spin_lock(&inode->lock);
- inode->csum_bytes -= num_bytes;
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
- if (btrfs_is_testing(fs_info))
- return;
-
- btrfs_inode_rsv_release(inode, qgroup_free);
-}
-
-/**
- * btrfs_delalloc_release_extents - release our outstanding_extents
- * @inode: the inode to balance the reservation for.
- * @num_bytes: the number of bytes we originally reserved with
- * @qgroup_free: do we need to free qgroup meta reservation or convert them.
- *
- * When we reserve space we increase outstanding_extents for the extents we may
- * add. Once we've set the range as delalloc or created our ordered extents we
- * have outstanding_extents to track the real usage, so we use this to free our
- * temporarily tracked outstanding_extents. This _must_ be used in conjunction
- * with btrfs_delalloc_reserve_metadata.
- */
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- unsigned num_extents;
-
- spin_lock(&inode->lock);
- num_extents = count_max_extents(num_bytes);
- btrfs_mod_outstanding_extents(inode, -num_extents);
- btrfs_calculate_inode_block_rsv_size(fs_info, inode);
- spin_unlock(&inode->lock);
-
- if (btrfs_is_testing(fs_info))
- return;
-
- btrfs_inode_rsv_release(inode, qgroup_free);
-}
-
-/**
- * btrfs_delalloc_reserve_space - reserve data and metadata space for
- * delalloc
- * @inode: inode we're writing to
- * @start: start range we are writing to
- * @len: how long the range we are writing to
- * @reserved: mandatory parameter, record actually reserved qgroup ranges of
- * current reservation.
- *
- * This will do the following things
- *
- * o reserve space in data space info for num bytes
- * and reserve precious corresponding qgroup space
- * (Done in check_data_free_space)
- *
- * o reserve space for metadata space, based on the number of outstanding
- * extents and how much csums will be needed
- * also reserve metadata space in a per root over-reserve method.
- * o add to the inodes->delalloc_bytes
- * o add it to the fs_info's delalloc inodes list.
- * (Above 3 all done in delalloc_reserve_metadata)
- *
- * Return 0 for success
- * Return <0 for error(-ENOSPC or -EQUOT)
- */
-int btrfs_delalloc_reserve_space(struct inode *inode,
- struct extent_changeset **reserved, u64 start, u64 len)
-{
- int ret;
-
- ret = btrfs_check_data_free_space(inode, reserved, start, len);
- if (ret < 0)
- return ret;
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
- if (ret < 0)
- btrfs_free_reserved_data_space(inode, *reserved, start, len);
- return ret;
-}
-
-/**
- * btrfs_delalloc_release_space - release data and metadata space for delalloc
- * @inode: inode we're releasing space for
- * @start: start position of the space already reserved
- * @len: the len of the space already reserved
- * @release_bytes: the len of the space we consumed or didn't use
- *
- * This function will release the metadata space that was not used and will
- * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
- * list if there are no delalloc bytes left.
- * Also it will handle the qgroup reserved space.
- */
-void btrfs_delalloc_release_space(struct inode *inode,
- struct extent_changeset *reserved,
- u64 start, u64 len, bool qgroup_free)
-{
- btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
- btrfs_free_reserved_data_space(inode, reserved, start, len);
-}
-
static int update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc)
{
@@ -6296,7 +4168,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
old_val -= num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
cache->pinned += num_bytes;
- update_bytes_pinned(cache->space_info, num_bytes);
+ btrfs_space_info_update_bytes_pinned(info,
+ cache->space_info, num_bytes);
cache->space_info->bytes_used -= num_bytes;
cache->space_info->disk_used -= num_bytes * factor;
spin_unlock(&cache->lock);
@@ -6371,7 +4244,8 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache,
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
- update_bytes_pinned(cache->space_info, num_bytes);
+ btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
+ num_bytes);
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
@@ -6580,7 +4454,8 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
} else {
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
- update_bytes_may_use(space_info, -ram_bytes);
+ btrfs_space_info_update_bytes_may_use(cache->fs_info,
+ space_info, -ram_bytes);
if (delalloc)
cache->delalloc_bytes += num_bytes;
}
@@ -6646,7 +4521,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
up_write(&fs_info->commit_root_sem);
- update_global_block_rsv(fs_info);
+ btrfs_update_global_block_rsv(fs_info);
}
/*
@@ -6736,7 +4611,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
cache->pinned -= len;
- update_bytes_pinned(space_info, -len);
+ btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
trace_btrfs_space_reservation(fs_info, "pinned",
space_info->flags, len, 0);
@@ -6757,7 +4632,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
to_add = min(len, global_rsv->size -
global_rsv->reserved);
global_rsv->reserved += to_add;
- update_bytes_may_use(space_info, to_add);
+ btrfs_space_info_update_bytes_may_use(fs_info,
+ space_info, to_add);
if (global_rsv->reserved >= global_rsv->size)
global_rsv->full = 1;
trace_btrfs_space_reservation(fs_info,
@@ -6769,8 +4645,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_unlock(&global_rsv->lock);
/* Add to any tickets we may have */
if (len)
- space_info_add_new_bytes(fs_info, space_info,
- len);
+ btrfs_space_info_add_new_bytes(fs_info,
+ space_info, len);
}
spin_unlock(&space_info->lock);
}
@@ -7191,7 +5067,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
out:
if (pin)
- add_pinned_bytes(fs_info, &generic_ref, 1);
+ add_pinned_bytes(fs_info, &generic_ref);
if (last_ref) {
/*
@@ -7239,7 +5115,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
btrfs_ref_tree_mod(fs_info, ref);
if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
- add_pinned_bytes(fs_info, ref, 1);
+ add_pinned_bytes(fs_info, ref);
return ret;
}
@@ -7292,10 +5168,10 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
}
enum btrfs_loop_type {
- LOOP_CACHING_NOWAIT = 0,
- LOOP_CACHING_WAIT = 1,
- LOOP_ALLOC_CHUNK = 2,
- LOOP_NO_EMPTY_SIZE = 3,
+ LOOP_CACHING_NOWAIT,
+ LOOP_CACHING_WAIT,
+ LOOP_ALLOC_CHUNK,
+ LOOP_NO_EMPTY_SIZE,
};
static inline void
@@ -7661,8 +5537,8 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
return ret;
}
- ret = do_chunk_alloc(trans, ffe_ctl->flags,
- CHUNK_ALLOC_FORCE);
+ ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
+ CHUNK_ALLOC_FORCE);
/*
* If we can't allocate a new chunk we've already looped
@@ -7758,7 +5634,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
- space_info = __find_space_info(fs_info, flags);
+ space_info = btrfs_find_space_info(fs_info, flags);
if (!space_info) {
btrfs_err(fs_info, "No space info for %llu", flags);
return -ENOSPC;
@@ -7863,9 +5739,8 @@ search:
*/
if (!block_group_bits(block_group, flags)) {
u64 extra = BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_RAID1_MASK |
+ BTRFS_BLOCK_GROUP_RAID56_MASK |
BTRFS_BLOCK_GROUP_RAID10;
/*
@@ -7984,60 +5859,6 @@ loop:
return ret;
}
-#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
-do { \
- struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
- spin_lock(&__rsv->lock); \
- btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
- __rsv->size, __rsv->reserved); \
- spin_unlock(&__rsv->lock); \
-} while (0)
-
-static void dump_space_info(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups)
-{
- struct btrfs_block_group_cache *cache;
- int index = 0;
-
- spin_lock(&info->lock);
- btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
- info->flags,
- info->total_bytes - btrfs_space_info_used(info, true),
- info->full ? "" : "not ");
- btrfs_info(fs_info,
- "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
- info->total_bytes, info->bytes_used, info->bytes_pinned,
- info->bytes_reserved, info->bytes_may_use,
- info->bytes_readonly);
- spin_unlock(&info->lock);
-
- DUMP_BLOCK_RSV(fs_info, global_block_rsv);
- DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
- DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
- DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
- DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
-
- if (!dump_block_groups)
- return;
-
- down_read(&info->groups_sem);
-again:
- list_for_each_entry(cache, &info->block_groups[index], list) {
- spin_lock(&cache->lock);
- btrfs_info(fs_info,
- "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
- cache->key.objectid, cache->key.offset,
- btrfs_block_group_used(&cache->item), cache->pinned,
- cache->reserved, cache->ro ? "[readonly]" : "");
- btrfs_dump_free_space(cache, bytes);
- spin_unlock(&cache->lock);
- }
- if (++index < BTRFS_NR_RAID_TYPES)
- goto again;
- up_read(&info->groups_sem);
-}
-
/*
* btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
* hole that is at least as big as @num_bytes.
@@ -8113,12 +5934,13 @@ again:
} else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
- sinfo = __find_space_info(fs_info, flags);
+ sinfo = btrfs_find_space_info(fs_info, flags);
btrfs_err(fs_info,
"allocation failed flags %llu, wanted %llu",
flags, num_bytes);
if (sinfo)
- dump_space_info(fs_info, sinfo, num_bytes, 1);
+ btrfs_dump_space_info(fs_info, sinfo,
+ num_bytes, 1);
}
}
@@ -8456,73 +6278,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return buf;
}
-static struct btrfs_block_rsv *
-use_block_rsv(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u32 blocksize)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *block_rsv;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- int ret;
- bool global_updated = false;
-
- block_rsv = get_block_rsv(trans, root);
-
- if (unlikely(block_rsv->size == 0))
- goto try_reserve;
-again:
- ret = block_rsv_use_bytes(block_rsv, blocksize);
- if (!ret)
- return block_rsv;
-
- if (block_rsv->failfast)
- return ERR_PTR(ret);
-
- if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
- global_updated = true;
- update_global_block_rsv(fs_info);
- goto again;
- }
-
- /*
- * The global reserve still exists to save us from ourselves, so don't
- * warn_on if we are short on our delayed refs reserve.
- */
- if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
- btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
- static DEFINE_RATELIMIT_STATE(_rs,
- DEFAULT_RATELIMIT_INTERVAL * 10,
- /*DEFAULT_RATELIMIT_BURST*/ 1);
- if (__ratelimit(&_rs))
- WARN(1, KERN_DEBUG
- "BTRFS: block rsv returned %d\n", ret);
- }
-try_reserve:
- ret = reserve_metadata_bytes(root, block_rsv, blocksize,
- BTRFS_RESERVE_NO_FLUSH);
- if (!ret)
- return block_rsv;
- /*
- * If we couldn't reserve metadata bytes try and use some from
- * the global reserve if its space type is the same as the global
- * reservation.
- */
- if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
- block_rsv->space_info == global_rsv->space_info) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- }
- return ERR_PTR(ret);
-}
-
-static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv, u32 blocksize)
-{
- block_rsv_add_bytes(block_rsv, blocksize, false);
- block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
-}
-
/*
* finds a free extent and does all the dirty work required for allocation
* returns the tree buffer or an ERR_PTR on error.
@@ -8555,7 +6310,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
}
#endif
- block_rsv = use_block_rsv(trans, root, blocksize);
+ block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
@@ -8613,7 +6368,7 @@ out_free_buf:
out_free_reserved:
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
out_unuse:
- unuse_block_rsv(fs_info, block_rsv, blocksize);
+ btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
}
@@ -9552,9 +7307,8 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
num_devices = fs_info->fs_devices->rw_devices;
- stripped = BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
- BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
+ stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
+ BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
@@ -9565,7 +7319,7 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
return stripped;
/* turn mirroring into duplication */
- if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_RAID10))
return stripped | BTRFS_BLOCK_GROUP_DUP;
} else {
@@ -9636,7 +7390,7 @@ out:
btrfs_info(cache->fs_info,
"sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
sinfo_used, num_bytes, min_allocable_bytes);
- dump_space_info(cache->fs_info, cache->space_info, 0, 0);
+ btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
}
return ret;
}
@@ -9678,8 +7432,7 @@ again:
*/
alloc_flags = update_block_group_flags(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
- ret = do_chunk_alloc(trans, alloc_flags,
- CHUNK_ALLOC_FORCE);
+ ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
/*
* ENOSPC is allowed here, we may have enough space
* already allocated at the new raid level to
@@ -9695,7 +7448,7 @@ again:
if (!ret)
goto out;
alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
- ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
ret = inc_block_group_ro(cache, 0);
@@ -9716,7 +7469,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
- return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
/*
@@ -9949,7 +7702,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree;
struct extent_map *em;
- em_tree = &root->fs_info->mapping_tree.map_tree;
+ em_tree = &root->fs_info->mapping_tree;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, found_key.objectid,
found_key.offset);
@@ -10102,7 +7855,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
*/
synchronize_rcu();
- release_global_block_rsv(info);
+ btrfs_release_global_block_rsv(info);
while (!list_empty(&info->space_info)) {
int i;
@@ -10118,7 +7871,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0))
- dump_space_info(info, space_info, 0, 0);
+ btrfs_dump_space_info(info, space_info, 0, 0);
list_del(&space_info->list);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
struct kobject *kobj;
@@ -10141,7 +7894,6 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
struct btrfs_space_info *space_info;
struct raid_kobject *rkobj;
LIST_HEAD(list);
- int index;
int ret = 0;
spin_lock(&fs_info->pending_raid_kobjs_lock);
@@ -10149,11 +7901,10 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->pending_raid_kobjs_lock);
list_for_each_entry(rkobj, &list, list) {
- space_info = __find_space_info(fs_info, rkobj->flags);
- index = btrfs_bg_flags_to_raid_index(rkobj->flags);
+ space_info = btrfs_find_space_info(fs_info, rkobj->flags);
ret = kobject_add(&rkobj->kobj, &space_info->kobj,
- "%s", get_raid_name(index));
+ "%s", btrfs_bg_type_to_raid_name(rkobj->flags));
if (ret) {
kobject_put(&rkobj->kobj);
break;
@@ -10243,21 +7994,21 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
*/
static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
{
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct btrfs_block_group_cache *bg;
u64 start = 0;
int ret = 0;
while (1) {
- read_lock(&map_tree->map_tree.lock);
+ read_lock(&map_tree->lock);
/*
* lookup_extent_mapping will return the first extent map
* intersecting the range, so setting @len to 1 is enough to
* get the first chunk.
*/
- em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
- read_unlock(&map_tree->map_tree.lock);
+ em = lookup_extent_mapping(map_tree, start, 1);
+ read_unlock(&map_tree->lock);
if (!em)
break;
@@ -10417,9 +8168,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
}
trace_btrfs_add_block_group(info, cache, 0);
- update_space_info(info, cache->flags, found_key.offset,
- btrfs_block_group_used(&cache->item),
- cache->bytes_super, &space_info);
+ btrfs_update_space_info(info, cache->flags, found_key.offset,
+ btrfs_block_group_used(&cache->item),
+ cache->bytes_super, &space_info);
cache->space_info = space_info;
@@ -10437,9 +8188,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
list_for_each_entry_rcu(space_info, &info->space_info, list) {
if (!(get_alloc_profile(info, space_info->flags) &
(BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_RAID1_MASK |
+ BTRFS_BLOCK_GROUP_RAID56_MASK |
BTRFS_BLOCK_GROUP_DUP)))
continue;
/*
@@ -10457,7 +8207,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
}
btrfs_add_raid_kobjects(info);
- init_global_block_rsv(info);
+ btrfs_init_global_block_rsv(info);
ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
@@ -10554,7 +8304,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
* assigned to our block group. We want our bg to be added to the rbtree
* with its ->space_info set.
*/
- cache->space_info = __find_space_info(fs_info, cache->flags);
+ cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
ASSERT(cache->space_info);
ret = btrfs_add_block_group_cache(fs_info, cache);
@@ -10569,9 +8319,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
* the rbtree, update the space info's counters.
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
- update_space_info(fs_info, cache->flags, size, bytes_used,
+ btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, &cache->space_info);
- update_global_block_rsv(fs_info);
+ btrfs_update_global_block_rsv(fs_info);
link_block_group(cache);
@@ -10598,6 +8348,35 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
write_sequnlock(&fs_info->profiles_lock);
}
+/*
+ * Clear incompat bits for the following feature(s):
+ *
+ * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
+ * in the whole filesystem
+ */
+static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+ if (flags & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ struct list_head *head = &fs_info->space_info;
+ struct btrfs_space_info *sinfo;
+
+ list_for_each_entry_rcu(sinfo, head, list) {
+ bool found = false;
+
+ down_read(&sinfo->groups_sem);
+ if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
+ found = true;
+ if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
+ found = true;
+ up_read(&sinfo->groups_sem);
+
+ if (found)
+ return;
+ }
+ btrfs_clear_fs_incompat(fs_info, RAID56);
+ }
+}
+
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em)
{
@@ -10744,6 +8523,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
clear_avail_alloc_bits(fs_info, block_group->flags);
}
up_write(&block_group->space_info->groups_sem);
+ clear_incompat_bg_bits(fs_info, block_group->flags);
if (kobj) {
kobject_del(kobj);
kobject_put(kobj);
@@ -10853,7 +8633,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (remove_em) {
struct extent_map_tree *em_tree;
- em_tree = &fs_info->mapping_tree.map_tree;
+ em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
@@ -10871,7 +8651,7 @@ struct btrfs_trans_handle *
btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
const u64 chunk_offset)
{
- struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct map_lookup *map;
unsigned int num_items;
@@ -11020,7 +8800,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- update_bytes_pinned(space_info, -block_group->pinned);
+ btrfs_space_info_update_bytes_pinned(fs_info, space_info,
+ -block_group->pinned);
space_info->bytes_readonly += block_group->pinned;
percpu_counter_add_batch(&space_info->total_bytes_pinned,
-block_group->pinned,
@@ -11076,43 +8857,6 @@ next:
spin_unlock(&fs_info->unused_bgs_lock);
}
-int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_super_block *disk_super;
- u64 features;
- u64 flags;
- int mixed = 0;
- int ret;
-
- disk_super = fs_info->super_copy;
- if (!btrfs_super_root(disk_super))
- return -EINVAL;
-
- features = btrfs_super_incompat_flags(disk_super);
- if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
- mixed = 1;
-
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = create_space_info(fs_info, flags);
- if (ret)
- goto out;
-
- if (mixed) {
- flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags);
- } else {
- flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = create_space_info(fs_info, flags);
- if (ret)
- goto out;
-
- flags = BTRFS_BLOCK_GROUP_DATA;
- ret = create_space_info(fs_info, flags);
- }
-out:
- return ret;
-}
-
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end)
{
@@ -11171,12 +8915,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
find_first_clear_extent_bit(&device->alloc_state, start,
&start, &end,
CHUNK_TRIMMED | CHUNK_ALLOCATED);
+
+ /* Ensure we skip the reserved area in the first 1M */
+ start = max_t(u64, start, SZ_1M);
+
/*
* If find_first_clear_extent_bit find a range that spans the
* end of the device it will set end to -1, in this case it's up
* to the caller to trim the value to the size of the device.
*/
end = min(end, device->total_bytes - 1);
+
len = end - start + 1;
/* We didn't find any extents */