aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/btrfs/block-rsv.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/block-rsv.c')
-rw-r--r--fs/btrfs/block-rsv.c105
1 files changed, 99 insertions, 6 deletions
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index d07bd41a7c1e..27efec8f7c5b 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -6,6 +6,98 @@
#include "space-info.h"
#include "transaction.h"
+/*
+ * HOW DO BLOCK RESERVES WORK
+ *
+ * Think of block_rsv's as buckets for logically grouped metadata
+ * reservations. Each block_rsv has a ->size and a ->reserved. ->size is
+ * how large we want our block rsv to be, ->reserved is how much space is
+ * currently reserved for this block reserve.
+ *
+ * ->failfast exists for the truncate case, and is described below.
+ *
+ * NORMAL OPERATION
+ *
+ * -> Reserve
+ * Entrance: btrfs_block_rsv_add, btrfs_block_rsv_refill
+ *
+ * We call into btrfs_reserve_metadata_bytes() with our bytes, which is
+ * accounted for in space_info->bytes_may_use, and then add the bytes to
+ * ->reserved, and ->size in the case of btrfs_block_rsv_add.
+ *
+ * ->size is an over-estimation of how much we may use for a particular
+ * operation.
+ *
+ * -> Use
+ * Entrance: btrfs_use_block_rsv
+ *
+ * When we do a btrfs_alloc_tree_block() we call into btrfs_use_block_rsv()
+ * to determine the appropriate block_rsv to use, and then verify that
+ * ->reserved has enough space for our tree block allocation. Once
+ * successful we subtract fs_info->nodesize from ->reserved.
+ *
+ * -> Finish
+ * Entrance: btrfs_block_rsv_release
+ *
+ * We are finished with our operation, subtract our individual reservation
+ * from ->size, and then subtract ->size from ->reserved and free up the
+ * excess if there is any.
+ *
+ * There is some logic here to refill the delayed refs rsv or the global rsv
+ * as needed, otherwise the excess is subtracted from
+ * space_info->bytes_may_use.
+ *
+ * TYPES OF BLOCK RESERVES
+ *
+ * BLOCK_RSV_TRANS, BLOCK_RSV_DELOPS, BLOCK_RSV_CHUNK
+ * These behave normally, as described above, just within the confines of the
+ * lifetime of their particular operation (transaction for the whole trans
+ * handle lifetime, for example).
+ *
+ * BLOCK_RSV_GLOBAL
+ * It is impossible to properly account for all the space that may be required
+ * to make our extent tree updates. This block reserve acts as an overflow
+ * buffer in case our delayed refs reserve does not reserve enough space to
+ * update the extent tree.
+ *
+ * We can steal from this in some cases as well, notably on evict() or
+ * truncate() in order to help users recover from ENOSPC conditions.
+ *
+ * BLOCK_RSV_DELALLOC
+ * The individual item sizes are determined by the per-inode size
+ * calculations, which are described with the delalloc code. This is pretty
+ * straightforward; it's just that the calculation of ->size encodes a lot of
+ * different items, and thus it gets used when updating inodes, inserting file
+ * extents, and inserting checksums.
+ *
+ * BLOCK_RSV_DELREFS
+ * We keep a running tally of how many delayed refs we have on the system.
+ * We assume each one of these delayed refs is going to use a full
+ * reservation. We use the transaction items and pre-reserve space for every
+ * operation, and use this reservation to refill any gap between ->size and
+ * ->reserved that may exist.
+ *
+ * From there it's straightforward: removing a delayed ref means we remove its
+ * count from ->size and free up reservations as necessary. Since this is
+ * the most dynamic block reserve in the system, we will try to refill this
+ * block reserve first with any excess returned by any other block reserve.
+ *
+ * BLOCK_RSV_EMPTY
+ * This is the fallback block reserve to make us try to reserve space if we
+ * don't have a specific bucket for this allocation. It is mostly used for
+ * updating the device tree and such; since that is a separate pool, we're
+ * content to just reserve space from the space_info on demand.
+ *
+ * BLOCK_RSV_TEMP
+ * This is used by things like truncate and iput. We will temporarily
+ * allocate a block reserve, set it to some size, and then truncate bytes
+ * until we have no space left. With ->failfast set we'll simply return
+ * ENOSPC from btrfs_use_block_rsv() to signal that we need to unwind and try
+ * to make a new reservation. This is because these operations are
+ * unbounded, so we want to do as much work as we can, and then back off and
+ * re-reserve.
+ */
+
static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes,
@@ -111,7 +203,7 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
{
if (!rsv)
return;
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
+ btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL);
kfree(rsv);
}
@@ -178,9 +270,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
-u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, u64 *qgroup_to_release)
+u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv, u64 num_bytes,
+ u64 *qgroup_to_release)
{
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
@@ -297,9 +389,9 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved < block_rsv->size) {
num_bytes = block_rsv->size - block_rsv->reserved;
- block_rsv->reserved += num_bytes;
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
num_bytes);
+ block_rsv->reserved = block_rsv->size;
} else if (block_rsv->reserved > block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
@@ -344,7 +436,8 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
- btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1);
+ btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1,
+ NULL);
WARN_ON(fs_info->trans_block_rsv.size > 0);
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);