author     Qu Wenruo <wqu@suse.com>          2019-07-19 14:51:44 +0800
committer  David Sterba <dsterba@suse.com>   2019-09-09 14:59:01 +0200
commit     112974d4067ba29ae59f94e0bc79f19bf9db1a53 (patch)
tree       904a8d79a54a37ff93d937e83f6cc2560b67d302 /fs/btrfs/extent-tree.c
parent     btrfs: extent-tree: Add comment for inc_block_group_ro() (diff)
download   linux-dev-112974d4067ba29ae59f94e0bc79f19bf9db1a53.tar.xz
           linux-dev-112974d4067ba29ae59f94e0bc79f19bf9db1a53.zip
btrfs: volumes: Remove ENOSPC-prone btrfs_can_relocate()
[BUG]
Test case btrfs/156 fails since commit 302167c50b32 ("btrfs: don't end
the transaction for delayed refs in throttle") with ENOSPC.

[CAUSE]
The ENOSPC is reported from btrfs_can_relocate(). This function checks:

- If this block group is empty, we can relocate
- If we have enough free space, we can relocate

The above checks are valid, but the following check is vague due to its
implementation:

- If and only if we can allocate a new block group to contain all the
  used space, we can relocate

The design itself is OK, but the way we determine whether we can
allocate a new block group is problematic: btrfs_can_relocate() uses
find_free_dev_extent() to find free space on a device. However,
find_free_dev_extent() only searches the commit root and excludes dev
extents allocated in the current transaction, which makes it unable to
use dev extents just freed in the current transaction.

So for the following example, btrfs_can_relocate() will report ENOSPC.

The example block group layout:

 1M      129M        257M       385M       513M   550M
 |///////|///////////|//////////|          |      |

 // = Used bg. Consider all bgs 100% used for easy calculation.
 All block groups are SINGLE, so the on-disk bytenr is the same as the
 logical bytenr.

1) Bg [129M, 257M) gets relocated to [385M, 513M) in transid 100

 1M      129M        257M       385M       513M   550M
 |///////|           |//////////|//////////|      |

   However transid 100 is not committed yet, so in the dev commit tree
   we still have the old dev extent layout:

 1M      129M        257M       385M       513M   550M
 |///////|///////////|//////////|          |      |

2) Try to relocate bg [257M, 385M)

   We go into btrfs_can_relocate(). There is no free space in the
   current block groups, so we check whether we can find large enough
   free dev extents.

   The first slot is [385M, 513M), but that is already used by the new
   bg at [385M, 513M), so we continue the search. The remaining slot is
   [513M, 550M), smaller than the bg's length of 128M.

   So btrfs_can_relocate() reports ENOSPC.

   This is overkill: if we just skip the btrfs_can_relocate() check and
   go into the regular relocation routine, then at extent reservation
   time, if we can't find a free extent, we fall back to committing the
   transaction, which frees up the dev extents and allows a new block
   group to be created.

[FIX]
The fix here is to remove btrfs_can_relocate() completely.

If we hit the false ENOSPC case, just like btrfs/156, the extent
allocator will push harder by committing the transaction, and we will
have space for the new block group, avoiding the false ENOSPC.

If we really ran out of space, we will hit ENOSPC at
relocate_block_group(), and btrfs will just report the ENOSPC error as
usual.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
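For illustration only, here is a minimal sketch (not part of the patch,
and not actual btrfs code; every name in it is hypothetical) of the
fallback behaviour the fix relies on: instead of pre-checking free dev
extents against the commit root, just attempt the reservation and, on
ENOSPC, commit the transaction so space freed earlier in the same
transaction becomes visible, then retry once.

/*
 * Minimal userspace sketch, not btrfs code; every name below is
 * hypothetical and only models the idea described in the commit message.
 */
#include <errno.h>
#include <stdint.h>

struct fake_space {
	uint64_t free_in_commit_root;    /* space the allocator can see */
	uint64_t freed_this_transaction; /* freed, but not committed yet */
};

/*
 * Pretend allocator: like the commit-root-only search described above,
 * it only sees space already present in the "commit root".
 */
static int try_reserve(struct fake_space *s, uint64_t num_bytes)
{
	if (s->free_in_commit_root < num_bytes)
		return -ENOSPC;
	s->free_in_commit_root -= num_bytes;
	return 0;
}

/*
 * Pretend transaction commit: space freed in this transaction becomes
 * visible to later reservations.
 */
static void commit_transaction(struct fake_space *s)
{
	s->free_in_commit_root += s->freed_this_transaction;
	s->freed_this_transaction = 0;
}

/*
 * The pattern the fix relies on: no up-front "can we relocate?" check;
 * try to reserve, and on ENOSPC commit the transaction and retry once.
 */
int reserve_with_commit_fallback(struct fake_space *s, uint64_t num_bytes)
{
	int ret = try_reserve(s, num_bytes);

	if (ret != -ENOSPC)
		return ret;
	commit_transaction(s);
	return try_reserve(s, num_bytes);
}

With the layout from the example above, a caller that freed 128M in the
current transaction would first get -ENOSPC from try_reserve(), then
succeed after commit_transaction(), mirroring how the real relocation
path recovers once the transaction is committed.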
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--   fs/btrfs/extent-tree.c   141
1 files changed, 0 insertions, 141 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e1a39fe183c1..4bd88b6b4865 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7547,147 +7547,6 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
spin_unlock(&sinfo->lock);
}
-/*
- * Checks to see if it's even possible to relocate this block group.
- *
- * @return - -1 if it's not a good idea to relocate this block group, 0 if its
- * ok to go ahead and try.
- */
-int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
-{
- struct btrfs_block_group_cache *block_group;
- struct btrfs_space_info *space_info;
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- struct btrfs_device *device;
- u64 min_free;
- u64 dev_min = 1;
- u64 dev_nr = 0;
- u64 target;
- int debug;
- int index;
- int full = 0;
- int ret = 0;
-
- debug = btrfs_test_opt(fs_info, ENOSPC_DEBUG);
-
- block_group = btrfs_lookup_block_group(fs_info, bytenr);
-
- /* odd, couldn't find the block group, leave it alone */
- if (!block_group) {
- if (debug)
- btrfs_warn(fs_info,
- "can't find block group for bytenr %llu",
- bytenr);
- return -1;
- }
-
- min_free = btrfs_block_group_used(&block_group->item);
-
- /* no bytes used, we're good */
- if (!min_free)
- goto out;
-
- space_info = block_group->space_info;
- spin_lock(&space_info->lock);
-
- full = space_info->full;
-
- /*
- * if this is the last block group we have in this space, we can't
- * relocate it unless we're able to allocate a new chunk below.
- *
- * Otherwise, we need to make sure we have room in the space to handle
- * all of the extents from this block group. If we can, we're good
- */
- if ((space_info->total_bytes != block_group->key.offset) &&
- (btrfs_space_info_used(space_info, false) + min_free <
- space_info->total_bytes)) {
- spin_unlock(&space_info->lock);
- goto out;
- }
- spin_unlock(&space_info->lock);
-
- /*
- * ok we don't have enough space, but maybe we have free space on our
- * devices to allocate new chunks for relocation, so loop through our
- * alloc devices and guess if we have enough space. if this block
- * group is going to be restriped, run checks against the target
- * profile instead of the current one.
- */
- ret = -1;
-
- /*
- * index:
- * 0: raid10
- * 1: raid1
- * 2: dup
- * 3: raid0
- * 4: single
- */
- target = get_restripe_target(fs_info, block_group->flags);
- if (target) {
- index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
- } else {
- /*
- * this is just a balance, so if we were marked as full
- * we know there is no space for a new chunk
- */
- if (full) {
- if (debug)
- btrfs_warn(fs_info,
- "no space to alloc new chunk for block group %llu",
- block_group->key.objectid);
- goto out;
- }
-
- index = btrfs_bg_flags_to_raid_index(block_group->flags);
- }
-
- if (index == BTRFS_RAID_RAID10) {
- dev_min = 4;
- /* Divide by 2 */
- min_free >>= 1;
- } else if (index == BTRFS_RAID_RAID1) {
- dev_min = 2;
- } else if (index == BTRFS_RAID_DUP) {
- /* Multiply by 2 */
- min_free <<= 1;
- } else if (index == BTRFS_RAID_RAID0) {
- dev_min = fs_devices->rw_devices;
- min_free = div64_u64(min_free, dev_min);
- }
-
- mutex_lock(&fs_info->chunk_mutex);
- list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
- u64 dev_offset;
-
- /*
- * check to make sure we can actually find a chunk with enough
- * space to fit our block group in.
- */
- if (device->total_bytes > device->bytes_used + min_free &&
- !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
- ret = find_free_dev_extent(device, min_free,
- &dev_offset, NULL);
- if (!ret)
- dev_nr++;
-
- if (dev_nr >= dev_min)
- break;
-
- ret = -1;
- }
- }
- if (debug && ret == -1)
- btrfs_warn(fs_info,
- "no space to allocate a new chunk for block group %llu",
- block_group->key.objectid);
- mutex_unlock(&fs_info->chunk_mutex);
-out:
- btrfs_put_block_group(block_group);
- return ret;
-}
-
static int find_first_block_group(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_key *key)