Diffstat (limited to 'fs/btrfs/zoned.c')
-rw-r--r-- | fs/btrfs/zoned.c | 222
1 files changed, 138 insertions, 84 deletions
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 62e7007a7e46..1912abf6d020 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -639,6 +639,46 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
 	device->zone_info = NULL;
 }
 
+struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev)
+{
+	struct btrfs_zoned_device_info *zone_info;
+
+	zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL);
+	if (!zone_info)
+		return NULL;
+
+	zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->seq_zones)
+		goto out;
+
+	bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones,
+		    zone_info->nr_zones);
+
+	zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->empty_zones)
+		goto out;
+
+	bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones,
+		    zone_info->nr_zones);
+
+	zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->active_zones)
+		goto out;
+
+	bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones,
+		    zone_info->nr_zones);
+	zone_info->zone_cache = NULL;
+
+	return zone_info;
+
+out:
+	bitmap_free(zone_info->seq_zones);
+	bitmap_free(zone_info->empty_zones);
+	bitmap_free(zone_info->active_zones);
+	kfree(zone_info);
+	return NULL;
+}
+
 int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 		       struct blk_zone *zone)
 {
@@ -652,80 +692,55 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 	return 0;
 }
 
+static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_device *device;
+
+	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+		if (device->bdev &&
+		    bdev_zoned_model(device->bdev) == BLK_ZONED_HM) {
+			btrfs_err(fs_info,
+				"zoned: mode not enabled but zoned device found: %pg",
+				device->bdev);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	struct btrfs_device *device;
-	u64 zoned_devices = 0;
-	u64 nr_devices = 0;
 	u64 zone_size = 0;
 	u64 max_zone_append_size = 0;
-	const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
-	int ret = 0;
+	int ret;
 
-	/* Count zoned devices */
-	list_for_each_entry(device, &fs_devices->devices, dev_list) {
-		enum blk_zoned_model model;
+	/*
+	 * Host-Managed devices can't be used without the ZONED flag. With the
+	 * ZONED all devices can be used, using zone emulation if required.
+	 */
+	if (!btrfs_fs_incompat(fs_info, ZONED))
+		return btrfs_check_for_zoned_device(fs_info);
+
+	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+		struct btrfs_zoned_device_info *zone_info = device->zone_info;
 
 		if (!device->bdev)
 			continue;
 
-		model = bdev_zoned_model(device->bdev);
-		/*
-		 * A Host-Managed zoned device must be used as a zoned device.
-		 * A Host-Aware zoned device and a non-zoned devices can be
-		 * treated as a zoned device, if ZONED flag is enabled in the
-		 * superblock.
-		 */
-		if (model == BLK_ZONED_HM ||
-		    (model == BLK_ZONED_HA && incompat_zoned) ||
-		    (model == BLK_ZONED_NONE && incompat_zoned)) {
-			struct btrfs_zoned_device_info *zone_info;
-
-			zone_info = device->zone_info;
-			zoned_devices++;
-			if (!zone_size) {
-				zone_size = zone_info->zone_size;
-			} else if (zone_info->zone_size != zone_size) {
-				btrfs_err(fs_info,
+		if (!zone_size) {
+			zone_size = zone_info->zone_size;
+		} else if (zone_info->zone_size != zone_size) {
+			btrfs_err(fs_info,
 		"zoned: unequal block device zone sizes: have %llu found %llu",
-					  device->zone_info->zone_size,
-					  zone_size);
-				ret = -EINVAL;
-				goto out;
-			}
-			if (!max_zone_append_size ||
-			    (zone_info->max_zone_append_size &&
-			     zone_info->max_zone_append_size < max_zone_append_size))
-				max_zone_append_size =
-					zone_info->max_zone_append_size;
+				  zone_info->zone_size, zone_size);
+			return -EINVAL;
 		}
-		nr_devices++;
-	}
-
-	if (!zoned_devices && !incompat_zoned)
-		goto out;
-
-	if (!zoned_devices && incompat_zoned) {
-		/* No zoned block device found on ZONED filesystem */
-		btrfs_err(fs_info,
-			  "zoned: no zoned devices found on a zoned filesystem");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (zoned_devices && !incompat_zoned) {
-		btrfs_err(fs_info,
-			  "zoned: mode not enabled but zoned device found");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (zoned_devices != nr_devices) {
-		btrfs_err(fs_info,
-			  "zoned: cannot mix zoned and regular devices");
-		ret = -EINVAL;
-		goto out;
+		if (!max_zone_append_size ||
+		    (zone_info->max_zone_append_size &&
+		     zone_info->max_zone_append_size < max_zone_append_size))
+			max_zone_append_size = zone_info->max_zone_append_size;
 	}
 
 	/*
@@ -737,14 +752,12 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 		btrfs_err(fs_info,
 			  "zoned: zone size %llu not aligned to stripe %u",
 			  zone_size, BTRFS_STRIPE_LEN);
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
 		btrfs_err(fs_info, "zoned: mixed block groups not supported");
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	fs_info->zone_size = zone_size;
@@ -760,11 +773,10 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 	 */
 	ret = btrfs_check_mountopts_zoned(fs_info);
 	if (ret)
-		goto out;
+		return ret;
 
 	btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size);
-out:
-	return ret;
+	return 0;
 }
 
 int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
@@ -1436,7 +1448,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 			goto out;
 		} else if (map->num_stripes == num_conventional) {
 			cache->alloc_offset = last_alloc;
-			cache->zone_is_active = 1;
+			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
 			goto out;
 		}
 	}
@@ -1452,7 +1464,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		}
 		cache->alloc_offset = alloc_offsets[0];
 		cache->zone_capacity = caps[0];
-		cache->zone_is_active = test_bit(0, active);
+		if (test_bit(0, active))
+			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
 		break;
 	case BTRFS_BLOCK_GROUP_DUP:
 		if (map->type & BTRFS_BLOCK_GROUP_DATA) {
@@ -1486,7 +1499,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 				goto out;
 			}
 		} else {
-			cache->zone_is_active = test_bit(0, active);
+			if (test_bit(0, active))
+				set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+					&cache->runtime_flags);
 		}
 		cache->alloc_offset = alloc_offsets[0];
 		cache->zone_capacity = min(caps[0], caps[1]);
@@ -1530,7 +1545,7 @@ out:
 
 	if (!ret) {
 		cache->meta_write_pointer = cache->alloc_offset + cache->start;
-		if (cache->zone_is_active) {
+		if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) {
 			btrfs_get_block_group(cache);
 			spin_lock(&fs_info->zone_active_bgs_lock);
 			list_add_tail(&cache->active_bg_list,
@@ -1563,7 +1578,6 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
 	free = cache->zone_capacity - cache->alloc_offset;
 
 	/* We only need ->free_space in ALLOC_SEQ block groups */
-	cache->last_byte_to_unpin = (u64)-1;
 	cache->cached = BTRFS_CACHE_FINISHED;
 	cache->free_space_ctl->free_space = free;
 	cache->zone_unusable = unusable;
@@ -1871,7 +1885,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 	spin_lock(&space_info->lock);
 	spin_lock(&block_group->lock);
-	if (block_group->zone_is_active) {
+	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
 		ret = true;
 		goto out_unlock;
 	}
@@ -1897,7 +1911,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 	}
 
 	/* Successfully activated all the zones */
-	block_group->zone_is_active = 1;
+	set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
 	space_info->active_total_bytes += block_group->length;
 	spin_unlock(&block_group->lock);
 	btrfs_try_granting_tickets(fs_info, space_info);
@@ -1918,22 +1932,55 @@ out_unlock:
 	return ret;
 }
 
+static void wait_eb_writebacks(struct btrfs_block_group *block_group)
+{
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	const u64 end = block_group->start + block_group->length;
+	struct radix_tree_iter iter;
+	struct extent_buffer *eb;
+	void __rcu **slot;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
+				 block_group->start >> fs_info->sectorsize_bits) {
+		eb = radix_tree_deref_slot(slot);
+		if (!eb)
+			continue;
+		if (radix_tree_deref_retry(eb)) {
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
+
+		if (eb->start < block_group->start)
+			continue;
+		if (eb->start >= end)
+			break;
+
+		slot = radix_tree_iter_resume(slot, &iter);
+		rcu_read_unlock();
+		wait_on_extent_buffer_writeback(eb);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+}
+
 static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct map_lookup *map;
+	const bool is_metadata = (block_group->flags &
+			(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
 	int ret = 0;
 	int i;
 
 	spin_lock(&block_group->lock);
-	if (!block_group->zone_is_active) {
+	if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
 		spin_unlock(&block_group->lock);
 		return 0;
 	}
 
 	/* Check if we have unwritten allocated space */
-	if ((block_group->flags &
-	     (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+	if (is_metadata &&
 	    block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
 		spin_unlock(&block_group->lock);
 		return -EAGAIN;
@@ -1958,6 +2005,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 		/* No need to wait for NOCOW writers. Zoned mode does not allow that */
 		btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
 					 block_group->length);
+		/* Wait for extent buffers to be written. */
+		if (is_metadata)
+			wait_eb_writebacks(block_group);
 
 		spin_lock(&block_group->lock);
 
@@ -1965,7 +2015,8 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 		 * Bail out if someone already deactivated the block group, or
 		 * allocated space is left in the block group.
 		 */
-		if (!block_group->zone_is_active) {
+		if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+			      &block_group->runtime_flags)) {
 			spin_unlock(&block_group->lock);
 			btrfs_dec_block_group_ro(block_group);
 			return 0;
@@ -1978,7 +2029,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 		}
 	}
 
-	block_group->zone_is_active = 0;
+	clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
 	block_group->alloc_offset = block_group->zone_capacity;
 	block_group->free_space_ctl->free_space = 0;
 	btrfs_clear_treelog_bg(block_group);
@@ -2186,13 +2237,14 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
 	ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
 
 	spin_lock(&block_group->lock);
-	if (!block_group->zoned_data_reloc_ongoing)
+	if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))
 		goto out;
 
 	/* All relocation extents are written. */
 	if (block_group->start + block_group->alloc_offset == logical + length) {
 		/* Now, release this block group for further allocations. */
-		block_group->zoned_data_reloc_ongoing = 0;
+		clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+			  &block_group->runtime_flags);
 	}
 
 out:
@@ -2264,7 +2316,9 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 			    list) {
 		if (!spin_trylock(&bg->lock))
 			continue;
-		if (btrfs_zoned_bg_is_full(bg) || bg->zone_is_active) {
+		if (btrfs_zoned_bg_is_full(bg) ||
+		    test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+			     &bg->runtime_flags)) {
 			spin_unlock(&bg->lock);
 			continue;
 		}
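For readers skimming the conversion above: the series replaces the packed bit fields (zone_is_active, zoned_data_reloc_ongoing) with a single runtime_flags word manipulated through the kernel's atomic bit helpers. The short userspace sketch below only mimics that pattern; the test_bit()/set_bit()/clear_bit() helpers and the struct here are local, non-atomic stand-ins for illustration, not the btrfs or kernel definitions, and only the flag names mirror the diff.

	/* Illustrative userspace analogue of the runtime_flags conversion above. */
	#include <stdbool.h>
	#include <stdio.h>

	enum {
		BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,	/* name mirrors the flag in the diff */
		BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
	};

	struct block_group {
		unsigned long runtime_flags;	/* one word holds all runtime bits */
	};

	/* Local stand-ins for the kernel's atomic bit helpers (non-atomic here). */
	static bool test_bit(int nr, const unsigned long *addr)
	{
		return (*addr >> nr) & 1UL;
	}

	static void set_bit(int nr, unsigned long *addr)
	{
		*addr |= 1UL << nr;
	}

	static void clear_bit(int nr, unsigned long *addr)
	{
		*addr &= ~(1UL << nr);
	}

	int main(void)
	{
		struct block_group bg = { .runtime_flags = 0 };

		/* Mark the block group active, as btrfs_zone_activate() now does. */
		set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg.runtime_flags);
		printf("active: %d\n", test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg.runtime_flags));

		/* Deactivate it again, as do_zone_finish() now does. */
		clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg.runtime_flags);
		printf("active: %d\n", test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg.runtime_flags));
		return 0;
	}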