aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs/super.c
diff options
context:
space:
mode:
authorAravind Ramesh <aravind.ramesh@wdc.com>2020-07-16 18:26:56 +0530
committerJaegeuk Kim <jaegeuk@kernel.org>2020-09-10 14:03:29 -0700
commitde881df97768d07b342cbd1f8359b832afccace9 (patch)
tree5ed050780b9dd7c7e4e304c89ed7f7abc9c4e60c /fs/f2fs/super.c
parentMerge tag 'f2fs-for-5.9-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs (diff)
downloadlinux-dev-de881df97768d07b342cbd1f8359b832afccace9.tar.xz
linux-dev-de881df97768d07b342cbd1f8359b832afccace9.zip
f2fs: support zone capacity less than zone size
NVMe Zoned Namespace devices can have zone-capacity less than zone-size. Zone-capacity indicates the maximum number of sectors that are usable in a zone beginning from the first sector of the zone. This makes the sectors after the zone-capacity till zone-size unusable. This patch set tracks zone-size and zone-capacity in zoned devices and calculates the usable blocks per segment and usable segments per section. If zone-capacity is less than zone-size, mark only those segments which start before zone-capacity as free segments. All segments at and beyond zone-capacity are treated as permanently used segments. In cases where zone-capacity does not align with segment size, the last segment will start before zone-capacity and end beyond the zone-capacity of the zone. For such spanning segments, only sectors within the zone-capacity are used. During writes and GC, manage the usable segments in a section and usable blocks per segment. Segments which are beyond zone-capacity are never allocated and do not need to be garbage collected; only the segments which are before zone-capacity need to be garbage collected. For spanning segments, based on the number of usable blocks in that segment, write to blocks only up to zone-capacity. Zone-capacity is device specific and cannot be configured by the user. Since NVMe ZNS device zones are sequentially write only, a block device with conventional zones or any normal block device is needed along with the ZNS device for the metadata operations of F2FS. A typical nvme-cli output of a zoned device shows zone start and capacity and write pointer as below: SLBA: 0x0 WP: 0x0 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x20000 WP: 0x20000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x40000 WP: 0x40000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ Here zone size is 64MB, capacity is 49MB, WP is at zone start as the zones are in EMPTY state. 
For each zone, only the first 49MB from the zone start is usable; any LBA/sector beyond the first 49MB of the zone cannot be read or written to, and the drive will fail any attempt to read/write it. So, the second zone starts at 64MB and is usable till 113MB (64 + 49), and the range between 113MB and 128MB is again unusable. The next zone starts at 128MB, and so on. Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs/f2fs/super.c')
-rw-r--r--fs/f2fs/super.c41
1 files changed, 35 insertions, 6 deletions
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index dfa072fa8081..970257ee5d65 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1184,6 +1184,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
+ kfree(FDEV(i).zone_capacity_blocks);
#endif
}
kvfree(sbi->devs);
@@ -3088,13 +3089,26 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_BLK_DEV_ZONED
+
+struct f2fs_report_zones_args {
+ struct f2fs_dev_info *dev;
+ bool zone_cap_mismatch;
+};
+
static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
+ void *data)
{
- struct f2fs_dev_info *dev = data;
+ struct f2fs_report_zones_args *rz_args = data;
+
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return 0;
+
+ set_bit(idx, rz_args->dev->blkz_seq);
+ rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >>
+ F2FS_LOG_SECTORS_PER_BLOCK;
+ if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch)
+ rz_args->zone_cap_mismatch = true;
- if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
- set_bit(idx, dev->blkz_seq);
return 0;
}
@@ -3102,6 +3116,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev->bd_part->nr_sects;
+ struct f2fs_report_zones_args rep_zone_arg;
int ret;
if (!f2fs_sb_has_blkzoned(sbi))
@@ -3127,12 +3142,26 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (!FDEV(devi).blkz_seq)
return -ENOMEM;
- /* Get block zones type */
+ /* Get block zones type and zone-capacity */
+ FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
+ FDEV(devi).nr_blkz * sizeof(block_t),
+ GFP_KERNEL);
+ if (!FDEV(devi).zone_capacity_blocks)
+ return -ENOMEM;
+
+ rep_zone_arg.dev = &FDEV(devi);
+ rep_zone_arg.zone_cap_mismatch = false;
+
ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
- &FDEV(devi));
+ &rep_zone_arg);
if (ret < 0)
return ret;
+ if (!rep_zone_arg.zone_cap_mismatch) {
+ kfree(FDEV(devi).zone_capacity_blocks);
+ FDEV(devi).zone_capacity_blocks = NULL;
+ }
+
return 0;
}
#endif