about summary refs log tree commit diff stats
path: root/drivers/md/dm-zoned-metadata.c
diff options
context:
space:
mode:
author    Hannes Reinecke <hare@suse.de>        2020-05-11 10:24:30 +0200
committer Mike Snitzer <snitzer@redhat.com>     2020-05-20 17:09:43 -0400
commit   bd5c40313a1467e4683d92456fc5219d94823f24 (patch)
tree     a31a375c24e02dba81faa6daf64204e63e0190fb /drivers/md/dm-zoned-metadata.c
parent   dm zoned: ignore metadata zone in dmz_alloc_zone() (diff)
download linux-dev-bd5c40313a1467e4683d92456fc5219d94823f24.tar.xz
download linux-dev-bd5c40313a1467e4683d92456fc5219d94823f24.zip
dm zoned: metadata version 2
Implement handling for metadata version 2. The new metadata adds a label and UUID for the device mapper device, and additional UUID for the underlying block devices. It also allows for an additional regular drive to be used for emulating random access zones. The emulated zones will be placed logically in front of the zones from the zoned block device, causing the superblocks and metadata to be stored on that device. The first zone of the original zoned device will be used to hold another, tertiary copy of the metadata; this copy carries a generation number of 0 and is never updated; it's just used for identification. Signed-off-by: Hannes Reinecke <hare@suse.de> Reviewed-by: Bob Liu <bob.liu@oracle.com> Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md/dm-zoned-metadata.c')
-rw-r--r--  drivers/md/dm-zoned-metadata.c  |  310
1 file changed, 261 insertions(+), 49 deletions(-)
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 939deed1606a..fba690dd37f5 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -16,7 +16,7 @@
/*
* Metadata version.
*/
-#define DMZ_META_VER 1
+#define DMZ_META_VER 2
/*
* On-disk super block magic.
@@ -69,8 +69,17 @@ struct dmz_super {
/* Checksum */
__le32 crc; /* 48 */
+ /* DM-Zoned label */
+ u8 dmz_label[32]; /* 80 */
+
+ /* DM-Zoned UUID */
+ u8 dmz_uuid[16]; /* 96 */
+
+ /* Device UUID */
+ u8 dev_uuid[16]; /* 112 */
+
/* Padding to full 512B sector */
- u8 reserved[464]; /* 512 */
+ u8 reserved[400]; /* 512 */
};
/*
@@ -133,8 +142,11 @@ struct dmz_sb {
*/
struct dmz_metadata {
struct dmz_dev *dev;
+ unsigned int nr_devs;
char devname[BDEVNAME_SIZE];
+ char label[BDEVNAME_SIZE];
+ uuid_t uuid;
sector_t zone_bitmap_size;
unsigned int zone_nr_bitmap_blocks;
@@ -161,8 +173,9 @@ struct dmz_metadata {
/* Zone information array */
struct dm_zone *zones;
- struct dmz_sb sb[2];
+ struct dmz_sb sb[3];
unsigned int mblk_primary;
+ unsigned int sb_version;
u64 sb_gen;
unsigned int min_nr_mblks;
unsigned int max_nr_mblks;
@@ -195,31 +208,56 @@ struct dmz_metadata {
};
#define dmz_zmd_info(zmd, format, args...) \
- DMINFO("(%s): " format, (zmd)->devname, ## args)
+ DMINFO("(%s): " format, (zmd)->label, ## args)
#define dmz_zmd_err(zmd, format, args...) \
- DMERR("(%s): " format, (zmd)->devname, ## args)
+ DMERR("(%s): " format, (zmd)->label, ## args)
#define dmz_zmd_warn(zmd, format, args...) \
- DMWARN("(%s): " format, (zmd)->devname, ## args)
+ DMWARN("(%s): " format, (zmd)->label, ## args)
#define dmz_zmd_debug(zmd, format, args...) \
- DMDEBUG("(%s): " format, (zmd)->devname, ## args)
+ DMDEBUG("(%s): " format, (zmd)->label, ## args)
/*
* Various accessors
*/
+static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone)
+{
+ unsigned int zone_id;
+
+ if (WARN_ON(!zone))
+ return 0;
+
+ zone_id = zone->id;
+ if (zmd->nr_devs > 1 &&
+ (zone_id >= zmd->dev[1].zone_offset))
+ zone_id -= zmd->dev[1].zone_offset;
+ return zone_id;
+}
+
sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- return (sector_t)zone->id << zmd->zone_nr_sectors_shift;
+ unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
+
+ return (sector_t)zone_id << zmd->zone_nr_sectors_shift;
}
sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- return (sector_t)zone->id << zmd->zone_nr_blocks_shift;
+ unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
+
+ return (sector_t)zone_id << zmd->zone_nr_blocks_shift;
}
struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone)
{
+ if (WARN_ON(!zone))
+ return &zmd->dev[0];
+
+ if (zmd->nr_devs > 1 &&
+ zone->id >= zmd->dev[1].zone_offset)
+ return &zmd->dev[1];
+
return &zmd->dev[0];
}
@@ -275,17 +313,29 @@ unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
const char *dmz_metadata_label(struct dmz_metadata *zmd)
{
- return (const char *)zmd->devname;
+ return (const char *)zmd->label;
}
bool dmz_check_dev(struct dmz_metadata *zmd)
{
- return dmz_check_bdev(&zmd->dev[0]);
+ unsigned int i;
+
+ for (i = 0; i < zmd->nr_devs; i++) {
+ if (!dmz_check_bdev(&zmd->dev[i]))
+ return false;
+ }
+ return true;
}
bool dmz_dev_is_dying(struct dmz_metadata *zmd)
{
- return dmz_bdev_is_dying(&zmd->dev[0]);
+ unsigned int i;
+
+ for (i = 0; i < zmd->nr_devs; i++) {
+ if (dmz_bdev_is_dying(&zmd->dev[i]))
+ return true;
+ }
+ return false;
}
/*
@@ -687,6 +737,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
struct bio *bio;
int ret;
+ if (WARN_ON(!dev))
+ return -EIO;
+
if (dmz_bdev_is_dying(dev))
return -EIO;
@@ -711,19 +764,32 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
*/
static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
{
- sector_t block = zmd->sb[set].block;
struct dmz_mblock *mblk = zmd->sb[set].mblk;
struct dmz_super *sb = zmd->sb[set].sb;
struct dmz_dev *dev = zmd->sb[set].dev;
+ sector_t sb_block;
u64 sb_gen = zmd->sb_gen + 1;
int ret;
sb->magic = cpu_to_le32(DMZ_MAGIC);
- sb->version = cpu_to_le32(DMZ_META_VER);
+
+ sb->version = cpu_to_le32(zmd->sb_version);
+ if (zmd->sb_version > 1) {
+ BUILD_BUG_ON(UUID_SIZE != 16);
+ export_uuid(sb->dmz_uuid, &zmd->uuid);
+ memcpy(sb->dmz_label, zmd->label, BDEVNAME_SIZE);
+ export_uuid(sb->dev_uuid, &dev->uuid);
+ }
sb->gen = cpu_to_le64(sb_gen);
- sb->sb_block = cpu_to_le64(block);
+ /*
+ * The metadata always references the absolute block address,
+ * ie relative to the entire block range, not the per-device
+ * block address.
+ */
+ sb_block = zmd->sb[set].zone->id << zmd->zone_nr_blocks_shift;
+ sb->sb_block = cpu_to_le64(sb_block);
sb->nr_meta_blocks = cpu_to_le32(zmd->nr_meta_blocks);
sb->nr_reserved_seq = cpu_to_le32(zmd->nr_reserved_seq);
sb->nr_chunks = cpu_to_le32(zmd->nr_chunks);
@@ -734,7 +800,8 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
sb->crc = 0;
sb->crc = cpu_to_le32(crc32_le(sb_gen, (unsigned char *)sb, DMZ_BLOCK_SIZE));
- ret = dmz_rdwr_block(dev, REQ_OP_WRITE, block, mblk->page);
+ ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block,
+ mblk->page);
if (ret == 0)
ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL);
@@ -915,6 +982,23 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
u32 crc, stored_crc;
u64 gen;
+ if (le32_to_cpu(sb->magic) != DMZ_MAGIC) {
+ dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)",
+ DMZ_MAGIC, le32_to_cpu(sb->magic));
+ return -ENXIO;
+ }
+
+ zmd->sb_version = le32_to_cpu(sb->version);
+ if (zmd->sb_version > DMZ_META_VER) {
+ dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)",
+ DMZ_META_VER, zmd->sb_version);
+ return -EINVAL;
+ }
+ if ((zmd->sb_version < 1) && (set == 2)) {
+ dmz_dev_err(dev, "Tertiary superblocks are not supported");
+ return -EINVAL;
+ }
+
gen = le64_to_cpu(sb->gen);
stored_crc = le32_to_cpu(sb->crc);
sb->crc = 0;
@@ -925,16 +1009,45 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
return -ENXIO;
}
- if (le32_to_cpu(sb->magic) != DMZ_MAGIC) {
- dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)",
- DMZ_MAGIC, le32_to_cpu(sb->magic));
- return -ENXIO;
- }
+ if (zmd->sb_version > 1) {
+ uuid_t sb_uuid;
+
+ import_uuid(&sb_uuid, sb->dmz_uuid);
+ if (uuid_is_null(&sb_uuid)) {
+ dmz_dev_err(dev, "NULL DM-Zoned uuid");
+ return -ENXIO;
+ } else if (uuid_is_null(&zmd->uuid)) {
+ uuid_copy(&zmd->uuid, &sb_uuid);
+ } else if (!uuid_equal(&zmd->uuid, &sb_uuid)) {
+ dmz_dev_err(dev, "mismatching DM-Zoned uuid, "
+ "is %pUl expected %pUl",
+ &sb_uuid, &zmd->uuid);
+ return -ENXIO;
+ }
+ if (!strlen(zmd->label))
+ memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE);
+ else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) {
+ dmz_dev_err(dev, "mismatching DM-Zoned label, "
+ "is %s expected %s",
+ sb->dmz_label, zmd->label);
+ return -ENXIO;
+ }
+ import_uuid(&dev->uuid, sb->dev_uuid);
+ if (uuid_is_null(&dev->uuid)) {
+ dmz_dev_err(dev, "NULL device uuid");
+ return -ENXIO;
+ }
- if (le32_to_cpu(sb->version) != DMZ_META_VER) {
- dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)",
- DMZ_META_VER, le32_to_cpu(sb->version));
- return -ENXIO;
+ if (set == 2) {
+ /*
+ * Generation number should be 0, but it doesn't
+ * really matter if it isn't.
+ */
+ if (gen != 0)
+ dmz_dev_warn(dev, "Invalid generation %llu",
+ gen);
+ return 0;
+ }
}
nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1)
@@ -1185,21 +1298,38 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
"Using super block %u (gen %llu)",
zmd->mblk_primary, zmd->sb_gen);
+ if ((zmd->sb_version > 1) && zmd->sb[2].zone) {
+ zmd->sb[2].block = dmz_start_block(zmd, zmd->sb[2].zone);
+ zmd->sb[2].dev = dmz_zone_to_dev(zmd, zmd->sb[2].zone);
+ ret = dmz_get_sb(zmd, 2);
+ if (ret) {
+ dmz_dev_err(zmd->sb[2].dev,
+ "Read tertiary super block failed");
+ return ret;
+ }
+ ret = dmz_check_sb(zmd, 2);
+ if (ret == -EINVAL)
+ return ret;
+ }
return 0;
}
/*
* Initialize a zone descriptor.
*/
-static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data)
+static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
{
struct dmz_metadata *zmd = data;
+ struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0];
+ int idx = num + dev->zone_offset;
struct dm_zone *zone = &zmd->zones[idx];
- struct dmz_dev *dev = zmd->dev;
- /* Ignore the eventual last runt (smaller) zone */
if (blkz->len != zmd->zone_nr_sectors) {
- if (blkz->start + blkz->len == dev->capacity)
+ if (zmd->sb_version > 1) {
+ /* Ignore the eventual runt (smaller) zone */
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ return 0;
+ } else if (blkz->start + blkz->len == dev->capacity)
return 0;
return -ENXIO;
}
@@ -1234,16 +1364,45 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data)
zmd->nr_useable_zones++;
if (dmz_is_rnd(zone)) {
zmd->nr_rnd_zones++;
- if (!zmd->sb[0].zone) {
- /* Super block zone */
+ if (zmd->nr_devs == 1 && !zmd->sb[0].zone) {
+ /* Primary super block zone */
zmd->sb[0].zone = zone;
}
}
+ if (zmd->nr_devs > 1 && !zmd->sb[2].zone) {
+ /* Tertiary superblock zone */
+ zmd->sb[2].zone = zone;
+ }
}
return 0;
}
+static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
+{
+ int idx;
+ sector_t zone_offset = 0;
+
+ for(idx = 0; idx < dev->nr_zones; idx++) {
+ struct dm_zone *zone = &zmd->zones[idx];
+
+ INIT_LIST_HEAD(&zone->link);
+ atomic_set(&zone->refcount, 0);
+ zone->id = idx;
+ zone->chunk = DMZ_MAP_UNMAPPED;
+ set_bit(DMZ_RND, &zone->flags);
+ zone->wp_block = 0;
+ zmd->nr_rnd_zones++;
+ zmd->nr_useable_zones++;
+ if (dev->capacity - zone_offset < zmd->zone_nr_sectors) {
+ /* Disable runt zone */
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ break;
+ }
+ zone_offset += zmd->zone_nr_sectors;
+ }
+}
+
/*
* Free zones descriptors.
*/
@@ -1259,11 +1418,11 @@ static void dmz_drop_zones(struct dmz_metadata *zmd)
*/
static int dmz_init_zones(struct dmz_metadata *zmd)
{
- struct dmz_dev *dev = &zmd->dev[0];
- int ret;
+ int i, ret;
+ struct dmz_dev *zoned_dev = &zmd->dev[0];
/* Init */
- zmd->zone_nr_sectors = dev->zone_nr_sectors;
+ zmd->zone_nr_sectors = zmd->dev[0].zone_nr_sectors;
zmd->zone_nr_sectors_shift = ilog2(zmd->zone_nr_sectors);
zmd->zone_nr_blocks = dmz_sect2blk(zmd->zone_nr_sectors);
zmd->zone_nr_blocks_shift = ilog2(zmd->zone_nr_blocks);
@@ -1274,7 +1433,14 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
DMZ_BLOCK_SIZE_BITS);
/* Allocate zone array */
- zmd->nr_zones = dev->nr_zones;
+ zmd->nr_zones = 0;
+ for (i = 0; i < zmd->nr_devs; i++)
+ zmd->nr_zones += zmd->dev[i].nr_zones;
+
+ if (!zmd->nr_zones) {
+ DMERR("(%s): No zones found", zmd->devname);
+ return -ENXIO;
+ }
zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL);
if (!zmd->zones)
return -ENOMEM;
@@ -1282,14 +1448,27 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
DMDEBUG("(%s): Using %zu B for zone information",
zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones);
+ if (zmd->nr_devs > 1) {
+ dmz_emulate_zones(zmd, &zmd->dev[0]);
+ /*
+ * Primary superblock zone is always at zone 0 when multiple
+ * drives are present.
+ */
+ zmd->sb[0].zone = &zmd->zones[0];
+
+ zoned_dev = &zmd->dev[1];
+ }
+
/*
* Get zone information and initialize zone descriptors. At the same
* time, determine where the super block should be: first block of the
* first randomly writable zone.
*/
- ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone,
- zmd);
+ ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES,
+ dmz_init_zone, zmd);
if (ret < 0) {
+ DMDEBUG("(%s): Failed to report zones, error %d",
+ zmd->devname, ret);
dmz_drop_zones(zmd);
return ret;
}
@@ -1325,6 +1504,9 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
unsigned int noio_flag;
int ret;
+ if (dev->flags & DMZ_BDEV_REGULAR)
+ return 0;
+
/*
* Get zone information from disk. Since blkdev_report_zones() uses
* GFP_KERNEL by default for memory allocations, set the per-task
@@ -2475,19 +2657,34 @@ static void dmz_print_dev(struct dmz_metadata *zmd, int num)
{
struct dmz_dev *dev = &zmd->dev[num];
- dmz_dev_info(dev, "Host-%s zoned block device",
- bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ?
- "aware" : "managed");
- dmz_dev_info(dev, " %llu 512-byte logical sectors",
- (u64)dev->capacity);
- dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors",
- dev->nr_zones, (u64)zmd->zone_nr_sectors);
+ if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE)
+ dmz_dev_info(dev, "Regular block device");
+ else
+ dmz_dev_info(dev, "Host-%s zoned block device",
+ bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ?
+ "aware" : "managed");
+ if (zmd->sb_version > 1) {
+ sector_t sector_offset =
+ dev->zone_offset << zmd->zone_nr_sectors_shift;
+
+ dmz_dev_info(dev, " %llu 512-byte logical sectors (offset %llu)",
+ (u64)dev->capacity, (u64)sector_offset);
+ dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors (offset %llu)",
+ dev->nr_zones, (u64)zmd->zone_nr_sectors,
+ (u64)dev->zone_offset);
+ } else {
+ dmz_dev_info(dev, " %llu 512-byte logical sectors",
+ (u64)dev->capacity);
+ dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors",
+ dev->nr_zones, (u64)zmd->zone_nr_sectors);
+ }
}
/*
* Initialize the zoned metadata.
*/
-int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
+int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
+ struct dmz_metadata **metadata,
const char *devname)
{
struct dmz_metadata *zmd;
@@ -2501,6 +2698,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
strcpy(zmd->devname, devname);
zmd->dev = dev;
+ zmd->nr_devs = num_dev;
zmd->mblk_rbtree = RB_ROOT;
init_rwsem(&zmd->mblk_sem);
mutex_init(&zmd->mblk_flush_lock);
@@ -2535,11 +2733,24 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
/* Set metadata zones starting from sb_zone */
for (i = 0; i < zmd->nr_meta_zones << 1; i++) {
zone = dmz_get(zmd, zmd->sb[0].zone->id + i);
- if (!dmz_is_rnd(zone))
+ if (!dmz_is_rnd(zone)) {
+ dmz_zmd_err(zmd,
+ "metadata zone %d is not random", i);
+ ret = -ENXIO;
goto err;
+ }
+ set_bit(DMZ_META, &zone->flags);
+ }
+ if (zmd->sb[2].zone) {
+ zone = dmz_get(zmd, zmd->sb[2].zone->id);
+ if (!zone) {
+ dmz_zmd_err(zmd,
+ "Tertiary metadata zone not present");
+ ret = -ENXIO;
+ goto err;
+ }
set_bit(DMZ_META, &zone->flags);
}
-
/* Load mapping table */
ret = dmz_load_mapping(zmd);
if (ret)
@@ -2564,8 +2775,9 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata,
goto err;
}
- dmz_zmd_info(zmd, "DM-Zoned metadata version %d", DMZ_META_VER);
- dmz_print_dev(zmd, 0);
+ dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version);
+ for (i = 0; i < zmd->nr_devs; i++)
+ dmz_print_dev(zmd, i);
dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors",
zmd->nr_zones, (u64)zmd->zone_nr_sectors);