From c69cb1d17b6258830b0a97dde73525c1006898f8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 Jun 2020 13:20:59 +0900 Subject: dm zoned: fix uninitialized pointer dereference Make sure that the local variable rzone in dmz_do_reclaim() is always initialized before being used for printing debug messages. Fixes: f97809aec589 ("dm zoned: per-device reclaim") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-reclaim.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index 2261b4dd60b7..dd1eebf6e50f 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -377,6 +377,7 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) dmz_metadata_label(zmd), zrc->dev_idx); return -EBUSY; } + rzone = dzone; start = jiffies; if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) { @@ -391,8 +392,6 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) */ ret = dmz_reclaim_rnd_data(zrc, dzone); } - rzone = dzone; - } else { struct dm_zone *bzone = dzone->bzone; sector_t chunk_block = 0; @@ -415,7 +414,6 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) * be later reclaimed. */ ret = dmz_reclaim_seq_data(zrc, dzone); - rzone = dzone; } } out: -- cgit v1.2.3-59-g8ed1b From 7b2377486767503d47265e4d487a63c651f6b55d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 15 Jun 2020 11:33:23 +0800 Subject: dm zoned: assign max_io_len correctly The unit of max_io_len is sector instead of byte (spotted through code review), so fix it. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Hou Tao Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index a907a9446c0b..cf915009c306 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -890,7 +890,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) } /* Set target (no write same support) */ - ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9; + ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata); ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_zeroes_bios = 1; -- cgit v1.2.3-59-g8ed1b From 39495b12ef1cf602e6abd350dce2ef4199906531 Mon Sep 17 00:00:00 2001 From: Huaisheng Ye Date: Fri, 12 Jun 2020 23:59:11 +0800 Subject: dm writecache: correct uncommitted_block when discarding uncommitted entry When uncommitted entry has been discarded, correct wc->uncommitted_block for getting the exact number. Fixes: 48debafe4f2fe ("dm: add writecache target") Cc: stable@vger.kernel.org Signed-off-by: Huaisheng Ye Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 74f3c506f084..ddff5527dbdf 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -853,6 +853,8 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ writecache_wait_for_ios(wc, WRITE); discarded_something = true; } + if (!writecache_entry_is_committed(wc, e)) + wc->uncommitted_blocks--; writecache_free_entry(wc, e); } -- cgit v1.2.3-59-g8ed1b From a143e172b66d992092eb6cf984f79d6989b684d6 Mon Sep 17 00:00:00 2001 From: Huaisheng Ye Date: Fri, 12 Jun 2020 23:55:44 +0800 Subject: dm writecache: skip writecache_wait when using pmem mode The array bio_in_progress is only used with ssd mode. So skip writecache_wait_for_ios in writecache_discard when pmem mode. Signed-off-by: Huaisheng Ye Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index ddff5527dbdf..6b28990b1d84 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -849,8 +849,10 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ if (likely(!e->write_in_progress)) { if (!discarded_something) { - writecache_wait_for_ios(wc, READ); - writecache_wait_for_ios(wc, WRITE); + if (!WC_MODE_PMEM(wc)) { + writecache_wait_for_ios(wc, READ); + writecache_wait_for_ios(wc, WRITE); + } discarded_something = true; } if (!writecache_entry_is_committed(wc, e)) -- cgit v1.2.3-59-g8ed1b From da8996250a7cb9fd6a262dc899258ad0f4021b68 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 12:31:45 -0400 Subject: dm ioctl: use struct_size() helper in retrieve_deps() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct dm_target_deps { ... __u64 dev[0]; /* out */ }; Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index ac83f5002ce5..489935d5f22d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1471,7 +1471,7 @@ static void retrieve_deps(struct dm_table *table, /* * Check we have enough space. */ - needed = sizeof(*deps) + (sizeof(*deps->dev) * count); + needed = struct_size(deps, dev, count); if (len < needed) { param->flags |= DM_BUFFER_FULL_FLAG; return; -- cgit v1.2.3-59-g8ed1b From b38c0ad57f8e7810f2d994f49dab4ccb129cf31c Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 18 Jun 2020 11:48:10 +0900 Subject: dm zoned: Fix metadata zone size check When dm zoned has multiple devices, metadata is on the cache device, not in random zones of the zoned devices. Then the number of metadata zones shall be checked with the number of cache zones, not random zones. Fixes: 34f5affd04c4 ("dm zoned: separate random and cache zones") Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 130b5a6d9f12..fc1329ca3e70 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1078,7 +1078,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb, nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1) >> zmd->zone_nr_blocks_shift; if (!nr_meta_zones || - nr_meta_zones >= zmd->nr_rnd_zones) { + (zmd->nr_devs <= 1 && nr_meta_zones >= zmd->nr_rnd_zones) || + (zmd->nr_devs > 1 && nr_meta_zones >= zmd->nr_cache_zones)) { dmz_dev_err(dev, "Invalid number of metadata blocks"); return -ENXIO; } -- cgit v1.2.3-59-g8ed1b From 415c79e13b17ce5f0a0815bab4d584f75f4d1171 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 19 Jun 2020 15:59:04 +0900 Subject: dm: update original bio sector on Zone Append Naohiro reported that issuing zone-append bios to a zoned block device underneath a dm-linear device does not work as expected. This because we forgot to reverse-map the sector the device wrote to the original bio. For zone-append bios, get the offset in the zone of the written sector from the clone bio and add that to the original bio's sector position. Fixes: 0512a75b98f8 ("block: Introduce REQ_OP_ZONE_APPEND") Cc: stable@vger.kernel.org Reported-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 109e81f33edb..e6807792fec8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1009,6 +1009,7 @@ static void clone_endio(struct bio *bio) struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; + struct bio *orig_bio = io->orig_bio; if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { if (bio_op(bio) == REQ_OP_DISCARD && @@ -1022,6 +1023,18 @@ static void clone_endio(struct bio *bio) disable_write_zeroes(md); } + /* + * For zone-append bios get offset in zone of the written + * sector and add that to the original bio sector pos. + */ + if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) { + sector_t written_sector = bio->bi_iter.bi_sector; + struct request_queue *q = orig_bio->bi_disk->queue; + u64 mask = (u64)blk_queue_zone_sectors(q) - 1; + + orig_bio->bi_iter.bi_sector += written_sector & mask; + } + if (endio) { int r = endio(tio->ti, bio, &error); switch (r) { -- cgit v1.2.3-59-g8ed1b From 3ee39573e567eded4f73e1e22216034bc29f3813 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 19 Jun 2020 16:49:55 +0900 Subject: dm zoned: Fix random zone reclaim selection Commit 2094045fe5b5 ("dm zoned: prefer full zones for reclaim") modified dmz_get_rnd_zone_for_reclaim() to add a search for the buffer zone with the heaviest weight as an optimal candidate for reclaim. This modification uses the zone pointer variabl "last" which is set only once and never modified as zones are scanned, resulting in the search being inefective. Furthermore, if the selected buffer zone at the end of the search loop is active or already locked for reclaim, dmz_get_rnd_zone_for_reclaim() returns NULL even if other random zones with a lesser weight can be reclaimed. To fix the search and to guarantee that reclaim can make forward progress, fix dmz_get_rnd_zone_for_reclaim() loop to correctly find the buffer zone with the heaviest weight using the variable maxw_z. Also make sure to fallback to finding the first random zone that can be reclaimed if this best candidate zone cannot be reclaimed. While at it, also fix the device index check to consider only random zones, ignoring cache zones belonging to the cache device if one is used as that device does not have a reclaim process. Fixes: 2094045fe5b5 ("dm zoned: prefer full zones for reclaim") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index fc1329ca3e70..b40e643ddf73 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1950,7 +1950,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, unsigned int idx, bool idle) { struct dm_zone *dzone = NULL; - struct dm_zone *zone, *last = NULL; + struct dm_zone *zone, *maxw_z = NULL; struct list_head *zone_list; /* If we have cache zones select from the cache zone list */ @@ -1962,18 +1962,37 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, } else zone_list = &zmd->dev[idx].map_rnd_list; + /* + * Find the buffer zone with the heaviest weight or the first (oldest) + * data zone that can be reclaimed. + */ list_for_each_entry(zone, zone_list, link) { if (dmz_is_buf(zone)) { dzone = zone->bzone; - if (dzone->dev->dev_idx != idx) + if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx) continue; - if (!last) { - last = dzone; - continue; - } - if (last->weight < dzone->weight) + if (!maxw_z || maxw_z->weight < dzone->weight) + maxw_z = dzone; + } else { + dzone = zone; + if (dmz_lock_zone_reclaim(dzone)) + return dzone; + } + } + + if (maxw_z && dmz_lock_zone_reclaim(maxw_z)) + return maxw_z; + + /* + * If we come here, none of the zones inspected could be locked for + * reclaim. Try again, being more aggressive, that is, find the + * first zone that can be reclaimed regardless of its weitght. + */ + list_for_each_entry(zone, zone_list, link) { + if (dmz_is_buf(zone)) { + dzone = zone->bzone; + if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx) continue; - dzone = last; } else dzone = zone; if (dmz_lock_zone_reclaim(dzone)) -- cgit v1.2.3-59-g8ed1b From f2cd9a5e85dc25e10f6318bc6fbcb7dcff262561 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 19 Jun 2020 16:49:56 +0900 Subject: dm zoned: Fix reclaim zone selection When dm zoned has multiple devices, random zones are never selected for reclaim if all reserved sequential write zones are in use and no sequential write required zones can be selected for reclaim. This can lead to deadlocks as selecting a cache zone allows reclaiming a sequential zone, ensuring forward progress. Fix this by always defaulting to selecting a random zone when no sequential write required zone can be selected. [Damien: fix commit message] Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index b40e643ddf73..5cf6f5f552e0 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -2026,7 +2026,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, unsigned int dev_idx, bool idle) { - struct dm_zone *zone; + struct dm_zone *zone = NULL; /* * Search for a zone candidate to reclaim: 2 cases are possible. @@ -2039,7 +2039,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, dmz_lock_map(zmd); if (list_empty(&zmd->reserved_seq_zones_list)) zone = dmz_get_seq_zone_for_reclaim(zmd, dev_idx); - else + if (!zone) zone = dmz_get_rnd_zone_for_reclaim(zmd, dev_idx, idle); dmz_unlock_map(zmd); -- cgit v1.2.3-59-g8ed1b From d35bd764e6899a7bea71958f08d16cea5bfa1919 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 19 Jun 2020 11:51:34 -0400 Subject: dm writecache: add cond_resched to loop in persistent_memory_claim() Add cond_resched() to a loop that fills in the mapper memory area because the loop can be executed many times. Fixes: 48debafe4f2fe ("dm: add writecache target") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 6b28990b1d84..30505d70f423 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -282,6 +282,8 @@ static int persistent_memory_claim(struct dm_writecache *wc) while (daa-- && i < p) { pages[i++] = pfn_t_to_page(pfn); pfn.val++; + if (!(i & 15)) + cond_resched(); } } while (i < p); wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); -- cgit v1.2.3-59-g8ed1b