From 3093a479727be194996dbc40f803711af5877be4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 08:12:49 +0200 Subject: block: inherit the zoned characteristics in blk_stack_limits Lift the code from device mapper into blk_stack_limits to inherity the stacking limitations. This ensures we do the right thing for all stacked zoned block devices. Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-settings.c | 1 + drivers/md/dm-table.c | 19 ------------------- include/linux/blkdev.h | 9 ++++++--- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 9a2c23cd9700..9cddbd736474 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -609,6 +609,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->chunk_sectors = min_not_zero(t->chunk_sectors, b->chunk_sectors); + t->zoned = max(t->zoned, b->zoned); return ret; } EXPORT_SYMBOL(blk_stack_limits); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 0ea5b7367179..ec5364133cef 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -467,9 +467,6 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, q->limits.logical_block_size, q->limits.alignment_offset, (unsigned long long) start << SECTOR_SHIFT); - - limits->zoned = blk_queue_zoned_model(q); - return 0; } @@ -1528,22 +1525,6 @@ combine_limits: dm_device_name(table->md), (unsigned long long) ti->begin, (unsigned long long) ti->len); - - /* - * FIXME: this should likely be moved to blk_stack_limits(), would - * also eliminate limits->zoned stacking hack in dm_set_device_limits() - */ - if (limits->zoned == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) { - /* - * By default, the stacked limits zoned model is set to - * BLK_ZONED_NONE in blk_set_stacking_limits(). Update - * this model using the first target model reported - * that is not BLK_ZONED_NONE. This will be either the - * first target device zoned model or the model reported - * by the target .io_hints. - */ - limits->zoned = ti_limits.zoned; - } } /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 63078944909c..9e331a1eb35f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -306,11 +306,14 @@ enum blk_queue_state { /* * Zoned block device models (zoned limit). + * + * Note: This needs to be ordered from the least to the most severe + * restrictions for the inheritance in blk_stack_limits() to work. */ enum blk_zoned_model { - BLK_ZONED_NONE, /* Regular block device */ - BLK_ZONED_HA, /* Host-aware zoned block device */ - BLK_ZONED_HM, /* Host-managed zoned block device */ + BLK_ZONED_NONE = 0, /* Regular block device */ + BLK_ZONED_HA, /* Host-aware zoned block device */ + BLK_ZONED_HM, /* Host-managed zoned block device */ }; struct queue_limits { -- cgit v1.2.3-59-g8ed1b From 9efa82ef2b15d1757dd6cc518988a4506554e893 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 08:12:50 +0200 Subject: block: remove bdev_stack_limits This function is just a tiny wrapper around blk_stack_limit and has two callers. Simplify the stack a bit by open coding it in the two callers. Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-settings.c | 25 ++----------------------- drivers/md/dm-table.c | 3 ++- include/linux/blkdev.h | 2 -- 3 files changed, 4 insertions(+), 26 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 9cddbd736474..8c63af772685 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -614,28 +614,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, } EXPORT_SYMBOL(blk_stack_limits); -/** - * bdev_stack_limits - adjust queue limits for stacked drivers - * @t: the stacking driver limits (top device) - * @bdev: the component block_device (bottom) - * @start: first data sector within component device - * - * Description: - * Merges queue limits for a top device and a block_device. Returns - * 0 if alignment didn't change. Returns -1 if adding the bottom - * device caused misalignment. - */ -int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, - sector_t start) -{ - struct request_queue *bq = bdev_get_queue(bdev); - - start += get_start_sect(bdev); - - return blk_stack_limits(t, &bq->limits, start); -} -EXPORT_SYMBOL(bdev_stack_limits); - /** * disk_stack_limits - adjust queue limits for stacked drivers * @disk: MD/DM gendisk (top) @@ -651,7 +629,8 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, { struct request_queue *t = disk->queue; - if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { + if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits, + get_start_sect(bdev) + (offset >> 9)) < 0) { char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; disk_name(disk, 0, top); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ec5364133cef..aac4c31cfc84 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -458,7 +458,8 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, return 0; } - if (bdev_stack_limits(limits, bdev, start) < 0) + if (blk_stack_limits(limits, &q->limits, + get_start_sect(bdev) + start) < 0) DMWARN("%s: adding target device %s caused an alignment inconsistency: " "physical_block_size=%u, logical_block_size=%u, " "alignment_offset=%u, start=%llu", diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9e331a1eb35f..54b963109e64 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1139,8 +1139,6 @@ extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, - sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -- cgit v1.2.3-59-g8ed1b From b9b1a5d71533f2ccd54b810dffdcf0789b30ba9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 08:12:51 +0200 Subject: block: remove blk_queue_stack_limits This function is just a tiny wrapper around blk_stack_limits. Open code it int the two callers. Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-settings.c | 11 ----------- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/nvme/host/core.c | 3 ++- include/linux/blkdev.h | 1 - 4 files changed, 4 insertions(+), 15 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 8c63af772685..76a7e03bcd6c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -455,17 +455,6 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); -/** - * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers - * @t: the stacking driver (top) - * @b: the underlying device (bottom) - **/ -void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) -{ - blk_stack_limits(&t->limits, &b->limits, 0); -} -EXPORT_SYMBOL(blk_queue_stack_limits); - /** * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index da4a3ebe04ef..d0d9a549b583 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1250,7 +1250,7 @@ static void fixup_discard_if_not_supported(struct request_queue *q) static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) { - /* Fixup max_write_zeroes_sectors after blk_queue_stack_limits(): + /* Fixup max_write_zeroes_sectors after blk_stack_limits(): * if we can handle "zeroes" efficiently on the protocol, * we want to do that, even if our backend does not announce * max_write_zeroes_sectors itself. */ @@ -1361,7 +1361,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi decide_on_write_same_support(device, q, b, o, disable_write_same); if (b) { - blk_queue_stack_limits(q, b); + blk_stack_limits(&q->limits, &b->limits, 0); if (q->backing_dev_info->ra_pages != b->backing_dev_info->ra_pages) { diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e9d83b98d79d..aa2b66edba5e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2054,7 +2054,8 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); - blk_queue_stack_limits(ns->head->disk->queue, ns->queue); + blk_stack_limits(&ns->head->disk->queue->limits, + &ns->queue->limits, 0); nvme_mpath_update_disk_size(ns->head->disk); } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 54b963109e64..bbdd3cf62038 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1141,7 +1141,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); -extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); -- cgit v1.2.3-59-g8ed1b From 1a1206dc4cf02cee4b5cbce583ee4c22368b4c28 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 30 Jul 2020 20:25:17 +0900 Subject: block: don't do revalidate zones on invalid devices When we loose a device for whatever reason while (re)scanning zones, we trip over a NULL pointer in blk_revalidate_zone_cb, like in the following log: sd 0:0:0:0: [sda] 3418095616 4096-byte logical blocks: (14.0 TB/12.7 TiB) sd 0:0:0:0: [sda] 52156 zones of 65536 logical blocks sd 0:0:0:0: [sda] Write Protect is off sd 0:0:0:0: [sda] Mode Sense: 37 00 00 08 sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA sd 0:0:0:0: [sda] REPORT ZONES start lba 1065287680 failed sd 0:0:0:0: [sda] REPORT ZONES: Result: hostbyte=0x00 driverbyte=0x08 sd 0:0:0:0: [sda] Sense Key : 0xb [current] sd 0:0:0:0: [sda] ASC=0x0 ASCQ=0x6 sda: failed to revalidate zones sd 0:0:0:0: [sda] 0 4096-byte logical blocks: (0 B/0 B) sda: detected capacity change from 14000519643136 to 0 ================================================================== BUG: KASAN: null-ptr-deref in blk_revalidate_zone_cb+0x1b7/0x550 Write of size 8 at addr 0000000000000010 by task kworker/u4:1/58 CPU: 1 PID: 58 Comm: kworker/u4:1 Not tainted 5.8.0-rc1 #692 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4-rebuilt.opensuse.org 04/01/2014 Workqueue: events_unbound async_run_entry_fn Call Trace: dump_stack+0x7d/0xb0 ? blk_revalidate_zone_cb+0x1b7/0x550 kasan_report.cold+0x5/0x37 ? blk_revalidate_zone_cb+0x1b7/0x550 check_memory_region+0x145/0x1a0 blk_revalidate_zone_cb+0x1b7/0x550 sd_zbc_parse_report+0x1f1/0x370 ? blk_req_zone_write_trylock+0x200/0x200 ? sectors_to_logical+0x60/0x60 ? blk_req_zone_write_trylock+0x200/0x200 ? blk_req_zone_write_trylock+0x200/0x200 sd_zbc_report_zones+0x3c4/0x5e0 ? sd_dif_config_host+0x500/0x500 blk_revalidate_disk_zones+0x231/0x44d ? _raw_write_lock_irqsave+0xb0/0xb0 ? blk_queue_free_zone_bitmaps+0xd0/0xd0 sd_zbc_read_zones+0x8cf/0x11a0 sd_revalidate_disk+0x305c/0x64e0 ? __device_add_disk+0x776/0xf20 ? read_capacity_16.part.0+0x1080/0x1080 ? blk_alloc_devt+0x250/0x250 ? create_object.isra.0+0x595/0xa20 ? kasan_unpoison_shadow+0x33/0x40 sd_probe+0x8dc/0xcd2 really_probe+0x20e/0xaf0 __driver_attach_async_helper+0x249/0x2d0 async_run_entry_fn+0xbe/0x560 process_one_work+0x764/0x1290 ? _raw_read_unlock_irqrestore+0x30/0x30 worker_thread+0x598/0x12f0 ? __kthread_parkme+0xc6/0x1b0 ? schedule+0xed/0x2c0 ? process_one_work+0x1290/0x1290 kthread+0x36b/0x440 ? kthread_create_worker_on_cpu+0xa0/0xa0 ret_from_fork+0x22/0x30 ================================================================== When the device is already gone we end up with the following scenario: The device's capacity is 0 and thus the number of zones will be 0 as well. When allocating the bitmap for the conventional zones, we then trip over a NULL pointer. So if we encounter a zoned block device with a 0 capacity, don't dare to revalidate the zones sizes. Fixes: 6c6b35491422 ("block: set the zone size in blk_revalidate_disk_zones atomically") Signed-off-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-zoned.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 81152a260354..6817a673e5ce 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -498,6 +498,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk, if (WARN_ON_ONCE(!queue_is_mq(q))) return -EIO; + if (!get_capacity(disk)) + return -EIO; + /* * Ensure that all memory allocations in this context are done as if * GFP_NOIO was specified. -- cgit v1.2.3-59-g8ed1b