From b82d9fa257cb3725c49d94d2aeafc4677c34448a Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 10 Jun 2022 12:58:20 -0700
Subject: block: fix infinite loop for invalid zone append

Returning 0 early from __bio_iov_append_get_pages() for the
max_append_sectors warning just creates an infinite loop since 0 means
success, and the bio will never fill from the unadvancing iov_iter. We
could turn the return into an error value, but it will already be turned
into an error value later on, so just remove the warning. Clearly no one
ever hit it anyway.

Fixes: 0512a75b98f84 ("block: Introduce REQ_OP_ZONE_APPEND")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220610195830.3574005-2-kbusch@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'block')

diff --git a/block/bio.c b/block/bio.c
index 51c99f2c5c90..d9ff51fc457e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1229,9 +1229,6 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
 	size_t offset;
 	int ret = 0;
 
-	if (WARN_ON_ONCE(!max_append_sectors))
-		return 0;
-
 	/*
 	 * Move page array up in the allocated memory for the bio vecs as far as
 	 * possible so that we can start filling biovecs from the beginning
-- 
cgit v1.2.3-59-g8ed1b


From c58c0074c54c2e2bb3bb0d5a4d8896bb660cc8bc Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 10 Jun 2022 12:58:21 -0700
Subject: block/bio: remove duplicate append pages code

The getting pages setup for zone append and normal IO are identical. Use
common code for each.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220610195830.3574005-3-kbusch@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 102 +++++++++++++++++++++++++-----------------------------------
 1 file changed, 42 insertions(+), 60 deletions(-)

(limited to 'block')

diff --git a/block/bio.c b/block/bio.c
index d9ff51fc457e..ee5fe1bb015e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1159,6 +1159,37 @@ static void bio_put_pages(struct page **pages, size_t size, size_t off)
 		put_page(pages[i]);
 }
 
+static int bio_iov_add_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int offset)
+{
+	bool same_page = false;
+
+	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
+		if (WARN_ON_ONCE(bio_full(bio, len)))
+			return -EINVAL;
+		__bio_add_page(bio, page, len, offset);
+		return 0;
+	}
+
+	if (same_page)
+		put_page(page);
+	return 0;
+}
+
+static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int offset)
+{
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+	bool same_page = false;
+
+	if (bio_add_hw_page(q, bio, page, len, offset,
+			queue_max_zone_append_sectors(q), &same_page) != len)
+		return -EINVAL;
+	if (same_page)
+		put_page(page);
+	return 0;
+}
+
 #define PAGE_PTRS_PER_BVEC     (sizeof(struct bio_vec) / sizeof(struct page *))
 
 /**
@@ -1177,7 +1208,6 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
-	bool same_page = false;
 	ssize_t size, left;
 	unsigned len, i;
 	size_t offset;
@@ -1186,7 +1216,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	 * Move page array up in the allocated memory for the bio vecs as far as
 	 * possible so that we can start filling biovecs from the beginning
 	 * without overwriting the temporary page array.
-	*/
+	 */
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
@@ -1196,18 +1226,18 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 
 	for (left = size, i = 0; left > 0; left -= len, i++) {
 		struct page *page = pages[i];
+		int ret;
 
 		len = min_t(size_t, PAGE_SIZE - offset, left);
+		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+			ret = bio_iov_add_zone_append_page(bio, page, len,
+					offset);
+		else
+			ret = bio_iov_add_page(bio, page, len, offset);
 
-		if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
-			if (same_page)
-				put_page(page);
-		} else {
-			if (WARN_ON_ONCE(bio_full(bio, len))) {
-				bio_put_pages(pages + i, left, offset);
-				return -EINVAL;
-			}
-			__bio_add_page(bio, page, len, offset);
+		if (ret) {
+			bio_put_pages(pages + i, left, offset);
+			return ret;
 		}
 		offset = 0;
 	}
@@ -1216,51 +1246,6 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	return 0;
 }
 
-static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
-{
-	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
-	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
-	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
-	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
-	struct page **pages = (struct page **)bv;
-	ssize_t size, left;
-	unsigned len, i;
-	size_t offset;
-	int ret = 0;
-
-	/*
-	 * Move page array up in the allocated memory for the bio vecs as far as
-	 * possible so that we can start filling biovecs from the beginning
-	 * without overwriting the temporary page array.
-	 */
-	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
-	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
-
-	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
-	if (unlikely(size <= 0))
-		return size ? size : -EFAULT;
-
-	for (left = size, i = 0; left > 0; left -= len, i++) {
-		struct page *page = pages[i];
-		bool same_page = false;
-
-		len = min_t(size_t, PAGE_SIZE - offset, left);
-		if (bio_add_hw_page(q, bio, page, len, offset,
-				max_append_sectors, &same_page) != len) {
-			bio_put_pages(pages + i, left, offset);
-			ret = -EINVAL;
-			break;
-		}
-		if (same_page)
-			put_page(page);
-		offset = 0;
-	}
-
-	iov_iter_advance(iter, size - left);
-	return ret;
-}
-
 /**
  * bio_iov_iter_get_pages - add user or kernel pages to a bio
  * @bio: bio to add pages to
@@ -1295,10 +1280,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	}
 
 	do {
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
-			ret = __bio_iov_append_get_pages(bio, iter);
-		else
-			ret = __bio_iov_iter_get_pages(bio, iter);
+		ret = __bio_iov_iter_get_pages(bio, iter);
 	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
 
 	/* don't account direct I/O as memory stall */
-- 
cgit v1.2.3-59-g8ed1b


From 3850e13f2853a17c083eb00fad2c6da4fde0b41e Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 10 Jun 2022 12:58:22 -0700
Subject: block: export dma_alignment attribute

User space may want to know how to align their buffers to avoid
bouncing. Export the queue attribute.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220610195830.3574005-4-kbusch@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/ABI/stable/sysfs-block | 9 +++++++++
 block/blk-sysfs.c                    | 7 +++++++
 2 files changed, 16 insertions(+)

(limited to 'block')

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index e8797cd09aff..cd14ecb3c9a5 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -260,6 +260,15 @@ Description:
 		for discards, and don't read this file.
 
 
+What:		/sys/block/<disk>/queue/dma_alignment
+Date:		May 2022
+Contact:	linux-block@vger.kernel.org
+Description:
+		Reports the alignment that user space addresses must have to be
+		used for raw block device access with O_DIRECT and other driver
+		specific passthrough mechanisms.
+
+
 What:		/sys/block/<disk>/queue/fua
 Date:		May 2018
 Contact:	linux-block@vger.kernel.org
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9b905e9443e4..ec716ea26b92 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -274,6 +274,11 @@ static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page
 	return queue_var_show(q->limits.virt_boundary_mask, page);
 }
 
+static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_dma_alignment(q), page);
+}
+
 #define QUEUE_SYSFS_BIT_FNS(name, flag, neg)				\
 static ssize_t								\
 queue_##name##_show(struct request_queue *q, char *page)		\
@@ -606,6 +611,7 @@ QUEUE_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
+QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
@@ -667,6 +673,7 @@ static struct attribute *queue_attrs[] = {
 	&blk_throtl_sample_time_entry.attr,
 #endif
 	&queue_virt_boundary_mask_entry.attr,
+	&queue_dma_alignment_entry.attr,
 	NULL,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 37fee2e42ebbc33e5b7b6944979792bd9aa09e34 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 10 Jun 2022 12:58:24 -0700
Subject: block: add a helper function for dio alignment

This will make it easier to add more complex acceptable alignment
criteria in the future.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220610195830.3574005-6-kbusch@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/fops.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/fops.c b/block/fops.c
index d6b3276a6c68..9d32df6fc315 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -42,6 +42,13 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 	return op;
 }
 
+static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
+			      struct iov_iter *iter)
+{
+	return ((pos | iov_iter_alignment(iter)) &
+	    (bdev_logical_block_size(bdev) - 1));
+}
+
 #define DIO_INLINE_BIO_VECS 4
 
 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
@@ -54,8 +61,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	struct bio bio;
 	ssize_t ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if (blkdev_dio_unaligned(bdev, pos, iter))
 		return -EINVAL;
 
 	if (nr_pages <= DIO_INLINE_BIO_VECS)
@@ -173,8 +179,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if (blkdev_dio_unaligned(bdev, pos, iter))
 		return -EINVAL;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
@@ -298,8 +303,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if (blkdev_dio_unaligned(bdev, pos, iter))
 		return -EINVAL;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
-- 
cgit v1.2.3-59-g8ed1b


From 67927d22015060967122facc8cfeaad8012e8808 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 10 Jun 2022 12:58:25 -0700
Subject: block/merge: count bytes instead of sectors

Individual bv_len's may not be a sector size.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220610195830.3574005-7-kbusch@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

(limited to 'block')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7771dacc99cb..db2e03c8af7f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -201,11 +201,11 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
  *            by the number of segments from @bv that may be appended to that
  *            bio without exceeding @max_segs
- * @sectors:  [in,out] Number of sectors in the bio being built. Incremented
- *            by the number of sectors from @bv that may be appended to that
- *            bio without exceeding @max_sectors
+ * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
+ *            by the number of bytes from @bv that may be appended to that
+ *            bio without exceeding @max_bytes
  * @max_segs: [in] upper bound for *@nsegs
- * @max_sectors: [in] upper bound for *@sectors
+ * @max_bytes: [in] upper bound for *@bytes
  *
  * When splitting a bio, it can happen that a bvec is encountered that is too
  * big to fit in a single segment and hence that it has to be split in the
@@ -216,10 +216,10 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
  */
 static bool bvec_split_segs(const struct request_queue *q,
 			    const struct bio_vec *bv, unsigned *nsegs,
-			    unsigned *sectors, unsigned max_segs,
-			    unsigned max_sectors)
+			    unsigned *bytes, unsigned max_segs,
+			    unsigned max_bytes)
 {
-	unsigned max_len = (min(max_sectors, UINT_MAX >> 9) - *sectors) << 9;
+	unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
 	unsigned len = min(bv->bv_len, max_len);
 	unsigned total_len = 0;
 	unsigned seg_size = 0;
@@ -237,7 +237,7 @@ static bool bvec_split_segs(const struct request_queue *q,
 			break;
 	}
 
-	*sectors += total_len >> 9;
+	*bytes += total_len;
 
 	/* tell the caller to split the bvec if it is too big to fit */
 	return len > 0 || bv->bv_len > max_len;
@@ -269,8 +269,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
-	unsigned nsegs = 0, sectors = 0;
-	const unsigned max_sectors = get_max_io_size(q, bio);
+	unsigned nsegs = 0, bytes = 0;
+	const unsigned max_bytes = get_max_io_size(q, bio) << 9;
 	const unsigned max_segs = queue_max_segments(q);
 
 	bio_for_each_bvec(bv, bio, iter) {
@@ -282,12 +282,12 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 			goto split;
 
 		if (nsegs < max_segs &&
-		    sectors + (bv.bv_len >> 9) <= max_sectors &&
+		    bytes + bv.bv_len <= max_bytes &&
 		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
 			nsegs++;
-			sectors += bv.bv_len >> 9;
-		} else if (bvec_split_segs(q, &bv, &nsegs, &sectors, max_segs,
-					 max_sectors)) {
+			bytes += bv.bv_len;
+		} else if (bvec_split_segs(q, &bv, &nsegs, &bytes, max_segs,
+					   max_bytes)) {
 			goto split;
 		}
 
@@ -300,13 +300,20 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 split:
 	*segs = nsegs;
 
+	/*
+	 * Individual bvecs might not be logical block aligned. Round down the
+	 * split size so that each bio is properly block size aligned, even if
+	 * we do not use the full hardware limits.
+	 */
+	bytes = ALIGN_DOWN(bytes, queue_logical_block_size(q));
+
 	/*
 	 * Bio splitting may cause subtle trouble such as hang when doing sync
 	 * iopoll in direct IO routine. Given performance gain of iopoll for
 	 * big IO can be trival, disable iopoll when split needed.
 	 */
 	bio_clear_polled(bio);
-	return bio_split(bio, sectors, GFP_NOIO, bs);
+	return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
 }
 
 /**
@@ -375,7 +382,7 @@ EXPORT_SYMBOL(blk_queue_split);
 unsigned int blk_recalc_rq_segments(struct request *rq)
 {
 	unsigned int nr_phys_segs = 0;
-	unsigned int nr_sectors = 0;
+	unsigned int bytes = 0;
 	struct req_iterator iter;
 	struct bio_vec bv;
 
@@ -398,7 +405,7 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
 	}
 
 	rq_for_each_bvec(bv, rq, iter)
-		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &nr_sectors,
+		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &bytes,
 				UINT_MAX, UINT_MAX);
 	return nr_phys_segs;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9cfe3ddecdc556ab1d1693b29e8a26ba80953ccc Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 10 Jun 2022 12:58:26 -0700
Subject: block/bounce: count bytes instead of sectors

Individual bv_len's may not be a sector size.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220610195830.3574005-8-kbusch@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bounce.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/bounce.c b/block/bounce.c
index 8f7b6fe3b4db..c8f487af7be3 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -205,19 +205,26 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	int rw = bio_data_dir(*bio_orig);
 	struct bio_vec *to, from;
 	struct bvec_iter iter;
-	unsigned i = 0;
+	unsigned i = 0, bytes = 0;
 	bool bounce = false;
-	int sectors = 0;
+	int sectors;
 
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_VECS)
-			sectors += from.bv_len >> 9;
+			bytes += from.bv_len;
 		if (PageHighMem(from.bv_page))
 			bounce = true;
 	}
 	if (!bounce)
 		return;
 
+	/*
+	 * Individual bvecs might not be logical block aligned. Round down
+	 * the split size so that each bio is properly block size aligned,
+	 * even if we do not use the full hardware limits.
+	 */
+	sectors = ALIGN_DOWN(bytes, queue_logical_block_size(q)) >>
+			SECTOR_SHIFT;
 	if (sectors < bio_sectors(*bio_orig)) {
 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
 		bio_chain(bio, *bio_orig);
-- 
cgit v1.2.3-59-g8ed1b


From b1a000d3b8ec582da64bb644be633e5a0beffcbf Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Fri, 10 Jun 2022 12:58:29 -0700
Subject: block: relax direct io memory alignment

Use the address alignment requirements from the block_device for direct
io instead of requiring addresses be aligned to the block size. User
space can discover the alignment requirements from the dma_alignment
queue attribute.

User space can specify any hardware compatible DMA offset for each
segment, but every segment length is still required to be a multiple of
the block size.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220610195830.3574005-11-kbusch@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c            | 9 +++++++++
 block/fops.c           | 4 ++--
 include/linux/blkdev.h | 5 +++++
 3 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/bio.c b/block/bio.c
index ee5fe1bb015e..933ea3210954 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1220,7 +1220,16 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
+	/*
+	 * Each segment in the iov is required to be a block size multiple.
+	 * However, we may not be able to get the entire segment if it spans
+	 * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
+	 * result to ensure the bio's total size is correct. The remainder of
+	 * the iov data will be picked up in the next bio iteration.
+	 */
 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (size > 0)
+		size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/fops.c b/block/fops.c
index 9d32df6fc315..86d3cab9bf93 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -45,8 +45,8 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
 			      struct iov_iter *iter)
 {
-	return ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1));
+	return pos & (bdev_logical_block_size(bdev) - 1) ||
+		!bdev_iter_is_aligned(bdev, iter);
 }
 
 #define DIO_INLINE_BIO_VECS 4
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0b8bc1fe0b2c..886c44e97434 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -424,6 +424,11 @@ struct request_queue {
 	unsigned long		nr_requests;	/* Max # of requests */
 
 	unsigned int		dma_pad_mask;
+	/*
+	 * Drivers that set dma_alignment to less than 511 must be prepared to
+	 * handle individual bvec's that are not a multiple of a SECTOR_SIZE
+	 * due to possible offsets.
+	 */
 	unsigned int		dma_alignment;
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
-- 
cgit v1.2.3-59-g8ed1b


From 798f2a6f734de87633351c3ab13b17b07397cf68 Mon Sep 17 00:00:00 2001
From: Bo Liu <liubo03@inspur.com>
Date: Wed, 15 Jun 2022 04:18:16 -0400
Subject: block: Directly use ida_alloc()/free()

Use ida_alloc()/ida_free() instead of
ida_simple_get()/ida_simple_remove().
The latter is deprecated and more verbose.

Signed-off-by: Bo Liu <liubo03@inspur.com>
Reviewed-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/20220615081816.4342-1-liubo03@inspur.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c  | 4 ++--
 block/blk-sysfs.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 27fb1357ad4b..c2cec402d01c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -435,7 +435,7 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
 
 	q->last_merge = NULL;
 
-	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
 	if (q->id < 0)
 		goto fail_srcu;
 
@@ -485,7 +485,7 @@ fail_stats:
 fail_split:
 	bioset_exit(&q->bio_split);
 fail_id:
-	ida_simple_remove(&blk_queue_ida, q->id);
+	ida_free(&blk_queue_ida, q->id);
 fail_srcu:
 	if (alloc_srcu)
 		cleanup_srcu_struct(q->srcu);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ec716ea26b92..69e53d1a4f0e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -791,7 +791,7 @@ static void blk_release_queue(struct kobject *kobj)
 	if (blk_queue_has_srcu(q))
 		cleanup_srcu_struct(q->srcu);
 
-	ida_simple_remove(&blk_queue_ida, q->id);
+	ida_free(&blk_queue_ida, q->id);
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 62c159a03da92121b1b909fd8039028de97885fc Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 15 Jun 2022 15:55:47 -0700
Subject: blk-iocost: Simplify ioc_rqos_done()

Leave out the superfluous "& REQ_OP_MASK" code. The definition of req_op()
shows that that code is superfluous:

 #define req_op(req) ((req)->cmd_flags & REQ_OP_MASK)

Compile-tested only.

Cc: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220615225549.1054905-2-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iocost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 33a11ba971ea..b7082f2aed9c 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2769,7 +2769,7 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
 	if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns)
 		return;
 
-	switch (req_op(rq) & REQ_OP_MASK) {
+	switch (req_op(rq)) {
 	case REQ_OP_READ:
 		pidx = QOS_RLAT;
 		rw = READ;
-- 
cgit v1.2.3-59-g8ed1b


From 7e923f40a4d229bd9573d6cae64479d6a9770fc3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 15 Jun 2022 15:55:48 -0700
Subject: block: Rename a blk_mq_map_queue() argument

Before the introduction of blk_mq_get_hctx_type(), blk_mq_map_queue()
only used the flags from its second argument. Since the introduction of
blk_mq_get_hctx_type(), blk_mq_map_queue() uses both the operation and
the flags encoded in that argument. Rename the second argument of
blk_mq_map_queue() to make this clear.

Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220615225549.1054905-3-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq.h b/block/blk-mq.h
index 2615bd58bad3..e4c6fe2c8ac8 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -86,16 +86,16 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
 	return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]);
 }
 
-static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
+static inline enum hctx_type blk_mq_get_hctx_type(unsigned int opf)
 {
 	enum hctx_type type = HCTX_TYPE_DEFAULT;
 
 	/*
 	 * The caller ensure that if REQ_POLLED, poll must be enabled.
 	 */
-	if (flags & REQ_POLLED)
+	if (opf & REQ_POLLED)
 		type = HCTX_TYPE_POLL;
-	else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
+	else if ((opf & REQ_OP_MASK) == REQ_OP_READ)
 		type = HCTX_TYPE_READ;
 	return type;
 }
@@ -103,14 +103,14 @@ static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
 /*
  * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
  * @q: request queue
- * @flags: request command flags
+ * @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED).
  * @ctx: software queue cpu ctx
  */
 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
-						     unsigned int flags,
+						     unsigned int opf,
 						     struct blk_mq_ctx *ctx)
 {
-	return ctx->hctxs[blk_mq_get_hctx_type(flags)];
+	return ctx->hctxs[blk_mq_get_hctx_type(opf)];
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 51ab80f0aa861335eb80327af53e444a27e824b8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 15 Jun 2022 15:55:49 -0700
Subject: block: Make blk_mq_get_sq_hctx() select the proper hardware queue
 type

Since the introduction of blk_mq_get_hctx_type() the operation type in
the second argument of blk_mq_get_hctx_type() matters. The introduction
of blk_mq_get_hctx_type() caused blk_mq_get_sq_hctx() to select a
hardware queue of type HCTX_TYPE_READ instead of HCTX_TYPE_DEFAULT.
Switch to hardware queue type HCTX_TYPE_DEFAULT since HCTX_TYPE_READ
should only be used for read requests.

Cc: Ming Lei <ming.lei@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220615225549.1054905-4-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 93d9d60980fb..fa3dc4f8f35d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2156,7 +2156,7 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
 	 * just causes lock contention inside the scheduler and pointless cache
 	 * bouncing.
 	 */
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx);
+	struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT];
 
 	if (!blk_mq_hctx_stopped(hctx))
 		return hctx;
-- 
cgit v1.2.3-59-g8ed1b


From c887519074957f0ebd73b8158c2e0546d97ce0e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 14 Jun 2022 11:09:31 +0200
Subject: block: open code blk_max_size_offset in blk_rq_get_max_sectors

blk_rq_get_max_sectors always uses q->limits.chunk_sectors as the
chunk_sectors argument, and already checks for max_sectors through the
call to blk_queue_get_max_sectors.  That means much of
blk_max_size_offset is not needed and open coding it simplifies the code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220614090934.570632-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index db2e03c8af7f..df003ecfbd47 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -566,17 +566,18 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
 						  sector_t offset)
 {
 	struct request_queue *q = rq->q;
+	unsigned int max_sectors;
 
 	if (blk_rq_is_passthrough(rq))
 		return q->limits.max_hw_sectors;
 
+	max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
 	if (!q->limits.chunk_sectors ||
 	    req_op(rq) == REQ_OP_DISCARD ||
 	    req_op(rq) == REQ_OP_SECURE_ERASE)
-		return blk_queue_get_max_sectors(q, req_op(rq));
-
-	return min(blk_max_size_offset(q, offset, 0),
-			blk_queue_get_max_sectors(q, req_op(rq)));
+		return max_sectors;
+	return min(max_sectors,
+		   blk_chunk_sectors_left(offset, q->limits.chunk_sectors));
 }
 
 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
-- 
cgit v1.2.3-59-g8ed1b


From 84613beda427b6e4c3d7a9ed2b68efd26300ec63 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 14 Jun 2022 11:09:32 +0200
Subject: block: cleanup variable naming in get_max_io_size

get_max_io_size has a very odd choice of variables names and
initialization patterns.  Switch to more descriptive names and more
clear initialization of them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220614090934.570632-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'block')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index df003ecfbd47..4da981efddee 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -164,18 +164,16 @@ static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
 static inline unsigned get_max_io_size(struct request_queue *q,
 				       struct bio *bio)
 {
-	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
-	unsigned max_sectors = sectors;
 	unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
 	unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
-	unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1);
-
-	max_sectors += start_offset;
-	max_sectors &= ~(pbs - 1);
-	if (max_sectors > start_offset)
-		return max_sectors - start_offset;
-
-	return sectors & ~(lbs - 1);
+	unsigned max_sectors, start, end;
+
+	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
+	start = bio->bi_iter.bi_sector & (pbs - 1);
+	end = (start + max_sectors) & ~(pbs - 1);
+	if (end > start)
+		return end - start;
+	return max_sectors & ~(lbs - 1);
 }
 
 static inline unsigned get_max_segment_size(const struct request_queue *q,
-- 
cgit v1.2.3-59-g8ed1b


From efef739d5f37dc998b113fb965aea68d42a9eddc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 14 Jun 2022 11:09:33 +0200
Subject: block: fold blk_max_size_offset into get_max_io_size

Now that blk_max_size_offset has a single caller left, fold it into that
and clean up the naming convention for the local variables there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220614090934.570632-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c      |  9 +++++++--
 include/linux/blkdev.h | 19 -------------------
 2 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'block')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4da981efddee..0f5f42ebd0bb 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -166,9 +166,14 @@ static inline unsigned get_max_io_size(struct request_queue *q,
 {
 	unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
 	unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
-	unsigned max_sectors, start, end;
+	unsigned max_sectors = queue_max_sectors(q), start, end;
+
+	if (q->limits.chunk_sectors) {
+		max_sectors = min(max_sectors,
+			blk_chunk_sectors_left(bio->bi_iter.bi_sector,
+					       q->limits.chunk_sectors));
+	}
 
-	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
 	start = bio->bi_iter.bi_sector & (pbs - 1);
 	end = (start + max_sectors) & ~(pbs - 1);
 	if (end > start)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 283961257cc9..652c357dafb9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -945,25 +945,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
 	return chunk_sectors - (offset & (chunk_sectors - 1));
 }
 
-/*
- * Return maximum size of a request at given offset. Only valid for
- * file system requests.
- */
-static inline unsigned int blk_max_size_offset(struct request_queue *q,
-					       sector_t offset,
-					       unsigned int chunk_sectors)
-{
-	if (!chunk_sectors) {
-		if (q->limits.chunk_sectors)
-			chunk_sectors = q->limits.chunk_sectors;
-		else
-			return q->limits.max_sectors;
-	}
-
-	return min(q->limits.max_sectors,
-			blk_chunk_sectors_left(offset, chunk_sectors));
-}
-
 /*
  * Access functions for manipulating queue properties
  */
-- 
cgit v1.2.3-59-g8ed1b


From 2a9336c42a6abdcef564d522648a684a474a3483 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 14 Jun 2022 11:09:34 +0200
Subject: block: move blk_queue_get_max_sectors to blk.h

blk_queue_get_max_sectors is private to the block layer, so move it out
of blkdev.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220614090934.570632-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h            | 13 +++++++++++++
 include/linux/blkdev.h | 13 -------------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'block')

diff --git a/block/blk.h b/block/blk.h
index 434017701403..8e79296ee97a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -159,6 +159,19 @@ static inline bool blk_discard_mergable(struct request *req)
 	return false;
 }
 
+static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
+						     int op)
+{
+	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
+		return min(q->limits.max_discard_sectors,
+			   UINT_MAX >> SECTOR_SHIFT);
+
+	if (unlikely(op == REQ_OP_WRITE_ZEROES))
+		return q->limits.max_write_zeroes_sectors;
+
+	return q->limits.max_sectors;
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
 bool __bio_integrity_endio(struct bio *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 652c357dafb9..b2d42201bd5d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -921,19 +921,6 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio)
 }
 #endif /* CONFIG_BLK_DEV_ZONED */
 
-static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
-						     int op)
-{
-	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
-		return min(q->limits.max_discard_sectors,
-			   UINT_MAX >> SECTOR_SHIFT);
-
-	if (unlikely(op == REQ_OP_WRITE_ZEROES))
-		return q->limits.max_write_zeroes_sectors;
-
-	return q->limits.max_sectors;
-}
-
 /*
  * Return how much of the chunk is left to be used for I/O at a given offset.
  */
-- 
cgit v1.2.3-59-g8ed1b


From 6c77b152f5f1324972cdbdb71e9a6e02d601f49f Mon Sep 17 00:00:00 2001
From: GuoYong Zheng <zhenggy@chinatelecom.cn>
Date: Fri, 17 Jun 2022 18:28:04 +0800
Subject: bfq: Remove useless code in bfq_lookup_next_entity

It is no need to judge entity is null or not here,
directly return entity is ok, so remove it.

Signed-off-by: GuoYong Zheng <zhenggy@chinatelecom.cn>
Link: https://lore.kernel.org/r/1655461684-19075-1-git-send-email-zhenggy@chinatelecom.cn
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-wf2q.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'block')

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index f8eb340381cf..089d07022066 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1472,9 +1472,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
 			break;
 	}
 
-	if (!entity)
-		return NULL;
-
 	return entity;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c28c49b09e493adf5f79201d6de2d16d9356e9eb Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 17 Jun 2022 13:44:33 -0700
Subject: block: bfq: Remove an unused function definition

This patch is the result of the analysis of a sparse report.

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220617204433.102022-1-bvanassche@acm.org
---
 block/bfq-cgroup.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'block')

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 09574af83566..dc0fa93219df 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1471,8 +1471,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
 	return bfqq->bfqd->root_group;
 }
 
-void bfqg_and_blkg_get(struct bfq_group *bfqg) {}
-
 void bfqg_and_blkg_put(struct bfq_group *bfqg) {}
 
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
-- 
cgit v1.2.3-59-g8ed1b


From 1d87be8212c8c2bb1216a0ba49373e4e0123aaf3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 17 Jun 2022 14:08:59 -0700
Subject: block: bfq: Fix kernel-doc headers

Fix the following warnings:

block/bfq-cgroup.c:721: warning: Function parameter or member 'bfqg' not described in '__bfq_bic_change_cgroup'
block/bfq-cgroup.c:721: warning: Excess function parameter 'blkcg' description in '__bfq_bic_change_cgroup'
block/bfq-cgroup.c:870: warning: Function parameter or member 'ioprio_class' not described in 'bfq_reparent_leaf_entity'
block/bfq-cgroup.c:900: warning: Function parameter or member 'ioprio_class' not described in 'bfq_reparent_active_queues'

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220617210859.106623-1-bvanassche@acm.org
---
 block/bfq-cgroup.c | 6 ++++--
 block/bfq-wf2q.c   | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index dc0fa93219df..9fc605791b1e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -706,10 +706,10 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 }
 
 /**
- * __bfq_bic_change_cgroup - move @bic to @cgroup.
+ * __bfq_bic_change_cgroup - move @bic to @bfqg.
  * @bfqd: the queue descriptor.
  * @bic: the bic to move.
- * @blkcg: the blk-cgroup to move to.
+ * @bfqg: the group to move to.
  *
  * Move bic to blkcg, assuming that bfqd->lock is held; which makes
  * sure that the reference to cgroup is valid across the call (see
@@ -863,6 +863,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
  * @bfqd: the device data structure with the root group.
  * @entity: the entity to move, if entity is a leaf; or the parent entity
  *	    of an active leaf entity to move, if entity is not a leaf.
+ * @ioprio_class: I/O priority class to reparent.
  */
 static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
 				     struct bfq_entity *entity,
@@ -892,6 +893,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
  * @bfqd: the device data structure with the root group.
  * @bfqg: the group to move from.
  * @st: the service tree to start the search from.
+ * @ioprio_class: I/O priority class to reparent.
  */
 static void bfq_reparent_active_queues(struct bfq_data *bfqd,
 				       struct bfq_group *bfqg,
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 089d07022066..983413cdefad 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1360,6 +1360,8 @@ left:
 /**
  * __bfq_lookup_next_entity - return the first eligible entity in @st.
  * @st: the service tree.
+ * @in_service: whether or not there is an in-service entity for the sched_data
+ *	this active tree belongs to.
  *
  * If there is no in-service entity for the sched_data st belongs to,
  * then return the entity that will be set in service if:
-- 
cgit v1.2.3-59-g8ed1b


From 3c8f9da41ed90294d8ca42b3ad8a13c5379bd549 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 22 Jun 2022 10:25:54 +0200
Subject: blk-mq: Don't disable preemption around __blk_mq_run_hw_queue().

__blk_mq_delay_run_hw_queue() disables preemption to get a stable
current CPU number and then invokes __blk_mq_run_hw_queue() if the CPU
number is part the mask.

__blk_mq_run_hw_queue() acquires a spin_lock_t which is a sleeping lock
on PREEMPT_RT and can't be acquired with disabled preemption.

It is not required for correctness to invoke __blk_mq_run_hw_queue() on
a CPU matching hctx->cpumask. Both (async and direct requests) can run
on a CPU not matching hctx->cpumask.

The CPU mask without disabling preemption and invoking
__blk_mq_run_hw_queue().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/YrLSEiNvagKJaDs5@linutronix.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index fa3dc4f8f35d..62b7025d6854 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2085,14 +2085,10 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 		return;
 
 	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
-		int cpu = get_cpu();
-		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+		if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
 			__blk_mq_run_hw_queue(hctx);
-			put_cpu();
 			return;
 		}
-
-		put_cpu();
 	}
 
 	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
-- 
cgit v1.2.3-59-g8ed1b


From e589f46445960c274cc813a1cc8e2fc73b2a1849 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:26 +0200
Subject: block: fix default IO priority handling again

Commit e70344c05995 ("block: fix default IO priority handling")
introduced an inconsistency in get_current_ioprio() that tasks without
IO context return IOPRIO_DEFAULT priority while tasks with freshly
allocated IO context will return 0 (IOPRIO_CLASS_NONE/0) IO priority.
Tasks without IO context used to be rare before 5a9d041ba2f6 ("block:
move io_context creation into where it's needed") but after this commit
they became common because now only BFQ IO scheduler setups task's IO
context. Similar inconsistency is there for get_task_ioprio() so this
inconsistency is now exposed to userspace and userspace will see
different IO priority for tasks operating on devices with BFQ compared
to devices without BFQ. Furthemore the changes done by commit
e70344c05995 change the behavior when no IO priority is set for BFQ IO
scheduler which is also documented in ioprio_set(2) manpage:

"If no I/O scheduler has been set for a thread, then by default the I/O
priority will follow the CPU nice value (setpriority(2)).  In Linux
kernels before version 2.6.24, once an I/O priority had been set using
ioprio_set(), there was no way to reset the I/O scheduling behavior to
the default. Since Linux 2.6.24, specifying ioprio as 0 can be used to
reset to the default I/O scheduling behavior."

So make sure we default to IOPRIO_CLASS_NONE as used to be the case
before commit e70344c05995. Also cleanup alloc_io_context() to
explicitely set this IO priority for the allocated IO context to avoid
future surprises. Note that we tweak ioprio_best() to maintain
ioprio_get(2) behavior and make this commit easily backportable.

CC: stable@vger.kernel.org
Fixes: e70344c05995 ("block: fix default IO priority handling")
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220623074840.5960-1-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioc.c        | 2 ++
 block/ioprio.c         | 4 ++--
 include/linux/ioprio.h | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index df9cfe4ca532..63fc02042408 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -247,6 +247,8 @@ static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 	INIT_HLIST_HEAD(&ioc->icq_list);
 	INIT_WORK(&ioc->release_work, ioc_release_fn);
 #endif
+	ioc->ioprio = IOPRIO_DEFAULT;
+
 	return ioc;
 }
 
diff --git a/block/ioprio.c b/block/ioprio.c
index 2fe068fcaad5..2a34cbca18ae 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -157,9 +157,9 @@ out:
 int ioprio_best(unsigned short aprio, unsigned short bprio)
 {
 	if (!ioprio_valid(aprio))
-		aprio = IOPRIO_DEFAULT;
+		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
 	if (!ioprio_valid(bprio))
-		bprio = IOPRIO_DEFAULT;
+		bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
 
 	return min(aprio, bprio);
 }
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 3f53bc27a19b..3d088a88f832 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -11,7 +11,7 @@
 /*
  * Default IO priority.
  */
-#define IOPRIO_DEFAULT	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
+#define IOPRIO_DEFAULT	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0)
 
 /*
  * Check that a priority value has a valid class.
-- 
cgit v1.2.3-59-g8ed1b


From 893e5d32d5832674bcf6465f27958e883b72b346 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:28 +0200
Subject: block: Generalize get_current_ioprio() for any task

get_current_ioprio() operates only on current task. We will need the
same functionality for other tasks as well. Generalize
get_current_ioprio() for that and also move the bulk out of the header
file because it is large enough.

Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220623074840.5960-3-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/ioprio.c         | 26 ++++++++++++++++++++++++++
 include/linux/ioprio.h | 26 ++++++++++----------------
 2 files changed, 36 insertions(+), 16 deletions(-)

(limited to 'block')

diff --git a/block/ioprio.c b/block/ioprio.c
index 2a34cbca18ae..c4e3476155a1 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -138,6 +138,32 @@ out:
 	return ret;
 }
 
+/*
+ * If the task has set an I/O priority, use that. Otherwise, return
+ * the default I/O priority.
+ *
+ * Expected to be called for current task or with task_lock() held to keep
+ * io_context stable.
+ */
+int __get_task_ioprio(struct task_struct *p)
+{
+	struct io_context *ioc = p->io_context;
+	int prio;
+
+	if (p != current)
+		lockdep_assert_held(&p->alloc_lock);
+	if (ioc)
+		prio = ioc->ioprio;
+	else
+		prio = IOPRIO_DEFAULT;
+
+	if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
+		prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
+					 task_nice_ioprio(p));
+	return prio;
+}
+EXPORT_SYMBOL_GPL(__get_task_ioprio);
+
 static int get_task_ioprio(struct task_struct *p)
 {
 	int ret;
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 61ed6bb4998e..9752cf4a9c7c 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -46,24 +46,18 @@ static inline int task_nice_ioclass(struct task_struct *task)
 		return IOPRIO_CLASS_BE;
 }
 
-/*
- * If the calling process has set an I/O priority, use that. Otherwise, return
- * the default I/O priority.
- */
-static inline int get_current_ioprio(void)
+#ifdef CONFIG_BLOCK
+int __get_task_ioprio(struct task_struct *p);
+#else
+static inline int __get_task_ioprio(struct task_struct *p)
 {
-	struct io_context *ioc = current->io_context;
-	int prio;
-
-	if (ioc)
-		prio = ioc->ioprio;
-	else
-		prio = IOPRIO_DEFAULT;
+	return IOPRIO_DEFAULT;
+}
+#endif /* CONFIG_BLOCK */
 
-	if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
-		prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
-					 task_nice_ioprio(current));
-	return prio;
+static inline int get_current_ioprio(void)
+{
+	return __get_task_ioprio(current);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From fc25545e17bd74befe0b8ab2c65ac84936be5066 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:29 +0200
Subject: block: Make ioprio_best() static

Nobody outside of block/ioprio.c uses it.

Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220623074840.5960-4-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/ioprio.c         | 2 +-
 include/linux/ioprio.h | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/ioprio.c b/block/ioprio.c
index c4e3476155a1..8c46f672a0ba 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -180,7 +180,7 @@ out:
 	return ret;
 }
 
-int ioprio_best(unsigned short aprio, unsigned short bprio)
+static int ioprio_best(unsigned short aprio, unsigned short bprio)
 {
 	if (!ioprio_valid(aprio))
 		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 9752cf4a9c7c..7578d4f6a969 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -60,11 +60,6 @@ static inline int get_current_ioprio(void)
 	return __get_task_ioprio(current);
 }
 
-/*
- * For inheritance, return the highest of the two given priorities
- */
-extern int ioprio_best(unsigned short aprio, unsigned short bprio);
-
 extern int set_task_ioprio(struct task_struct *task, int ioprio);
 
 #ifdef CONFIG_BLOCK
-- 
cgit v1.2.3-59-g8ed1b


From 4b838d9ee950b37bee624e301bd8e923165b1cf3 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:30 +0200
Subject: block: Fix handling of tasks without ioprio in ioprio_get(2)

ioprio_get(2) can be asked to return the best IO priority from several
tasks (IOPRIO_WHO_PGRP, IOPRIO_WHO_USER). Currently the call treats
tasks without set IO priority as having priority
IOPRIO_CLASS_BE/IOPRIO_BE_NORM however this does not really reflect the
IO priority the task will get (which depends on task's nice value).

Fix the code to use the real IO priority task's IO will use. We have to
modify code for ioprio_get(IOPRIO_WHO_PROCESS) to keep returning
IOPRIO_CLASS_NONE priority for tasks without set IO priority as a
special case to maintain userspace visible API.

Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20220623074840.5960-5-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/ioprio.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

(limited to 'block')

diff --git a/block/ioprio.c b/block/ioprio.c
index 8c46f672a0ba..32a456b45804 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -171,10 +171,31 @@ static int get_task_ioprio(struct task_struct *p)
 	ret = security_task_getioprio(p);
 	if (ret)
 		goto out;
-	ret = IOPRIO_DEFAULT;
+	task_lock(p);
+	ret = __get_task_ioprio(p);
+	task_unlock(p);
+out:
+	return ret;
+}
+
+/*
+ * Return raw IO priority value as set by userspace. We use this for
+ * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and
+ * also so that userspace can distinguish unset IO priority (which just gets
+ * overriden based on task's nice value) from IO priority set to some value.
+ */
+static int get_task_raw_ioprio(struct task_struct *p)
+{
+	int ret;
+
+	ret = security_task_getioprio(p);
+	if (ret)
+		goto out;
 	task_lock(p);
 	if (p->io_context)
 		ret = p->io_context->ioprio;
+	else
+		ret = IOPRIO_DEFAULT;
 	task_unlock(p);
 out:
 	return ret;
@@ -182,11 +203,6 @@ out:
 
 static int ioprio_best(unsigned short aprio, unsigned short bprio)
 {
-	if (!ioprio_valid(aprio))
-		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
-	if (!ioprio_valid(bprio))
-		bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
-
 	return min(aprio, bprio);
 }
 
@@ -207,7 +223,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 			else
 				p = find_task_by_vpid(who);
 			if (p)
-				ret = get_task_ioprio(p);
+				ret = get_task_raw_ioprio(p);
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
-- 
cgit v1.2.3-59-g8ed1b


From f25865447294bf2468c2587dd98f8fa999260893 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:31 +0200
Subject: blk-ioprio: Remove unneeded field

blkcg->ioprio_set field is not really useful except for avoiding
possibly more expensive checks inside blkcg_ioprio_track(). The check
for blkcg->prio_policy being equal to POLICY_NO_CHANGE does the same
service so just remove the ioprio_set field and replace the check.

Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220623074840.5960-6-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioprio.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 79e797f5d194..3f605583598b 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -62,7 +62,6 @@ struct ioprio_blkg {
 struct ioprio_blkcg {
 	struct blkcg_policy_data cpd;
 	enum prio_policy	 prio_policy;
-	bool			 prio_set;
 };
 
 static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
@@ -113,7 +112,6 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
 	if (ret < 0)
 		return ret;
 	blkcg->prio_policy = ret;
-	blkcg->prio_set = true;
 	return nbytes;
 }
 
@@ -193,16 +191,15 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
 	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
 	u16 prio;
 
-	if (!blkcg->prio_set)
+	if (blkcg->prio_policy == POLICY_NO_CHANGE)
 		return;
 
 	/*
 	 * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
 	 * correspond to a lower priority. Hence, the max_t() below selects
 	 * the lower priority of bi_ioprio and the cgroup I/O priority class.
-	 * If the cgroup policy has been set to POLICY_NO_CHANGE == 0, the
-	 * bio I/O priority is not modified. If the bio I/O priority equals
-	 * IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio.
+	 * If the bio I/O priority equals IOPRIO_CLASS_NONE, the cgroup I/O
+	 * priority is assigned to the bio.
 	 */
 	prio = max_t(u16, bio->bi_ioprio,
 			IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
-- 
cgit v1.2.3-59-g8ed1b


From 82b74cac28493fb40ea74fb2fe648b5fc7ea0c1c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:32 +0200
Subject: blk-ioprio: Convert from rqos policy to direct call

Convert blk-ioprio handling from a rqos policy to a direct call from
blk_mq_submit_bio(). Firstly, blk-ioprio is not much of a rqos policy
anyway, it just needs a hook in bio submission path to set the bio's IO
priority. Secondly, the rqos .track hook gets actually called too late
for blk-ioprio purposes and introducing a special rqos hook just for
blk-ioprio looks even weirder.

Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220623074840.5960-7-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c |  1 +
 block/blk-ioprio.c | 50 +++++---------------------------------------------
 block/blk-ioprio.h |  9 +++++++++
 block/blk-mq.c     |  8 ++++++++
 4 files changed, 23 insertions(+), 45 deletions(-)

(limited to 'block')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 764e740b0c0f..6906981563f8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1299,6 +1299,7 @@ int blkcg_init_queue(struct request_queue *q)
 	ret = blk_iolatency_init(q);
 	if (ret) {
 		blk_throtl_exit(q);
+		blk_ioprio_exit(q);
 		goto err_destroy_all;
 	}
 
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 3f605583598b..c00060a02c6e 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -181,17 +181,12 @@ static struct blkcg_policy ioprio_policy = {
 	.pd_free_fn	= ioprio_free_pd,
 };
 
-struct blk_ioprio {
-	struct rq_qos rqos;
-};
-
-static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
-			       struct bio *bio)
+void blkcg_set_ioprio(struct bio *bio)
 {
 	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
 	u16 prio;
 
-	if (blkcg->prio_policy == POLICY_NO_CHANGE)
+	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
 		return;
 
 	/*
@@ -207,49 +202,14 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
 		bio->bi_ioprio = prio;
 }
 
-static void blkcg_ioprio_exit(struct rq_qos *rqos)
+void blk_ioprio_exit(struct request_queue *q)
 {
-	struct blk_ioprio *blkioprio_blkg =
-		container_of(rqos, typeof(*blkioprio_blkg), rqos);
-
-	blkcg_deactivate_policy(rqos->q, &ioprio_policy);
-	kfree(blkioprio_blkg);
+	blkcg_deactivate_policy(q, &ioprio_policy);
 }
 
-static struct rq_qos_ops blkcg_ioprio_ops = {
-	.track	= blkcg_ioprio_track,
-	.exit	= blkcg_ioprio_exit,
-};
-
 int blk_ioprio_init(struct request_queue *q)
 {
-	struct blk_ioprio *blkioprio_blkg;
-	struct rq_qos *rqos;
-	int ret;
-
-	blkioprio_blkg = kzalloc(sizeof(*blkioprio_blkg), GFP_KERNEL);
-	if (!blkioprio_blkg)
-		return -ENOMEM;
-
-	ret = blkcg_activate_policy(q, &ioprio_policy);
-	if (ret) {
-		kfree(blkioprio_blkg);
-		return ret;
-	}
-
-	rqos = &blkioprio_blkg->rqos;
-	rqos->id = RQ_QOS_IOPRIO;
-	rqos->ops = &blkcg_ioprio_ops;
-	rqos->q = q;
-
-	/*
-	 * Registering the rq-qos policy after activating the blk-cgroup
-	 * policy guarantees that ioprio_blkcg_from_bio(bio) != NULL in the
-	 * rq-qos callbacks.
-	 */
-	rq_qos_add(q, rqos);
-
-	return 0;
+	return blkcg_activate_policy(q, &ioprio_policy);
 }
 
 static int __init ioprio_init(void)
diff --git a/block/blk-ioprio.h b/block/blk-ioprio.h
index a7785c2f1aea..5a1eb550e178 100644
--- a/block/blk-ioprio.h
+++ b/block/blk-ioprio.h
@@ -6,14 +6,23 @@
 #include <linux/kconfig.h>
 
 struct request_queue;
+struct bio;
 
 #ifdef CONFIG_BLK_CGROUP_IOPRIO
 int blk_ioprio_init(struct request_queue *q);
+void blk_ioprio_exit(struct request_queue *q);
+void blkcg_set_ioprio(struct bio *bio);
 #else
 static inline int blk_ioprio_init(struct request_queue *q)
 {
 	return 0;
 }
+static inline void blk_ioprio_exit(struct request_queue *q)
+{
+}
+static inline void blkcg_set_ioprio(struct bio *bio)
+{
+}
 #endif
 
 #endif /* _BLK_IOPRIO_H_ */
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 62b7025d6854..4c4944b6f520 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,6 +42,7 @@
 #include "blk-stat.h"
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
+#include "blk-ioprio.h"
 
 static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 
@@ -2779,6 +2780,11 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	return rq;
 }
 
+static void bio_set_ioprio(struct bio *bio)
+{
+	blkcg_set_ioprio(bio);
+}
+
 /**
  * blk_mq_submit_bio - Create and send a request to block device.
  * @bio: Bio pointer.
@@ -2819,6 +2825,8 @@ void blk_mq_submit_bio(struct bio *bio)
 
 	trace_block_getrq(bio);
 
+	bio_set_ioprio(bio);
+
 	rq_qos_track(q, rq, bio);
 
 	blk_mq_bio_to_request(rq, bio, nr_segs);
-- 
cgit v1.2.3-59-g8ed1b


From 9c6227e04355a430aa59709bbf869d9126112d0d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:33 +0200
Subject: block: Initialize bio priority earlier

Bio's IO priority needs to be initialized before we try to merge the bio
with other bios. Otherwise we could merge bios which would otherwise
receive different IO priorities leading to possible QoS issues.

Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220623074840.5960-8-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4c4944b6f520..c0ec1938feee 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2814,6 +2814,8 @@ void blk_mq_submit_bio(struct bio *bio)
 	if (!bio_integrity_prep(bio))
 		return;
 
+	bio_set_ioprio(bio);
+
 	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
 	if (!rq) {
 		if (!bio)
@@ -2825,8 +2827,6 @@ void blk_mq_submit_bio(struct bio *bio)
 
 	trace_block_getrq(bio);
 
-	bio_set_ioprio(bio);
-
 	rq_qos_track(q, rq, bio);
 
 	blk_mq_bio_to_request(rq, bio, nr_segs);
-- 
cgit v1.2.3-59-g8ed1b


From a78418e6a04c93b9ffd3f0f601c5cb10612acb7f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2022 09:48:34 +0200
Subject: block: Always initialize bio IO priority on submit

Currently, IO priority set in task's IO context is not reflected in the
bio->bi_ioprio for most IO (only io_uring and direct IO set it). This
results in odd results where process is submitting some bios with one
priority and other bios with a different (unset) priority and due to
differing priorities bios cannot be merged. Make sure bio->bi_ioprio is
always set on bio submission.

Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220623074840.5960-9-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'block')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c0ec1938feee..92aae03103b7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2782,6 +2782,9 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 
 static void bio_set_ioprio(struct bio *bio)
 {
+	/* Nobody set ioprio so far? Initialize it based on task's nice value */
+	if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE)
+		bio->bi_ioprio = get_current_ioprio();
 	blkcg_set_ioprio(bio);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ee78ec1077d37d1a4a0860589a65df8ae6d2255c Mon Sep 17 00:00:00 2001
From: Liu Song <liusong@linux.alibaba.com>
Date: Sat, 25 Jun 2022 23:15:21 +0800
Subject: blk-mq: blk_mq_tag_busy is no need to return a value

Currently "blk_mq_tag_busy" return value has no effect, so adjust it.
Some code implementations have also been adjusted to enhance
readability.

Signed-off-by: Liu Song <liusong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1656170121-1619-1-git-send-email-liusong@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c | 18 +++++++-----------
 block/blk-mq-tag.h | 10 ++++------
 2 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 2dcd738c6952..3cfffef1feb3 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -37,29 +37,25 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
  * to get tag when first time, the other shared-tag users could reserve
  * budget for it.
  */
-bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
 	unsigned int users;
 
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
 
-		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
-		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
-			return true;
-		}
+		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+			return;
+		set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags);
 	} else {
-		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
-		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
-			return true;
-		}
+		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+			return;
+		set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
 	}
 
 	users = atomic_inc_return(&hctx->tags->active_queues);
 
 	blk_mq_update_wake_batch(hctx->tags, users);
-
-	return true;
 }
 
 /*
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 5668e28be0b7..91ff37e3b43d 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -47,15 +47,13 @@ enum {
 	BLK_MQ_TAG_MAX		= BLK_MQ_NO_TAG - 1,
 };
 
-extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
+extern void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
 extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
 
-static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
-	if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
-		return false;
-
-	return __blk_mq_tag_busy(hctx);
+	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+		__blk_mq_tag_busy(hctx);
 }
 
 static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
-- 
cgit v1.2.3-59-g8ed1b


From 1f90307e5f0d7bc9a336ead528f616a5df8e5944 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 19 Jun 2022 08:05:49 +0200
Subject: block: remove QUEUE_FLAG_DEAD

Disallow setting the blk-mq state on any queue that is already dying as
setting the state even then is a bad idea, and remove the now unused
QUEUE_FLAG_DEAD flag.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20220619060552.1850436-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 3 ---
 block/blk-mq-debugfs.c | 8 +++-----
 include/linux/blkdev.h | 2 --
 3 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index c2cec402d01c..f86df390afad 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -313,9 +313,6 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * after draining finished.
 	 */
 	blk_freeze_queue(q);
-
-	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
-
 	blk_sync_queue(q);
 	if (queue_is_mq(q)) {
 		blk_mq_cancel_work_sync(q);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 4d1ce9ef4318..b80fae7ab1d9 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -116,7 +116,6 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(NOXMERGES),
 	QUEUE_FLAG_NAME(ADD_RANDOM),
 	QUEUE_FLAG_NAME(SAME_FORCE),
-	QUEUE_FLAG_NAME(DEAD),
 	QUEUE_FLAG_NAME(INIT_DONE),
 	QUEUE_FLAG_NAME(STABLE_WRITES),
 	QUEUE_FLAG_NAME(POLL),
@@ -151,11 +150,10 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
 	char opbuf[16] = { }, *op;
 
 	/*
-	 * The "state" attribute is removed after blk_cleanup_queue() has called
-	 * blk_mq_free_queue(). Return if QUEUE_FLAG_DEAD has been set to avoid
-	 * triggering a use-after-free.
+	 * The "state" attribute is removed when the queue is removed.  Don't
+	 * allow setting the state on a dying queue to avoid a use-after-free.
 	 */
-	if (blk_queue_dead(q))
+	if (blk_queue_dying(q))
 		return -ENOENT;
 
 	if (count >= sizeof(opbuf)) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b2d42201bd5d..f4632f4fe884 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -564,7 +564,6 @@ struct request_queue {
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
-#define QUEUE_FLAG_DEAD		13	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
 #define QUEUE_FLAG_STABLE_WRITES 15	/* don't modify blks until WB is done */
 #define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
@@ -592,7 +591,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_has_srcu(q)	test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)
-#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
-- 
cgit v1.2.3-59-g8ed1b


From 0e3534022f26ae51f7cf28347a253230604b6f4e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 19 Jun 2022 08:05:50 +0200
Subject: block: stop setting the nomerges flags in blk_cleanup_queue

These flags only apply to file system I/O, and all file system I/O is
already drained by del_gendisk and thus can't be in progress when
blk_cleanup_queue is called.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20220619060552.1850436-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index f86df390afad..04029ffea031 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -304,9 +304,6 @@ void blk_cleanup_queue(struct request_queue *q)
 	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
 	blk_queue_start_drain(q);
 
-	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
-	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
-
 	/*
 	 * Drain all requests queued before DYING marking. Set DEAD flag to
 	 * prevent that blk_mq_run_hw_queues() accesses the hardware queues
-- 
cgit v1.2.3-59-g8ed1b


From 6f8191fdf41d3a53cc1d63fe2234e812c55a0092 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 19 Jun 2022 08:05:51 +0200
Subject: block: simplify disk shutdown

Set the queue dying flag and call blk_mq_exit_queue from del_gendisk for
all disks that do not have separately allocated queues, and thus remove
the need to call blk_cleanup_queue for them.

Rename blk_cleanup_disk to blk_mq_destroy_queue to make it clear that
this function is intended only for separately allocated blk-mq queues.

This saves an extra queue freeze for devices without a separately
allocated queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20220619060552.1850436-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c                    | 37 -------------------------------
 block/blk-mq.c                      | 43 +++++++++++++++++++++++++++++++++++--
 block/blk-sysfs.c                   |  5 -----
 block/blk.h                         |  3 +++
 block/bsg-lib.c                     |  4 ++--
 block/genhd.c                       | 23 +++++++++++---------
 drivers/block/ataflop.c             |  1 -
 drivers/block/loop.c                |  1 -
 drivers/block/mtip32xx/mtip32xx.c   |  2 --
 drivers/block/rnbd/rnbd-clt.c       |  2 +-
 drivers/block/sx8.c                 |  4 ++--
 drivers/block/virtio_blk.c          |  1 -
 drivers/block/z2ram.c               |  1 -
 drivers/cdrom/gdrom.c               |  1 -
 drivers/memstick/core/ms_block.c    |  1 -
 drivers/memstick/core/mspro_block.c |  1 -
 drivers/mmc/core/block.c            |  1 -
 drivers/mmc/core/queue.c            |  1 -
 drivers/nvme/host/apple.c           |  2 +-
 drivers/nvme/host/core.c            |  1 -
 drivers/nvme/host/fc.c              | 12 +++++------
 drivers/nvme/host/pci.c             |  2 +-
 drivers/nvme/host/rdma.c            | 12 +++++------
 drivers/nvme/host/tcp.c             | 12 +++++------
 drivers/nvme/target/loop.c          | 12 +++++------
 drivers/s390/block/dasd.c           |  2 +-
 drivers/s390/block/dasd_genhd.c     |  4 ++--
 drivers/scsi/scsi_lib.c             |  6 +++---
 drivers/scsi/scsi_sysfs.c           |  2 +-
 drivers/scsi/sd.c                   |  4 ++--
 drivers/scsi/sr.c                   |  4 ++--
 drivers/ufs/core/ufshcd.c           |  4 ++--
 include/linux/blk-mq.h              |  3 +++
 include/linux/blkdev.h              |  4 +---
 34 files changed, 105 insertions(+), 113 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 04029ffea031..5ad7bd93077c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -284,43 +284,6 @@ void blk_queue_start_drain(struct request_queue *q)
 	wake_up_all(&q->mq_freeze_wq);
 }
 
-/**
- * blk_cleanup_queue - shutdown a request queue
- * @q: request queue to shutdown
- *
- * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
- * put it.  All future requests will be failed immediately with -ENODEV.
- *
- * Context: can sleep
- */
-void blk_cleanup_queue(struct request_queue *q)
-{
-	/* cannot be called from atomic context */
-	might_sleep();
-
-	WARN_ON_ONCE(blk_queue_registered(q));
-
-	/* mark @q DYING, no new request or merges will be allowed afterwards */
-	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
-	blk_queue_start_drain(q);
-
-	/*
-	 * Drain all requests queued before DYING marking. Set DEAD flag to
-	 * prevent that blk_mq_run_hw_queues() accesses the hardware queues
-	 * after draining finished.
-	 */
-	blk_freeze_queue(q);
-	blk_sync_queue(q);
-	if (queue_is_mq(q)) {
-		blk_mq_cancel_work_sync(q);
-		blk_mq_exit_queue(q);
-	}
-
-	/* @q is and will stay empty, shutdown and put */
-	blk_put_queue(q);
-}
-EXPORT_SYMBOL(blk_cleanup_queue);
-
 /**
  * blk_queue_enter() - try to increase q->q_usage_counter
  * @q: request queue pointer
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 92aae03103b7..b1dbc4b2c2c9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3902,7 +3902,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 	q->queuedata = queuedata;
 	ret = blk_mq_init_allocated_queue(set, q);
 	if (ret) {
-		blk_cleanup_queue(q);
+		blk_put_queue(q);
 		return ERR_PTR(ret);
 	}
 	return q;
@@ -3914,6 +3914,35 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+/**
+ * blk_mq_destroy_queue - shutdown a request queue
+ * @q: request queue to shutdown
+ *
+ * This shuts down a request queue allocated by blk_mq_init_queue() and drops
+ * the initial reference.  All future requests will failed with -ENODEV.
+ *
+ * Context: can sleep
+ */
+void blk_mq_destroy_queue(struct request_queue *q)
+{
+	WARN_ON_ONCE(!queue_is_mq(q));
+	WARN_ON_ONCE(blk_queue_registered(q));
+
+	might_sleep();
+
+	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+	blk_queue_start_drain(q);
+	blk_freeze_queue(q);
+
+	blk_sync_queue(q);
+	blk_mq_cancel_work_sync(q);
+	blk_mq_exit_queue(q);
+
+	/* @q is and will stay empty, shutdown and put */
+	blk_put_queue(q);
+}
+EXPORT_SYMBOL(blk_mq_destroy_queue);
+
 struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
 		struct lock_class_key *lkclass)
 {
@@ -3926,13 +3955,23 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
 
 	disk = __alloc_disk_node(q, set->numa_node, lkclass);
 	if (!disk) {
-		blk_cleanup_queue(q);
+		blk_put_queue(q);
 		return ERR_PTR(-ENOMEM);
 	}
+	set_bit(GD_OWNS_QUEUE, &disk->state);
 	return disk;
 }
 EXPORT_SYMBOL(__blk_mq_alloc_disk);
 
+struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
+		struct lock_class_key *lkclass)
+{
+	if (!blk_get_queue(q))
+		return NULL;
+	return __alloc_disk_node(q, NUMA_NO_NODE, lkclass);
+}
+EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue);
+
 static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 		struct blk_mq_tag_set *set, struct request_queue *q,
 		int hctx_idx, int node)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 69e53d1a4f0e..9b211e519de8 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -755,11 +755,6 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
  * decremented with blk_put_queue(). Once the refcount reaches 0 this function
  * is called.
  *
- * For drivers that have a request_queue on a gendisk and added with
- * __device_add_disk() the refcount to request_queue will reach 0 with
- * the last put_disk() called by the driver. For drivers which don't use
- * __device_add_disk() this happens with blk_cleanup_queue().
- *
  * Drivers exist which depend on the release of the request_queue to be
  * synchronous, it should not be deferred.
  *
diff --git a/block/blk.h b/block/blk.h
index 8e79296ee97a..1a0d3e6a4a63 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -424,6 +424,9 @@ int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
 		sector_t length);
 void blk_drop_partitions(struct gendisk *disk);
 
+struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
+		struct lock_class_key *lkclass);
+
 int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
 		unsigned int max_sectors, bool *same_page);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index acfe1357bf6c..fd4cd5e68282 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -324,7 +324,7 @@ void bsg_remove_queue(struct request_queue *q)
 			container_of(q->tag_set, struct bsg_set, tag_set);
 
 		bsg_unregister_queue(bset->bd);
-		blk_cleanup_queue(q);
+		blk_mq_destroy_queue(q);
 		blk_mq_free_tag_set(&bset->tag_set);
 		kfree(bset);
 	}
@@ -399,7 +399,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
 
 	return q;
 out_cleanup_queue:
-	blk_cleanup_queue(q);
+	blk_mq_destroy_queue(q);
 out_queue:
 	blk_mq_free_tag_set(set);
 out_tag_set:
diff --git a/block/genhd.c b/block/genhd.c
index 278227ba1d53..4d15f828c449 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -617,6 +617,8 @@ void del_gendisk(struct gendisk *disk)
 	 * Fail any new I/O.
 	 */
 	set_bit(GD_DEAD, &disk->state);
+	if (test_bit(GD_OWNS_QUEUE, &disk->state))
+		blk_queue_flag_set(QUEUE_FLAG_DYING, q);
 	set_capacity(disk, 0);
 
 	/*
@@ -663,11 +665,16 @@ void del_gendisk(struct gendisk *disk)
 	blk_mq_unquiesce_queue(q);
 
 	/*
-	 * Allow using passthrough request again after the queue is torn down.
+	 * If the disk does not own the queue, allow using passthrough requests
+	 * again.  Else leave the queue frozen to fail all I/O.
 	 */
-	blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
-	__blk_mq_unfreeze_queue(q, true);
-
+	if (!test_bit(GD_OWNS_QUEUE, &disk->state)) {
+		blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
+		__blk_mq_unfreeze_queue(q, true);
+	} else {
+		if (queue_is_mq(q))
+			blk_mq_exit_queue(q);
+	}
 }
 EXPORT_SYMBOL(del_gendisk);
 
@@ -1338,9 +1345,6 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 {
 	struct gendisk *disk;
 
-	if (!blk_get_queue(q))
-		return NULL;
-
 	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
 	if (!disk)
 		goto out_put_queue;
@@ -1391,7 +1395,6 @@ out_put_queue:
 	blk_put_queue(q);
 	return NULL;
 }
-EXPORT_SYMBOL(__alloc_disk_node);
 
 struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
 {
@@ -1404,9 +1407,10 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
 
 	disk = __alloc_disk_node(q, node, lkclass);
 	if (!disk) {
-		blk_cleanup_queue(q);
+		blk_put_queue(q);
 		return NULL;
 	}
+	set_bit(GD_OWNS_QUEUE, &disk->state);
 	return disk;
 }
 EXPORT_SYMBOL(__blk_alloc_disk);
@@ -1439,7 +1443,6 @@ EXPORT_SYMBOL(put_disk);
  */
 void blk_cleanup_disk(struct gendisk *disk)
 {
-	blk_cleanup_queue(disk->queue);
 	put_disk(disk);
 }
 EXPORT_SYMBOL(blk_cleanup_disk);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index e232cc4fd444..c6e41ee18aaa 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -2045,7 +2045,6 @@ static void atari_floppy_cleanup(void)
 			if (!unit[i].disk[type])
 				continue;
 			del_gendisk(unit[i].disk[type]);
-			blk_cleanup_queue(unit[i].disk[type]->queue);
 			put_disk(unit[i].disk[type]);
 		}
 		blk_mq_free_tag_set(&unit[i].tag_set);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 084f9b8a0ba3..cc608226c8c7 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2057,7 +2057,6 @@ static void loop_remove(struct loop_device *lo)
 {
 	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
-	blk_cleanup_queue(lo->lo_disk->queue);
 	blk_mq_free_tag_set(&lo->tag_set);
 
 	mutex_lock(&loop_ctl_mutex);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index e7604b3bf8a7..1d0e0a9fdd7c 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3565,7 +3565,6 @@ static int mtip_block_shutdown(struct driver_data *dd)
 	if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
 		del_gendisk(dd->disk);
 
-	blk_cleanup_queue(dd->queue);
 	blk_mq_free_tag_set(&dd->tags);
 	put_disk(dd->disk);
 	return 0;
@@ -3914,7 +3913,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
 						dd->disk->disk_name);
 
-	blk_cleanup_queue(dd->queue);
 	blk_mq_free_tag_set(&dd->tags);
 
 	/* De-initialize the protocol layer. */
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 409c76b81aed..a4470374f54f 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1755,7 +1755,7 @@ static void rnbd_destroy_sessions(void)
 		list_for_each_entry_safe(dev, tn, &sess->devs_list, list) {
 			/*
 			 * Here unmap happens in parallel for only one reason:
-			 * blk_cleanup_queue() takes around half a second, so
+			 * del_gendisk() takes around half a second, so
 			 * on huge amount of devices the whole module unload
 			 * procedure takes minutes.
 			 */
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 63b4f6431d2e..75057dbbcfbe 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1536,7 +1536,7 @@ err_out_free_majors:
 		clear_bit(0, &carm_major_alloc);
 	else if (host->major == 161)
 		clear_bit(1, &carm_major_alloc);
-	blk_cleanup_queue(host->oob_q);
+	blk_mq_destroy_queue(host->oob_q);
 	blk_mq_free_tag_set(&host->tag_set);
 err_out_dma_free:
 	dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
@@ -1570,7 +1570,7 @@ static void carm_remove_one (struct pci_dev *pdev)
 		clear_bit(0, &carm_major_alloc);
 	else if (host->major == 161)
 		clear_bit(1, &carm_major_alloc);
-	blk_cleanup_queue(host->oob_q);
+	blk_mq_destroy_queue(host->oob_q);
 	blk_mq_free_tag_set(&host->tag_set);
 	dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
 	iounmap(host->mmio);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6fc7850c2b0a..cff1b6f6b054 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1111,7 +1111,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 	flush_work(&vblk->config_work);
 
 	del_gendisk(vblk->disk);
-	blk_cleanup_queue(vblk->disk->queue);
 	blk_mq_free_tag_set(&vblk->tag_set);
 
 	mutex_lock(&vblk->vdev_mutex);
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 7a6ed83481b8..18ad43d9933e 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -384,7 +384,6 @@ static void __exit z2_exit(void)
 
 	for (i = 0; i < Z2MINOR_COUNT; i++) {
 		del_gendisk(z2ram_gendisk[i]);
-		blk_cleanup_queue(z2ram_gendisk[i]->queue);
 		put_disk(z2ram_gendisk[i]);
 	}
 	blk_mq_free_tag_set(&tag_set);
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 8e78b37d0f6a..f4cc90ea6198 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -831,7 +831,6 @@ probe_fail_no_mem:
 
 static int remove_gdrom(struct platform_device *devptr)
 {
-	blk_cleanup_queue(gd.gdrom_rq);
 	blk_mq_free_tag_set(&gd.tag_set);
 	free_irq(HW_EVENT_GDROM_CMD, &gd);
 	free_irq(HW_EVENT_GDROM_DMA, &gd);
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 3993bdd4b519..ba7e7249a3db 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2187,7 +2187,6 @@ static void msb_remove(struct memstick_dev *card)
 
 	/* Remove the disk */
 	del_gendisk(msb->disk);
-	blk_cleanup_queue(msb->queue);
 	blk_mq_free_tag_set(&msb->tag_set);
 	msb->queue = NULL;
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 725ba74ded30..72e91c06c618 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1294,7 +1294,6 @@ static void mspro_block_remove(struct memstick_dev *card)
 	del_gendisk(msb->disk);
 	dev_dbg(&card->dev, "mspro block remove\n");
 
-	blk_cleanup_queue(msb->queue);
 	blk_mq_free_tag_set(&msb->tag_set);
 	msb->queue = NULL;
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index f4a1281658db..bda6c67ce93f 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2509,7 +2509,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 	return md;
 
  err_cleanup_queue:
-	blk_cleanup_queue(md->disk->queue);
 	blk_mq_free_tag_set(&md->queue.tag_set);
  err_kfree:
 	kfree(md);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index fa5324ceeebe..f824cfdab75a 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -494,7 +494,6 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
 	if (blk_queue_quiesced(q))
 		blk_mq_unquiesce_queue(q);
 
-	blk_cleanup_queue(q);
 	blk_mq_free_tag_set(&mq->tag_set);
 
 	/*
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index d702d7d60235..2d23b7d41f7e 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1502,7 +1502,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
 
 	if (!blk_get_queue(anv->ctrl.admin_q)) {
 		nvme_start_admin_queue(&anv->ctrl);
-		blk_cleanup_queue(anv->ctrl.admin_q);
+		blk_mq_destroy_queue(anv->ctrl.admin_q);
 		anv->ctrl.admin_q = NULL;
 		ret = -ENODEV;
 		goto put_dev;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b3d9c29aba1e..4e3a0f7bfc9c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4103,7 +4103,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	if (!nvme_ns_head_multipath(ns->head))
 		nvme_cdev_del(&ns->cdev, &ns->cdev_device);
 	del_gendisk(ns->disk);
-	blk_cleanup_queue(ns->queue);
 
 	down_write(&ns->ctrl->namespaces_rwsem);
 	list_del_init(&ns->list);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 3c778bb0c294..a96aa831684c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2392,7 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref)
 	unsigned long flags;
 
 	if (ctrl->ctrl.tagset) {
-		blk_cleanup_queue(ctrl->ctrl.connect_q);
+		blk_mq_destroy_queue(ctrl->ctrl.connect_q);
 		blk_mq_free_tag_set(&ctrl->tag_set);
 	}
 
@@ -2402,8 +2402,8 @@ nvme_fc_ctrl_free(struct kref *ref)
 	spin_unlock_irqrestore(&ctrl->rport->lock, flags);
 
 	nvme_start_admin_queue(&ctrl->ctrl);
-	blk_cleanup_queue(ctrl->ctrl.admin_q);
-	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
+	blk_mq_destroy_queue(ctrl->ctrl.admin_q);
+	blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 
 	kfree(ctrl->queues);
@@ -2953,7 +2953,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 out_delete_hw_queues:
 	nvme_fc_delete_hw_io_queues(ctrl);
 out_cleanup_blk_queue:
-	blk_cleanup_queue(ctrl->ctrl.connect_q);
+	blk_mq_destroy_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
 	blk_mq_free_tag_set(&ctrl->tag_set);
 	nvme_fc_free_io_queues(ctrl);
@@ -3642,9 +3642,9 @@ fail_ctrl:
 	return ERR_PTR(-EIO);
 
 out_cleanup_admin_q:
-	blk_cleanup_queue(ctrl->ctrl.admin_q);
+	blk_mq_destroy_queue(ctrl->ctrl.admin_q);
 out_cleanup_fabrics_q:
-	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
+	blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
 out_free_admin_tag_set:
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 out_free_queues:
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d7b24ee17285..247a74aba336 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1760,7 +1760,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
 		 * queue to flush these to completion.
 		 */
 		nvme_start_admin_queue(&dev->ctrl);
-		blk_cleanup_queue(dev->ctrl.admin_q);
+		blk_mq_destroy_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
 }
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f2a5e1ea508a..0fb7c8e7ab0b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -840,8 +840,8 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
 	if (remove) {
-		blk_cleanup_queue(ctrl->ctrl.admin_q);
-		blk_cleanup_queue(ctrl->ctrl.fabrics_q);
+		blk_mq_destroy_queue(ctrl->ctrl.admin_q);
+		blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
 		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
 	}
 	if (ctrl->async_event_sqe.data) {
@@ -935,10 +935,10 @@ out_stop_queue:
 	nvme_cancel_admin_tagset(&ctrl->ctrl);
 out_cleanup_queue:
 	if (new)
-		blk_cleanup_queue(ctrl->ctrl.admin_q);
+		blk_mq_destroy_queue(ctrl->ctrl.admin_q);
 out_cleanup_fabrics_q:
 	if (new)
-		blk_cleanup_queue(ctrl->ctrl.fabrics_q);
+		blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
 out_free_tagset:
 	if (new)
 		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
@@ -957,7 +957,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
 	if (remove) {
-		blk_cleanup_queue(ctrl->ctrl.connect_q);
+		blk_mq_destroy_queue(ctrl->ctrl.connect_q);
 		blk_mq_free_tag_set(ctrl->ctrl.tagset);
 	}
 	nvme_rdma_free_io_queues(ctrl);
@@ -1012,7 +1012,7 @@ out_wait_freeze_timed_out:
 out_cleanup_connect_q:
 	nvme_cancel_tagset(&ctrl->ctrl);
 	if (new)
-		blk_cleanup_queue(ctrl->ctrl.connect_q);
+		blk_mq_destroy_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
 	if (new)
 		blk_mq_free_tag_set(ctrl->ctrl.tagset);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index bb67538d241b..b81942fa5f95 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1885,7 +1885,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
 {
 	nvme_tcp_stop_io_queues(ctrl);
 	if (remove) {
-		blk_cleanup_queue(ctrl->connect_q);
+		blk_mq_destroy_queue(ctrl->connect_q);
 		blk_mq_free_tag_set(ctrl->tagset);
 	}
 	nvme_tcp_free_io_queues(ctrl);
@@ -1940,7 +1940,7 @@ out_wait_freeze_timed_out:
 out_cleanup_connect_q:
 	nvme_cancel_tagset(ctrl);
 	if (new)
-		blk_cleanup_queue(ctrl->connect_q);
+		blk_mq_destroy_queue(ctrl->connect_q);
 out_free_tag_set:
 	if (new)
 		blk_mq_free_tag_set(ctrl->tagset);
@@ -1953,8 +1953,8 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
 {
 	nvme_tcp_stop_queue(ctrl, 0);
 	if (remove) {
-		blk_cleanup_queue(ctrl->admin_q);
-		blk_cleanup_queue(ctrl->fabrics_q);
+		blk_mq_destroy_queue(ctrl->admin_q);
+		blk_mq_destroy_queue(ctrl->fabrics_q);
 		blk_mq_free_tag_set(ctrl->admin_tagset);
 	}
 	nvme_tcp_free_admin_queue(ctrl);
@@ -2012,10 +2012,10 @@ out_stop_queue:
 	nvme_cancel_admin_tagset(ctrl);
 out_cleanup_queue:
 	if (new)
-		blk_cleanup_queue(ctrl->admin_q);
+		blk_mq_destroy_queue(ctrl->admin_q);
 out_cleanup_fabrics_q:
 	if (new)
-		blk_cleanup_queue(ctrl->fabrics_q);
+		blk_mq_destroy_queue(ctrl->fabrics_q);
 out_free_tagset:
 	if (new)
 		blk_mq_free_tag_set(ctrl->admin_tagset);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 59024af2da2e..0f5c77e22a0a 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -266,8 +266,8 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags))
 		return;
 	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
-	blk_cleanup_queue(ctrl->ctrl.admin_q);
-	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
+	blk_mq_destroy_queue(ctrl->ctrl.admin_q);
+	blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 }
 
@@ -283,7 +283,7 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
 	mutex_unlock(&nvme_loop_ctrl_mutex);
 
 	if (nctrl->tagset) {
-		blk_cleanup_queue(ctrl->ctrl.connect_q);
+		blk_mq_destroy_queue(ctrl->ctrl.connect_q);
 		blk_mq_free_tag_set(&ctrl->tag_set);
 	}
 	kfree(ctrl->queues);
@@ -410,9 +410,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 
 out_cleanup_queue:
 	clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
-	blk_cleanup_queue(ctrl->ctrl.admin_q);
+	blk_mq_destroy_queue(ctrl->ctrl.admin_q);
 out_cleanup_fabrics_q:
-	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
+	blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
 out_free_tagset:
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 out_free_sq:
@@ -554,7 +554,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	return 0;
 
 out_cleanup_connect_q:
-	blk_cleanup_queue(ctrl->ctrl.connect_q);
+	blk_mq_destroy_queue(ctrl->ctrl.connect_q);
 out_free_tagset:
 	blk_mq_free_tag_set(&ctrl->tag_set);
 out_destroy_queues:
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index ba6d78789660..e8489331f12b 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3280,7 +3280,7 @@ static int dasd_alloc_queue(struct dasd_block *block)
 static void dasd_free_queue(struct dasd_block *block)
 {
 	if (block->request_queue) {
-		blk_cleanup_queue(block->request_queue);
+		blk_mq_destroy_queue(block->request_queue);
 		blk_mq_free_tag_set(&block->tag_set);
 		block->request_queue = NULL;
 	}
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index a7a33ebf4bbe..5a83f0a39901 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -41,8 +41,8 @@ int dasd_gendisk_alloc(struct dasd_block *block)
 	if (base->devindex >= DASD_PER_MAJOR)
 		return -EBUSY;
 
-	gdp = __alloc_disk_node(block->request_queue, NUMA_NO_NODE,
-				&dasd_bio_compl_lkclass);
+	gdp = blk_mq_alloc_disk_for_queue(block->request_queue,
+					  &dasd_bio_compl_lkclass);
 	if (!gdp)
 		return -ENOMEM;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6ffc9e4258a8..cdf0056582d5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -163,7 +163,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
 	 * Requeue this command.  It will go before all other commands
 	 * that are already in the queue. Schedule requeue work under
 	 * lock such that the kblockd_schedule_work() call happens
-	 * before blk_cleanup_queue() finishes.
+	 * before blk_mq_destroy_queue() finishes.
 	 */
 	cmd->result = 0;
 
@@ -424,9 +424,9 @@ static void scsi_starved_list_run(struct Scsi_Host *shost)
 		 * it and the queue.  Mitigate by taking a reference to the
 		 * queue and never touching the sdev again after we drop the
 		 * host lock.  Note: if __scsi_remove_device() invokes
-		 * blk_cleanup_queue() before the queue is run from this
+		 * blk_mq_destroy_queue() before the queue is run from this
 		 * function then blk_run_queue() will return immediately since
-		 * blk_cleanup_queue() marks the queue with QUEUE_FLAG_DYING.
+		 * blk_mq_destroy_queue() marks the queue with QUEUE_FLAG_DYING.
 		 */
 		slq = sdev->request_queue;
 		if (!blk_get_queue(slq))
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 43949798a2e4..aa70d9282161 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1475,7 +1475,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
 	scsi_device_set_state(sdev, SDEV_DEL);
 	mutex_unlock(&sdev->state_mutex);
 
-	blk_cleanup_queue(sdev->request_queue);
+	blk_mq_destroy_queue(sdev->request_queue);
 	cancel_work_sync(&sdev->requeue_work);
 
 	if (sdev->host->hostt->slave_destroy)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a1a2ac09066f..cb587e488601 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3440,8 +3440,8 @@ static int sd_probe(struct device *dev)
 	if (!sdkp)
 		goto out;
 
-	gd = __alloc_disk_node(sdp->request_queue, NUMA_NO_NODE,
-			       &sd_bio_compl_lkclass);
+	gd = blk_mq_alloc_disk_for_queue(sdp->request_queue,
+					 &sd_bio_compl_lkclass);
 	if (!gd)
 		goto out_free;
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 32d3b8274f14..a278b739d0c5 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -624,8 +624,8 @@ static int sr_probe(struct device *dev)
 	if (!cd)
 		goto fail;
 
-	disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE,
-				 &sr_bio_compl_lkclass);
+	disk = blk_mq_alloc_disk_for_queue(sdev->request_queue,
+					   &sr_bio_compl_lkclass);
 	if (!disk)
 		goto fail_free;
 	mutex_init(&cd->lock);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index ce86d1b790c0..91d8852daaa9 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -9487,7 +9487,7 @@ void ufshcd_remove(struct ufs_hba *hba)
 	ufs_bsg_remove(hba);
 	ufshpb_remove(hba);
 	ufs_sysfs_remove_nodes(hba->dev);
-	blk_cleanup_queue(hba->tmf_queue);
+	blk_mq_destroy_queue(hba->tmf_queue);
 	blk_mq_free_tag_set(&hba->tmf_tag_set);
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
@@ -9783,7 +9783,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	return 0;
 
 free_tmf_queue:
-	blk_cleanup_queue(hba->tmf_queue);
+	blk_mq_destroy_queue(hba->tmf_queue);
 free_tmf_tag_set:
 	blk_mq_free_tag_set(&hba->tmf_tag_set);
 out_remove_scsi_host:
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e2d9daf7e8dd..0fd96e92c6c6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -686,10 +686,13 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
 									\
 	__blk_mq_alloc_disk(set, queuedata, &__key);			\
 })
+struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
+		struct lock_class_key *lkclass);
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		struct request_queue *q);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
+void blk_mq_destroy_queue(struct request_queue *);
 
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
 int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f4632f4fe884..530eeccffda3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -148,6 +148,7 @@ struct gendisk {
 #define GD_NATIVE_CAPACITY		3
 #define GD_ADDED			4
 #define GD_SUPPRESS_PART_SCAN		5
+#define GD_OWNS_QUEUE			6
 
 	struct mutex open_mutex;	/* open/close mutex */
 	unsigned open_partitions;	/* number of open partitions */
@@ -815,8 +816,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
 
 int bdev_disk_changed(struct gendisk *disk, bool invalidate);
 
-struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
-		struct lock_class_key *lkclass);
 void put_disk(struct gendisk *disk);
 struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
 
@@ -933,7 +932,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
 /*
  * Access functions for manipulating queue properties
  */
-extern void blk_cleanup_queue(struct request_queue *);
 void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
-- 
cgit v1.2.3-59-g8ed1b


From 8b9ab62662048a3274361c7e5f64037c2c133e2c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 19 Jun 2022 08:05:52 +0200
Subject: block: remove blk_cleanup_disk

blk_cleanup_disk is nothing but a trivial wrapper for put_disk now,
so remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20220619060552.1850436-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/m68k/emu/nfblock.c             |  4 ++--
 arch/um/drivers/ubd_kern.c          |  4 ++--
 arch/xtensa/platforms/iss/simdisk.c |  4 ++--
 block/genhd.c                       | 15 ---------------
 drivers/block/amiflop.c             |  2 +-
 drivers/block/aoe/aoeblk.c          |  2 +-
 drivers/block/aoe/aoedev.c          |  2 +-
 drivers/block/ataflop.c             |  4 ++--
 drivers/block/brd.c                 |  4 ++--
 drivers/block/drbd/drbd_main.c      |  4 ++--
 drivers/block/floppy.c              |  6 +++---
 drivers/block/loop.c                |  2 +-
 drivers/block/mtip32xx/mtip32xx.c   |  2 +-
 drivers/block/n64cart.c             |  2 +-
 drivers/block/nbd.c                 |  4 ++--
 drivers/block/null_blk/main.c       |  4 ++--
 drivers/block/paride/pcd.c          |  4 ++--
 drivers/block/paride/pd.c           |  4 ++--
 drivers/block/paride/pf.c           |  4 ++--
 drivers/block/pktcdvd.c             |  4 ++--
 drivers/block/ps3disk.c             |  4 ++--
 drivers/block/ps3vram.c             |  4 ++--
 drivers/block/rbd.c                 |  2 +-
 drivers/block/rnbd/rnbd-clt.c       |  4 ++--
 drivers/block/sunvdc.c              |  4 ++--
 drivers/block/swim.c                |  2 +-
 drivers/block/swim3.c               |  2 +-
 drivers/block/sx8.c                 |  2 +-
 drivers/block/virtio_blk.c          |  2 +-
 drivers/block/xen-blkfront.c        |  4 ++--
 drivers/block/z2ram.c               |  2 +-
 drivers/block/zram/zram_drv.c       |  4 ++--
 drivers/cdrom/gdrom.c               |  2 +-
 drivers/md/bcache/super.c           |  2 +-
 drivers/md/dm.c                     |  2 +-
 drivers/md/md.c                     |  4 ++--
 drivers/memstick/core/ms_block.c    |  2 +-
 drivers/memstick/core/mspro_block.c |  2 +-
 drivers/mtd/mtd_blkdevs.c           |  4 ++--
 drivers/mtd/ubi/block.c             |  4 ++--
 drivers/nvdimm/btt.c                |  4 ++--
 drivers/nvdimm/pmem.c               |  4 ++--
 drivers/nvme/host/core.c            |  2 +-
 drivers/nvme/host/multipath.c       |  2 +-
 drivers/s390/block/dcssblk.c        |  8 ++++----
 drivers/s390/block/scm_blk.c        |  4 ++--
 include/linux/blkdev.h              |  1 -
 47 files changed, 74 insertions(+), 90 deletions(-)

(limited to 'block')

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 267b02cc5655..a708fbd5a844 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -138,7 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(dev->disk);
+	put_disk(dev->disk);
 free_dev:
 	kfree(dev);
 out:
@@ -180,7 +180,7 @@ static void __exit nfhd_exit(void)
 	list_for_each_entry_safe(dev, next, &nfhd_list, list) {
 		list_del(&dev->list);
 		del_gendisk(dev->disk);
-		blk_cleanup_disk(dev->disk);
+		put_disk(dev->disk);
 		kfree(dev);
 	}
 	unregister_blkdev(major_num, "nfhd");
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index c4344b67628d..479b79e11442 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -925,7 +925,7 @@ static int ubd_add(int n, char **error_out)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_cleanup_tags:
 	blk_mq_free_tag_set(&ubd_dev->tag_set);
 out:
@@ -1032,7 +1032,7 @@ static int ubd_remove(int n, char **error_out)
 	ubd_gendisk[n] = NULL;
 	if(disk != NULL){
 		del_gendisk(disk);
-		blk_cleanup_disk(disk);
+		put_disk(disk);
 	}
 
 	err = 0;
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 4255b92fa3eb..f50caaa1c249 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -290,7 +290,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(dev->gd);
+	put_disk(dev->gd);
 out:
 	return err;
 }
@@ -344,7 +344,7 @@ static void simdisk_teardown(struct simdisk *dev, int which,
 	simdisk_detach(dev);
 	if (dev->gd) {
 		del_gendisk(dev->gd);
-		blk_cleanup_disk(dev->gd);
+		put_disk(dev->gd);
 	}
 	remove_proc_entry(tmp, procdir);
 }
diff --git a/block/genhd.c b/block/genhd.c
index 4d15f828c449..bf9be06af2c8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1432,21 +1432,6 @@ void put_disk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(put_disk);
 
-/**
- * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
- * @disk: gendisk to shutdown
- *
- * Mark the queue hanging off @disk DYING, drain all pending requests, then mark
- * the queue DEAD, destroy and put it and the gendisk structure.
- *
- * Context: can sleep
- */
-void blk_cleanup_disk(struct gendisk *disk)
-{
-	put_disk(disk);
-}
-EXPORT_SYMBOL(blk_cleanup_disk);
-
 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
 {
 	char event[] = "DISK_RO=1";
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 5a566f2fd533..4c8b2ba579ee 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1802,7 +1802,7 @@ static int fd_alloc_disk(int drive, int system)
 	unit[drive].gendisk[system] = disk;
 	err = add_disk(disk);
 	if (err)
-		blk_cleanup_disk(disk);
+		put_disk(disk);
 	return err;
 }
 
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 348adf335217..12b3ca8f6f4a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -427,7 +427,7 @@ aoeblk_gdalloc(void *vp)
 	return;
 
 out_disk_cleanup:
-	blk_cleanup_disk(gd);
+	put_disk(gd);
 err_tagset:
 	blk_mq_free_tag_set(set);
 err_mempool:
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index b381d1c3ef32..3523dd82d7a0 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -277,7 +277,7 @@ freedev(struct aoedev *d)
 	if (d->gd) {
 		aoedisk_rm_debugfs(d);
 		del_gendisk(d->gd);
-		blk_cleanup_disk(d->gd);
+		put_disk(d->gd);
 		blk_mq_free_tag_set(&d->tag_set);
 	}
 	t = d->targets;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index c6e41ee18aaa..9deb4df6bdb8 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -2031,7 +2031,7 @@ static void ataflop_probe(dev_t dev)
 	return;
 
 cleanup_disk:
-	blk_cleanup_disk(unit[drive].disk[type]);
+	put_disk(unit[drive].disk[type]);
 	unit[drive].disk[type] = NULL;
 }
 
@@ -2063,7 +2063,7 @@ static void atari_cleanup_floppy_disk(struct atari_floppy_struct *fs)
 			continue;
 		if (fs->registered[type])
 			del_gendisk(fs->disk[type]);
-		blk_cleanup_disk(fs->disk[type]);
+		put_disk(fs->disk[type]);
 	}
 	blk_mq_free_tag_set(&fs->tag_set);
 }
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 6e3f2f0d2352..9e26d5e769f3 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -419,7 +419,7 @@ static int brd_alloc(int i)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_free_dev:
 	list_del(&brd->brd_list);
 	kfree(brd);
@@ -439,7 +439,7 @@ static void brd_cleanup(void)
 
 	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
 		del_gendisk(brd->brd_disk);
-		blk_cleanup_disk(brd->brd_disk);
+		put_disk(brd->brd_disk);
 		brd_free_pages(brd);
 		list_del(&brd->brd_list);
 		kfree(brd);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 2887350ae010..f3e4db16fd07 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2207,7 +2207,7 @@ void drbd_destroy_device(struct kref *kref)
 	if (device->bitmap) /* should no longer be there. */
 		drbd_bm_cleanup(device);
 	__free_page(device->md_io.page);
-	blk_cleanup_disk(device->vdisk);
+	put_disk(device->vdisk);
 	kfree(device->rs_plan_s);
 
 	/* not for_each_connection(connection, resource):
@@ -2807,7 +2807,7 @@ out_no_minor_idr:
 out_no_bitmap:
 	__free_page(device->md_io.page);
 out_no_io_page:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_no_disk:
 	kref_put(&resource->kref, drbd_destroy_resource);
 	kfree(device);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 015841f50f4e..491e7205a0db 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4557,7 +4557,7 @@ out:
 	return;
 
 cleanup_disk:
-	blk_cleanup_disk(disks[drive][type]);
+	put_disk(disks[drive][type]);
 	disks[drive][type] = NULL;
 	mutex_unlock(&floppy_probe_lock);
 }
@@ -4753,7 +4753,7 @@ out_put_disk:
 		if (!disks[drive][0])
 			break;
 		del_timer_sync(&motor_off_timer[drive]);
-		blk_cleanup_disk(disks[drive][0]);
+		put_disk(disks[drive][0]);
 		blk_mq_free_tag_set(&tag_sets[drive]);
 	}
 	return err;
@@ -4985,7 +4985,7 @@ static void __exit floppy_module_exit(void)
 		}
 		for (i = 0; i < ARRAY_SIZE(floppy_type); i++) {
 			if (disks[drive][i])
-				blk_cleanup_disk(disks[drive][i]);
+				put_disk(disks[drive][i]);
 		}
 		blk_mq_free_tag_set(&tag_sets[drive]);
 	}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cc608226c8c7..e3c0ba93c1a3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2040,7 +2040,7 @@ static int loop_add(int i)
 	return i;
 
 out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 1d0e0a9fdd7c..e116c6cf56f5 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3534,7 +3534,7 @@ init_hw_cmds_error:
 disk_index_error:
 	ida_free(&rssd_index_ida, index);
 ida_get_error:
-	blk_cleanup_disk(dd->disk);
+	put_disk(dd->disk);
 block_queue_alloc_init_error:
 	blk_mq_free_tag_set(&dd->tags);
 block_queue_alloc_tag_error:
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index e094d2b8b5a9..d914156db2d8 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -157,7 +157,7 @@ static int __init n64cart_probe(struct platform_device *pdev)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out:
 	return err;
 }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 07f3c139a3d7..5c4c9c45c6ac 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -250,7 +250,7 @@ static void nbd_dev_remove(struct nbd_device *nbd)
 	struct gendisk *disk = nbd->disk;
 
 	del_gendisk(disk);
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 	blk_mq_free_tag_set(&nbd->tag_set);
 
 	/*
@@ -1833,7 +1833,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 out_free_work:
 	destroy_workqueue(nbd->recv_workq);
 out_err_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_free_idr:
 	mutex_lock(&nbd_index_mutex);
 	idr_remove(&nbd_index_idr, index);
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 6b67088f4ea7..d695ea29efa6 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1737,7 +1737,7 @@ static void null_del_dev(struct nullb *nullb)
 		null_restart_queue_async(nullb);
 	}
 
-	blk_cleanup_disk(nullb->disk);
+	put_disk(nullb->disk);
 	if (dev->queue_mode == NULL_Q_MQ &&
 	    nullb->tag_set == &nullb->__tag_set)
 		blk_mq_free_tag_set(nullb->tag_set);
@@ -2082,7 +2082,7 @@ static int null_add_dev(struct nullb_device *dev)
 out_cleanup_zone:
 	null_free_zoned_dev(dev);
 out_cleanup_disk:
-	blk_cleanup_disk(nullb->disk);
+	put_disk(nullb->disk);
 out_cleanup_tags:
 	if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
 		blk_mq_free_tag_set(nullb->tag_set);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index f462ad67931a..a5ab40784119 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -956,7 +956,7 @@ out_unreg_cdrom:
 out_pi_release:
 	pi_release(cd->pi);
 out_free_disk:
-	blk_cleanup_disk(cd->disk);
+	put_disk(cd->disk);
 out_free_tag_set:
 	blk_mq_free_tag_set(&cd->tag_set);
 	return ret;
@@ -1029,7 +1029,7 @@ static void __exit pcd_exit(void)
 		unregister_cdrom(&cd->info);
 		del_gendisk(cd->disk);
 		pi_release(cd->pi);
-		blk_cleanup_disk(cd->disk);
+		put_disk(cd->disk);
 
 		blk_mq_free_tag_set(&cd->tag_set);
 	}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 3637c38c72f9..c8c14c6f5c3a 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -943,7 +943,7 @@ static int pd_probe_drive(struct pd_unit *disk, int autoprobe, int port,
 		goto cleanup_disk;
 	return 0;
 cleanup_disk:
-	blk_cleanup_disk(disk->gd);
+	put_disk(disk->gd);
 put_disk:
 	put_disk(p);
 	disk->gd = NULL;
@@ -1018,7 +1018,7 @@ static void __exit pd_exit(void)
 		if (p) {
 			disk->gd = NULL;
 			del_gendisk(p);
-			blk_cleanup_disk(p);
+			put_disk(p);
 			blk_mq_free_tag_set(&disk->tag_set);
 			pi_release(disk->pi);
 		}
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 292e9a4ce1b9..eec1b9fde245 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -975,7 +975,7 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port,
 out_pi_release:
 	pi_release(pf->pi);
 out_free_disk:
-	blk_cleanup_disk(pf->disk);
+	put_disk(pf->disk);
 out_free_tag_set:
 	blk_mq_free_tag_set(&pf->tag_set);
 	return ret;
@@ -1044,7 +1044,7 @@ static void __exit pf_exit(void)
 		if (!pf->present)
 			continue;
 		del_gendisk(pf->disk);
-		blk_cleanup_disk(pf->disk);
+		put_disk(pf->disk);
 		blk_mq_free_tag_set(&pf->tag_set);
 		pi_release(pf->pi);
 	}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 789093375344..653d24231483 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2733,7 +2733,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	return 0;
 
 out_mem2:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_mem:
 	mempool_exit(&pd->rb_pool);
 	kfree(pd);
@@ -2783,7 +2783,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
 	pkt_dbg(1, pd, "writer unmapped\n");
 
 	del_gendisk(pd->disk);
-	blk_cleanup_disk(pd->disk);
+	put_disk(pd->disk);
 
 	mempool_exit(&pd->rb_pool);
 	kfree(pd);
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 3054adf77460..36d7b36c60c7 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -473,7 +473,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 
 	return 0;
 fail_cleanup_disk:
-	blk_cleanup_disk(gendisk);
+	put_disk(gendisk);
 fail_free_tag_set:
 	blk_mq_free_tag_set(&priv->tag_set);
 fail_teardown:
@@ -500,7 +500,7 @@ static void ps3disk_remove(struct ps3_system_bus_device *_dev)
 		    &ps3disk_mask);
 	mutex_unlock(&ps3disk_mask_mutex);
 	del_gendisk(priv->gendisk);
-	blk_cleanup_disk(priv->gendisk);
+	put_disk(priv->gendisk);
 	blk_mq_free_tag_set(&priv->tag_set);
 	dev_notice(&dev->sbd.core, "Synchronizing disk cache\n");
 	ps3disk_sync_cache(dev);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 4f90819e245e..d1e0fefec90b 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -761,7 +761,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(gendisk);
+	put_disk(gendisk);
 out_cache_cleanup:
 	remove_proc_entry(DEVICE_NAME, NULL);
 	ps3vram_cache_cleanup(dev);
@@ -792,7 +792,7 @@ static void ps3vram_remove(struct ps3_system_bus_device *dev)
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 
 	del_gendisk(priv->gendisk);
-	blk_cleanup_disk(priv->gendisk);
+	put_disk(priv->gendisk);
 	remove_proc_entry(DEVICE_NAME, NULL);
 	ps3vram_cache_cleanup(dev);
 	iounmap(priv->reports);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ef9bc62e9afd..0d8ec2fe5740 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4729,7 +4729,7 @@ static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
 {
-	blk_cleanup_disk(rbd_dev->disk);
+	put_disk(rbd_dev->disk);
 	blk_mq_free_tag_set(&rbd_dev->tag_set);
 	rbd_dev->disk = NULL;
 }
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index a4470374f54f..b8d9e2824d9c 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1408,7 +1408,7 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
 	err = add_disk(dev->gd);
 	if (err)
-		blk_cleanup_disk(dev->gd);
+		put_disk(dev->gd);
 
 	return err;
 }
@@ -1630,7 +1630,7 @@ put_sess:
 static void destroy_gen_disk(struct rnbd_clt_dev *dev)
 {
 	del_gendisk(dev->gd);
-	blk_cleanup_disk(dev->gd);
+	put_disk(dev->gd);
 }
 
 static void destroy_sysfs(struct rnbd_clt_dev *dev,
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index dd0a1a6fed29..fb855da971ee 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -886,7 +886,7 @@ static int probe_disk(struct vdc_port *port)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(g);
+	put_disk(g);
 out_free_tag:
 	blk_mq_free_tag_set(&port->tag_set);
 	return err;
@@ -1070,7 +1070,7 @@ static void vdc_port_remove(struct vio_dev *vdev)
 		del_timer_sync(&port->vio.timer);
 
 		del_gendisk(port->disk);
-		blk_cleanup_disk(port->disk);
+		put_disk(port->disk);
 		blk_mq_free_tag_set(&port->tag_set);
 
 		vdc_free_tx_ring(port);
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index fef65a18d56f..42b4b6828690 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -783,7 +783,7 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs)
 	if (fs->registered)
 		del_gendisk(fs->disk);
 
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 	blk_mq_free_tag_set(&fs->tag_set);
 }
 
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 6c39f2c9f806..da811a7da03f 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1238,7 +1238,7 @@ static int swim3_attach(struct macio_dev *mdev,
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_free_tag_set:
 	blk_mq_free_tag_set(&fs->tag_set);
 out_unregister:
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 75057dbbcfbe..0e1a484cab0b 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1377,7 +1377,7 @@ static void carm_free_disk(struct carm_host *host, unsigned int port_no)
 
 	if (host->state > HST_DEV_ACTIVATE)
 		del_gendisk(disk);
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 }
 
 static int carm_init_shm(struct carm_host *host)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index cff1b6f6b054..d7d72e8f6e55 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1089,7 +1089,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(vblk->disk);
+	put_disk(vblk->disk);
 out_free_tags:
 	blk_mq_free_tag_set(&vblk->tag_set);
 out_free_vq:
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 33f04ef78984..3bc80f35418b 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2384,7 +2384,7 @@ static void blkfront_connect(struct blkfront_info *info)
 
 	err = device_add_disk(&info->xbdev->dev, info->gd, NULL);
 	if (err) {
-		blk_cleanup_disk(info->gd);
+		put_disk(info->gd);
 		blk_mq_free_tag_set(&info->tag_set);
 		info->rq = NULL;
 		goto fail;
@@ -2469,7 +2469,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
 	blkif_free(info, 0);
 	if (info->gd) {
 		xlbd_release_minors(info->gd->first_minor, info->gd->minors);
-		blk_cleanup_disk(info->gd);
+		put_disk(info->gd);
 		blk_mq_free_tag_set(&info->tag_set);
 	}
 
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 18ad43d9933e..c1e85f356e4d 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -337,7 +337,7 @@ static int z2ram_register_disk(int minor)
 	z2ram_gendisk[minor] = disk;
 	err = add_disk(disk);
 	if (err)
-		blk_cleanup_disk(disk);
+		put_disk(disk);
 	return err;
 }
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index b8549c61ff2c..e5233c911e43 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1957,7 +1957,7 @@ static int zram_add(void)
 	return device_id;
 
 out_cleanup_disk:
-	blk_cleanup_disk(zram->disk);
+	put_disk(zram->disk);
 out_free_idr:
 	idr_remove(&zram_index_idr, device_id);
 out_free_dev:
@@ -2008,7 +2008,7 @@ static int zram_remove(struct zram *zram)
 	 */
 	zram_reset_device(zram);
 
-	blk_cleanup_disk(zram->disk);
+	put_disk(zram->disk);
 	kfree(zram);
 	return 0;
 }
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index f4cc90ea6198..ceded5772aac 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -817,7 +817,7 @@ probe_fail_free_irqs:
 	free_irq(HW_EVENT_GDROM_DMA, &gd);
 	free_irq(HW_EVENT_GDROM_CMD, &gd);
 probe_fail_cleanup_disk:
-	blk_cleanup_disk(gd.disk);
+	put_disk(gd.disk);
 probe_fail_free_tag_set:
 	blk_mq_free_tag_set(&gd.tag_set);
 probe_fail_free_cd_info:
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3563d15dbaf2..9dd752d272f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -884,7 +884,7 @@ static void bcache_device_free(struct bcache_device *d)
 	if (disk) {
 		ida_simple_remove(&bcache_device_idx,
 				  first_minor_to_idx(disk->first_minor));
-		blk_cleanup_disk(disk);
+		put_disk(disk);
 	}
 
 	bioset_exit(&d->bio_split);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4c04a980fcd9..8872f9c63688 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1894,7 +1894,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 			del_gendisk(md->disk);
 		}
 		dm_queue_destroy_crypto_profile(md->queue);
-		blk_cleanup_disk(md->disk);
+		put_disk(md->disk);
 	}
 
 	if (md->pending_io) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c7ecb0bffda0..076255ec9ba1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5579,7 +5579,7 @@ static void md_free(struct kobject *ko)
 
 	if (mddev->gendisk) {
 		del_gendisk(mddev->gendisk);
-		blk_cleanup_disk(mddev->gendisk);
+		put_disk(mddev->gendisk);
 	}
 	percpu_ref_exit(&mddev->writes_pending);
 
@@ -5718,7 +5718,7 @@ static int md_alloc(dev_t dev, char *name)
 out_del_gendisk:
 	del_gendisk(disk);
 out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
 out_unlock_disks_mutex:
 	mutex_unlock(&disks_mutex);
 	mddev_put(mddev);
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index ba7e7249a3db..ed9a683b3ca8 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2129,7 +2129,7 @@ static int msb_init_disk(struct memstick_dev *card)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(msb->disk);
+	put_disk(msb->disk);
 out_free_tag_set:
 	blk_mq_free_tag_set(&msb->tag_set);
 out_release_id:
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 72e91c06c618..61cf75d4a01e 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1209,7 +1209,7 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(msb->disk);
+	put_disk(msb->disk);
 out_free_tag_set:
 	blk_mq_free_tag_set(&msb->tag_set);
 out_release_id:
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index f73172111465..60b222799871 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -29,7 +29,7 @@ static void blktrans_dev_release(struct kref *kref)
 	struct mtd_blktrans_dev *dev =
 		container_of(kref, struct mtd_blktrans_dev, ref);
 
-	blk_cleanup_disk(dev->disk);
+	put_disk(dev->disk);
 	blk_mq_free_tag_set(dev->tag_set);
 	kfree(dev->tag_set);
 	list_del(&dev->list);
@@ -398,7 +398,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(new->disk);
+	put_disk(new->disk);
 out_free_tag_set:
 	blk_mq_free_tag_set(new->tag_set);
 out_kfree_tag_set:
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index a78fdf3b30f7..4cf67a2a0d04 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -467,7 +467,7 @@ out_destroy_wq:
 out_remove_minor:
 	idr_remove(&ubiblock_minor_idr, gd->first_minor);
 out_cleanup_disk:
-	blk_cleanup_disk(dev->gd);
+	put_disk(dev->gd);
 out_free_tags:
 	blk_mq_free_tag_set(&dev->tag_set);
 out_free_dev:
@@ -486,7 +486,7 @@ static void ubiblock_cleanup(struct ubiblock *dev)
 	destroy_workqueue(dev->wq);
 	/* Finally destroy the blk queue */
 	dev_info(disk_to_dev(dev->gd), "released");
-	blk_cleanup_disk(dev->gd);
+	put_disk(dev->gd);
 	blk_mq_free_tag_set(&dev->tag_set);
 	idr_remove(&ubiblock_minor_idr, dev->gd->first_minor);
 }
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 9613e54c7a67..5e622c0d4b66 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1548,14 +1548,14 @@ static int btt_blk_init(struct btt *btt)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(btt->btt_disk);
+	put_disk(btt->btt_disk);
 	return rc;
 }
 
 static void btt_blk_cleanup(struct btt *btt)
 {
 	del_gendisk(btt->btt_disk);
-	blk_cleanup_disk(btt->btt_disk);
+	put_disk(btt->btt_disk);
 }
 
 /**
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 629d10fcf53b..a72b81fa3242 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -450,7 +450,7 @@ static void pmem_release_disk(void *__pmem)
 	put_dax(pmem->dax_dev);
 	del_gendisk(pmem->disk);
 
-	blk_cleanup_disk(pmem->disk);
+	put_disk(pmem->disk);
 }
 
 static int pmem_attach_disk(struct device *dev,
@@ -596,7 +596,7 @@ out_cleanup_dax:
 	kill_dax(pmem->dax_dev);
 	put_dax(pmem->dax_dev);
 out:
-	blk_cleanup_disk(pmem->disk);
+	put_disk(pmem->disk);
 	return rc;
 }
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4e3a0f7bfc9c..b5b24998a5ab 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4061,7 +4061,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	mutex_unlock(&ctrl->subsys->lock);
 	nvme_put_ns_head(ns->head);
  out_cleanup_disk:
-	blk_cleanup_disk(disk);
+	put_disk(disk);
  out_free_ns:
 	kfree(ns);
  out_free_id:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d3e2440d8abb..ccf9a6da8f6e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -853,7 +853,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
 	flush_work(&head->requeue_work);
-	blk_cleanup_disk(head->disk);
+	put_disk(head->disk);
 }
 
 void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 8d0d0eaa3059..4d8d1759775a 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -414,7 +414,7 @@ removeseg:
 	kill_dax(dev_info->dax_dev);
 	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);
-	blk_cleanup_disk(dev_info->gd);
+	put_disk(dev_info->gd);
 	up_write(&dcssblk_devices_sem);
 
 	if (device_remove_file_self(dev, attr)) {
@@ -712,7 +712,7 @@ out_dax:
 	put_dax(dev_info->dax_dev);
 put_dev:
 	list_del(&dev_info->lh);
-	blk_cleanup_disk(dev_info->gd);
+	put_disk(dev_info->gd);
 	list_for_each_entry(seg_info, &dev_info->seg_list, lh) {
 		segment_unload(seg_info->segment_name);
 	}
@@ -722,7 +722,7 @@ put_dev:
 dev_list_del:
 	list_del(&dev_info->lh);
 release_gd:
-	blk_cleanup_disk(dev_info->gd);
+	put_disk(dev_info->gd);
 	up_write(&dcssblk_devices_sem);
 seg_list_del:
 	if (dev_info == NULL)
@@ -790,7 +790,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
 	kill_dax(dev_info->dax_dev);
 	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);
-	blk_cleanup_disk(dev_info->gd);
+	put_disk(dev_info->gd);
 
 	/* unload all related segments */
 	list_for_each_entry(entry, &dev_info->seg_list, lh)
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 2a9c0ddcade5..0c1df1d5f1ac 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -501,7 +501,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 	return 0;
 
 out_cleanup_disk:
-	blk_cleanup_disk(bdev->gendisk);
+	put_disk(bdev->gendisk);
 out_tag:
 	blk_mq_free_tag_set(&bdev->tag_set);
 out:
@@ -512,7 +512,7 @@ out:
 void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
 {
 	del_gendisk(bdev->gendisk);
-	blk_cleanup_disk(bdev->gendisk);
+	put_disk(bdev->gendisk);
 	blk_mq_free_tag_set(&bdev->tag_set);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 530eeccffda3..22b12531aeb7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -834,7 +834,6 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
 									\
 	__blk_alloc_disk(node_id, &__key);				\
 })
-void blk_cleanup_disk(struct gendisk *disk);
 
 int __register_blkdev(unsigned int major, const char *name,
 		void (*probe)(dev_t devt));
-- 
cgit v1.2.3-59-g8ed1b


From cc5c516df028b221d94c65c47c5ae8d20f61b6f9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2022 19:18:45 +0200
Subject: block: simplify blktrace sysfs attribute creation

Add the trace attributes to the default gendisk attributes, just like
we already do for partitions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220628171850.1313069-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c            | 11 +----------
 block/blk.h                  |  2 ++
 block/genhd.c                |  3 +++
 block/partitions/core.c      |  1 -
 include/linux/blktrace_api.h | 10 ----------
 kernel/trace/blktrace.c      | 11 -----------
 6 files changed, 6 insertions(+), 32 deletions(-)

(limited to 'block')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9b211e519de8..5f3f73115988 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -810,21 +810,14 @@ int blk_register_queue(struct gendisk *disk)
 	struct device *dev = disk_to_dev(disk);
 	struct request_queue *q = disk->queue;
 
-	ret = blk_trace_init_sysfs(dev);
-	if (ret)
-		return ret;
-
 	mutex_lock(&q->sysfs_dir_lock);
 
 	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
-	if (ret < 0) {
-		blk_trace_remove_sysfs(dev);
+	if (ret < 0)
 		goto unlock;
-	}
 
 	ret = sysfs_create_group(&q->kobj, &queue_attr_group);
 	if (ret) {
-		blk_trace_remove_sysfs(dev);
 		kobject_del(&q->kobj);
 		kobject_put(&dev->kobj);
 		goto unlock;
@@ -890,7 +883,6 @@ put_dev:
 	mutex_unlock(&q->sysfs_lock);
 	mutex_unlock(&q->sysfs_dir_lock);
 	kobject_del(&q->kobj);
-	blk_trace_remove_sysfs(dev);
 	kobject_put(&dev->kobj);
 
 	return ret;
@@ -931,7 +923,6 @@ void blk_unregister_queue(struct gendisk *disk)
 	if (queue_is_mq(q))
 		blk_mq_unregister_dev(disk_to_dev(disk), q);
 	blk_crypto_sysfs_unregister(q);
-	blk_trace_remove_sysfs(disk_to_dev(disk));
 
 	mutex_lock(&q->sysfs_lock);
 	elv_unregister_queue(q);
diff --git a/block/blk.h b/block/blk.h
index 1a0d3e6a4a63..74d59435870c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -452,6 +452,8 @@ extern struct device_attribute dev_attr_events;
 extern struct device_attribute dev_attr_events_async;
 extern struct device_attribute dev_attr_events_poll_msecs;
 
+extern struct attribute_group blk_trace_attr_group;
+
 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
diff --git a/block/genhd.c b/block/genhd.c
index bf9be06af2c8..b1fb7e058b9c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1134,6 +1134,9 @@ static struct attribute_group disk_attr_group = {
 
 static const struct attribute_group *disk_attr_groups[] = {
 	&disk_attr_group,
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+	&blk_trace_attr_group,
+#endif
 	NULL
 };
 
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 8a0ec929023b..7dc487f5b03c 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -9,7 +9,6 @@
 #include <linux/slab.h>
 #include <linux/ctype.h>
 #include <linux/vmalloc.h>
-#include <linux/blktrace_api.h>
 #include <linux/raid/detect.h>
 #include "check.h"
 
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 623e22492afa..f6f9b544365a 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -77,10 +77,6 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
-extern void blk_trace_remove_sysfs(struct device *dev);
-extern int blk_trace_init_sysfs(struct device *dev);
-
-extern struct attribute_group blk_trace_attr_group;
 
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
 # define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
@@ -91,13 +87,7 @@ extern struct attribute_group blk_trace_attr_group;
 # define blk_trace_remove(q)				(-ENOTTY)
 # define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
 # define blk_add_cgroup_trace_msg(q, cg, fmt, ...)	do { } while (0)
-# define blk_trace_remove_sysfs(dev)			do { } while (0)
 # define blk_trace_note_message_enabled(q)		(false)
-static inline int blk_trace_init_sysfs(struct device *dev)
-{
-	return 0;
-}
-
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #ifdef CONFIG_COMPAT
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fe04c6f96ca5..c584effe5fe9 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1867,17 +1867,6 @@ out_unlock_bdev:
 out:
 	return ret ? ret : count;
 }
-
-int blk_trace_init_sysfs(struct device *dev)
-{
-	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
-}
-
-void blk_trace_remove_sysfs(struct device *dev)
-{
-	sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
-}
-
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #ifdef CONFIG_EVENT_TRACING
-- 
cgit v1.2.3-59-g8ed1b


From 060f131e9c438837f9792e456fae424e621fb881 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2022 19:18:46 +0200
Subject: block: remove a superflous queue kobject reference

kobject_add already adds a reference to the parent that is dropped
on deletion, so don't bother grabbing another one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220628171850.1313069-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'block')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 5f3f73115988..f9373da591b8 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -812,14 +812,13 @@ int blk_register_queue(struct gendisk *disk)
 
 	mutex_lock(&q->sysfs_dir_lock);
 
-	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
+	ret = kobject_add(&q->kobj, &dev->kobj, "%s", "queue");
 	if (ret < 0)
 		goto unlock;
 
 	ret = sysfs_create_group(&q->kobj, &queue_attr_group);
 	if (ret) {
 		kobject_del(&q->kobj);
-		kobject_put(&dev->kobj);
 		goto unlock;
 	}
 
@@ -883,7 +882,6 @@ put_dev:
 	mutex_unlock(&q->sysfs_lock);
 	mutex_unlock(&q->sysfs_dir_lock);
 	kobject_del(&q->kobj);
-	kobject_put(&dev->kobj);
 
 	return ret;
 }
@@ -941,6 +939,4 @@ void blk_unregister_queue(struct gendisk *disk)
 	q->sched_debugfs_dir = NULL;
 	q->rqos_debugfs_dir = NULL;
 	mutex_unlock(&q->debugfs_mutex);
-
-	kobject_put(&disk_to_dev(disk)->kobj);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 4a8d14bba486cca6880062f1ef240cf1d45f3367 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2022 19:18:47 +0200
Subject: block: use default groups to register the queue attributes

Set up the default_groups for blk_queue_ktype instead of manually calling
sysfs_create_group.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220628171850.1313069-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f9373da591b8..b72506770b97 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -795,7 +795,13 @@ static const struct sysfs_ops queue_sysfs_ops = {
 	.store	= queue_attr_store,
 };
 
+static const struct attribute_group *blk_queue_attr_groups[] = {
+	&queue_attr_group,
+	NULL
+};
+
 struct kobj_type blk_queue_ktype = {
+	.default_groups = blk_queue_attr_groups,
 	.sysfs_ops	= &queue_sysfs_ops,
 	.release	= blk_release_queue,
 };
@@ -816,12 +822,6 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret < 0)
 		goto unlock;
 
-	ret = sysfs_create_group(&q->kobj, &queue_attr_group);
-	if (ret) {
-		kobject_del(&q->kobj);
-		goto unlock;
-	}
-
 	if (queue_is_mq(q))
 		__blk_mq_register_dev(dev, q);
 	mutex_lock(&q->sysfs_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 81f0c2ef41b02185928563899cd4d618ffc7eebf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2022 19:18:48 +0200
Subject: block: remove the extra gendisk reference in __blk_mq_register_dev

kobject_add already grabs a reference to the parent, no need to have
another one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220628171850.1313069-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sysfs.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index c08426975856..f4caaa668e3c 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -215,7 +215,6 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 
 	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(q->mq_kobj);
-	kobject_put(&dev->kobj);
 
 	q->mq_sysfs_init_done = false;
 }
@@ -261,7 +260,7 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 	WARN_ON_ONCE(!q->kobj.parent);
 	lockdep_assert_held(&q->sysfs_dir_lock);
 
-	ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
+	ret = kobject_add(q->mq_kobj, &dev->kobj, "%s", "mq");
 	if (ret < 0)
 		goto out;
 
@@ -286,7 +285,6 @@ unreg:
 
 	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(q->mq_kobj);
-	kobject_put(&dev->kobj);
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From eaa870f97544668025ba1f96ee267abac7b3c73c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2022 19:18:49 +0200
Subject: blk-mq: rename blk_mq_sysfs_{,un}register

Add a _hctx postfix to better describe what the functions do, match
the debugfs equivalents and release the old names for functions that
should be using them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220628171850.1313069-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sysfs.c | 4 ++--
 block/blk-mq.c       | 4 ++--
 block/blk-mq.h       | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index f4caaa668e3c..ee6efe2b250d 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -288,7 +288,7 @@ unreg:
 	return ret;
 }
 
-void blk_mq_sysfs_unregister(struct request_queue *q)
+void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
@@ -304,7 +304,7 @@ unlock:
 	mutex_unlock(&q->sysfs_dir_lock);
 }
 
-int blk_mq_sysfs_register(struct request_queue *q)
+int blk_mq_sysfs_register_hctxs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b1dbc4b2c2c9..15c7c5c4ad22 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4559,7 +4559,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_debugfs_unregister_hctxs(q);
-		blk_mq_sysfs_unregister(q);
+		blk_mq_sysfs_unregister_hctxs(q);
 	}
 
 	prev_nr_hw_queues = set->nr_hw_queues;
@@ -4590,7 +4590,7 @@ fallback:
 
 reregister:
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_sysfs_register(q);
+		blk_mq_sysfs_register_hctxs(q);
 		blk_mq_debugfs_register_hctxs(q);
 	}
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e4c6fe2c8ac8..a92639f2bfd2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -119,8 +119,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
 extern void blk_mq_sysfs_init(struct request_queue *q);
 extern void blk_mq_sysfs_deinit(struct request_queue *q);
 extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
-extern int blk_mq_sysfs_register(struct request_queue *q);
-extern void blk_mq_sysfs_unregister(struct request_queue *q);
+int blk_mq_sysfs_register_hctxs(struct request_queue *q);
+void blk_mq_sysfs_unregister_hctxs(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 void blk_mq_free_plug_rqs(struct blk_plug *plug);
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
-- 
cgit v1.2.3-59-g8ed1b


From 8682b92e5ab852b93739a0f2b261fff4c733be57 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2022 19:18:50 +0200
Subject: blk-mq: cleanup disk sysfs registration

Pass a gendisk to the sysfs register/unregister functions and give
them descriptive names.  Also move the unregistration helper next
to the one doing the registration.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220628171850.1313069-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sysfs.c   | 39 ++++++++++++++++++++-------------------
 block/blk-mq.h         |  3 ++-
 block/blk-sysfs.c      |  9 ++++-----
 include/linux/blk-mq.h |  1 -
 4 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ee6efe2b250d..93997d297d42 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -203,22 +203,6 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 	return ret;
 }
 
-void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
-{
-	struct blk_mq_hw_ctx *hctx;
-	unsigned long i;
-
-	lockdep_assert_held(&q->sysfs_dir_lock);
-
-	queue_for_each_hw_ctx(q, hctx, i)
-		blk_mq_unregister_hctx(hctx);
-
-	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
-	kobject_del(q->mq_kobj);
-
-	q->mq_sysfs_init_done = false;
-}
-
 void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
 {
 	kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
@@ -251,16 +235,16 @@ void blk_mq_sysfs_init(struct request_queue *q)
 	}
 }
 
-int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
+int blk_mq_sysfs_register(struct gendisk *disk)
 {
+	struct request_queue *q = disk->queue;
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i, j;
 	int ret;
 
-	WARN_ON_ONCE(!q->kobj.parent);
 	lockdep_assert_held(&q->sysfs_dir_lock);
 
-	ret = kobject_add(q->mq_kobj, &dev->kobj, "%s", "mq");
+	ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq");
 	if (ret < 0)
 		goto out;
 
@@ -288,6 +272,23 @@ unreg:
 	return ret;
 }
 
+void blk_mq_sysfs_unregister(struct gendisk *disk)
+{
+	struct request_queue *q = disk->queue;
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i;
+
+	lockdep_assert_held(&q->sysfs_dir_lock);
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_unregister_hctx(hctx);
+
+	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
+	kobject_del(q->mq_kobj);
+
+	q->mq_sysfs_init_done = false;
+}
+
 void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index a92639f2bfd2..54e20edf0da3 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -118,7 +118,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
  */
 extern void blk_mq_sysfs_init(struct request_queue *q);
 extern void blk_mq_sysfs_deinit(struct request_queue *q);
-extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
+int blk_mq_sysfs_register(struct gendisk *disk);
+void blk_mq_sysfs_unregister(struct gendisk *disk);
 int blk_mq_sysfs_register_hctxs(struct request_queue *q);
 void blk_mq_sysfs_unregister_hctxs(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b72506770b97..85ea43eff094 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -812,18 +812,17 @@ struct kobj_type blk_queue_ktype = {
  */
 int blk_register_queue(struct gendisk *disk)
 {
-	int ret;
-	struct device *dev = disk_to_dev(disk);
 	struct request_queue *q = disk->queue;
+	int ret;
 
 	mutex_lock(&q->sysfs_dir_lock);
 
-	ret = kobject_add(&q->kobj, &dev->kobj, "%s", "queue");
+	ret = kobject_add(&q->kobj, &disk_to_dev(disk)->kobj, "queue");
 	if (ret < 0)
 		goto unlock;
 
 	if (queue_is_mq(q))
-		__blk_mq_register_dev(dev, q);
+		blk_mq_sysfs_register(disk);
 	mutex_lock(&q->sysfs_lock);
 
 	mutex_lock(&q->debugfs_mutex);
@@ -919,7 +918,7 @@ void blk_unregister_queue(struct gendisk *disk)
 	 * structures that can be modified through sysfs.
 	 */
 	if (queue_is_mq(q))
-		blk_mq_unregister_dev(disk_to_dev(disk), q);
+		blk_mq_sysfs_unregister(disk);
 	blk_crypto_sysfs_unregister(q);
 
 	mutex_lock(&q->sysfs_lock);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0fd96e92c6c6..43aad0da3305 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -691,7 +691,6 @@ struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		struct request_queue *q);
-void blk_mq_unregister_dev(struct device *, struct request_queue *);
 void blk_mq_destroy_queue(struct request_queue *);
 
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
-- 
cgit v1.2.3-59-g8ed1b


From b9a1c179bdfa133d28ab8b7d30631b0accdc2057 Mon Sep 17 00:00:00 2001
From: Ying Sun <sunying@nj.iscas.ac.cn>
Date: Wed, 29 Jun 2022 14:24:09 +0800
Subject: block: remove "select BLK_RQ_IO_DATA_LEN" from BLK_CGROUP_IOCOST
 dependency

The configuration item BLK_RQ_IO_DATA_LEN is not declared in the kernel.
Select BLK_RQ_IO_DATA_LEN is meaningless which could be removed.

Signed-off-by: Ying Sun <sunying@nj.iscas.ac.cn>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220629062409.19458-1-sunying@nj.iscas.ac.cn
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig | 1 -
 1 file changed, 1 deletion(-)

(limited to 'block')

diff --git a/block/Kconfig b/block/Kconfig
index 50b17e260fa2..444c5ab3b67e 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -147,7 +147,6 @@ config BLK_CGROUP_FC_APPID
 config BLK_CGROUP_IOCOST
 	bool "Enable support for cost model based cgroup IO controller"
 	depends on BLK_CGROUP
-	select BLK_RQ_IO_DATA_LEN
 	select BLK_RQ_ALLOC_TIME
 	help
 	Enabling this option enables the .weight interface for cost
-- 
cgit v1.2.3-59-g8ed1b


From 6a27d28c81bc5843de2490688a04ee5baa6615e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 29 Jun 2022 08:20:12 +0200
Subject: block: move ->ia_ranges from the request_queue to the gendisk

Independent access ranges only matter for file system I/O and are only
valid with a registered gendisk, so move them there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Link: https://lore.kernel.org/r/20220629062013.1331068-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ia-ranges.c  | 18 +++++++++---------
 include/linux/blkdev.h | 12 ++++++------
 2 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'block')

diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
index 47c89e65b57f..c1bf14bcd15f 100644
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -106,7 +106,7 @@ static struct kobj_type blk_ia_ranges_ktype = {
  *
  * Register with sysfs a set of independent access ranges for @disk.
  * If @new_iars is not NULL, this set of ranges is registered and the old set
- * specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is
+ * specified by disk->ia_ranges is unregistered. Otherwise, disk->ia_ranges is
  * registered if it is not already.
  */
 int disk_register_independent_access_ranges(struct gendisk *disk,
@@ -121,12 +121,12 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
 
 	/* If a new range set is specified, unregister the old one */
 	if (new_iars) {
-		if (q->ia_ranges)
+		if (disk->ia_ranges)
 			disk_unregister_independent_access_ranges(disk);
-		q->ia_ranges = new_iars;
+		disk->ia_ranges = new_iars;
 	}
 
-	iars = q->ia_ranges;
+	iars = disk->ia_ranges;
 	if (!iars)
 		return 0;
 
@@ -138,7 +138,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
 	ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype,
 				   &q->kobj, "%s", "independent_access_ranges");
 	if (ret) {
-		q->ia_ranges = NULL;
+		disk->ia_ranges = NULL;
 		kobject_put(&iars->kobj);
 		return ret;
 	}
@@ -164,7 +164,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
 void disk_unregister_independent_access_ranges(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
-	struct blk_independent_access_ranges *iars = q->ia_ranges;
+	struct blk_independent_access_ranges *iars = disk->ia_ranges;
 	int i;
 
 	lockdep_assert_held(&q->sysfs_dir_lock);
@@ -182,7 +182,7 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk)
 		kfree(iars);
 	}
 
-	q->ia_ranges = NULL;
+	disk->ia_ranges = NULL;
 }
 
 static struct blk_independent_access_range *
@@ -242,7 +242,7 @@ static bool disk_check_ia_ranges(struct gendisk *disk,
 static bool disk_ia_ranges_changed(struct gendisk *disk,
 				   struct blk_independent_access_ranges *new)
 {
-	struct blk_independent_access_ranges *old = disk->queue->ia_ranges;
+	struct blk_independent_access_ranges *old = disk->ia_ranges;
 	int i;
 
 	if (!old)
@@ -331,7 +331,7 @@ reg:
 	if (blk_queue_registered(q)) {
 		disk_register_independent_access_ranges(disk, iars);
 	} else {
-		swap(q->ia_ranges, iars);
+		swap(disk->ia_ranges, iars);
 		kfree(iars);
 	}
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 22b12531aeb7..b9a94c53c6cd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -171,6 +171,12 @@ struct gendisk {
 	struct badblocks *bb;
 	struct lockdep_map lockdep_map;
 	u64 diskseq;
+
+	/*
+	 * Independent sector access ranges. This is always NULL for
+	 * devices that do not have multiple independent access ranges.
+	 */
+	struct blk_independent_access_ranges *ia_ranges;
 };
 
 static inline bool disk_live(struct gendisk *disk)
@@ -539,12 +545,6 @@ struct request_queue {
 
 	bool			mq_sysfs_init_done;
 
-	/*
-	 * Independent sector access ranges. This is always NULL for
-	 * devices that do not have multiple independent access ranges.
-	 */
-	struct blk_independent_access_ranges *ia_ranges;
-
 	/**
 	 * @srcu: Sleepable RCU. Use as lock when type of the request queue
 	 * is blocking (BLK_MQ_F_BLOCKING). Must be the last member
-- 
cgit v1.2.3-59-g8ed1b


From 22d0c4080fe49299640d9d6c43154c49794c2825 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 29 Jun 2022 08:20:13 +0200
Subject: block: simplify disk_set_independent_access_ranges

Lift setting disk->ia_ranges from disk_register_independent_access_ranges
into disk_set_independent_access_ranges, and make the behavior the same
for the registered vs non-registered queue cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Link: https://lore.kernel.org/r/20220629062013.1331068-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ia-ranges.c | 57 +++++++++++++++------------------------------------
 block/blk-sysfs.c     |  2 +-
 block/blk.h           |  3 +--
 3 files changed, 18 insertions(+), 44 deletions(-)

(limited to 'block')

diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
index c1bf14bcd15f..2bd1d311033b 100644
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -102,31 +102,18 @@ static struct kobj_type blk_ia_ranges_ktype = {
  * disk_register_independent_access_ranges - register with sysfs a set of
  *		independent access ranges
  * @disk:	Target disk
- * @new_iars:	New set of independent access ranges
  *
  * Register with sysfs a set of independent access ranges for @disk.
- * If @new_iars is not NULL, this set of ranges is registered and the old set
- * specified by disk->ia_ranges is unregistered. Otherwise, disk->ia_ranges is
- * registered if it is not already.
  */
-int disk_register_independent_access_ranges(struct gendisk *disk,
-				struct blk_independent_access_ranges *new_iars)
+int disk_register_independent_access_ranges(struct gendisk *disk)
 {
+	struct blk_independent_access_ranges *iars = disk->ia_ranges;
 	struct request_queue *q = disk->queue;
-	struct blk_independent_access_ranges *iars;
 	int i, ret;
 
 	lockdep_assert_held(&q->sysfs_dir_lock);
 	lockdep_assert_held(&q->sysfs_lock);
 
-	/* If a new range set is specified, unregister the old one */
-	if (new_iars) {
-		if (disk->ia_ranges)
-			disk_unregister_independent_access_ranges(disk);
-		disk->ia_ranges = new_iars;
-	}
-
-	iars = disk->ia_ranges;
 	if (!iars)
 		return 0;
 
@@ -210,6 +197,9 @@ static bool disk_check_ia_ranges(struct gendisk *disk,
 	sector_t sector = 0;
 	int i;
 
+	if (WARN_ON_ONCE(!iars->nr_ia_ranges))
+		return false;
+
 	/*
 	 * While sorting the ranges in increasing LBA order, check that the
 	 * ranges do not overlap, that there are no sector holes and that all
@@ -298,25 +288,15 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
 {
 	struct request_queue *q = disk->queue;
 
-	if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) {
+	mutex_lock(&q->sysfs_dir_lock);
+	mutex_lock(&q->sysfs_lock);
+	if (iars && !disk_check_ia_ranges(disk, iars)) {
 		kfree(iars);
 		iars = NULL;
 	}
-
-	mutex_lock(&q->sysfs_dir_lock);
-	mutex_lock(&q->sysfs_lock);
-
-	if (iars) {
-		if (!disk_check_ia_ranges(disk, iars)) {
-			kfree(iars);
-			iars = NULL;
-			goto reg;
-		}
-
-		if (!disk_ia_ranges_changed(disk, iars)) {
-			kfree(iars);
-			goto unlock;
-		}
+	if (iars && !disk_ia_ranges_changed(disk, iars)) {
+		kfree(iars);
+		goto unlock;
 	}
 
 	/*
@@ -324,17 +304,12 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
 	 * revalidation. If that is the case, we need to unregister the old
 	 * set of independent access ranges and register the new set. If the
 	 * queue is not registered, registration of the device request queue
-	 * will register the independent access ranges, so only swap in the
-	 * new set and free the old one.
+	 * will register the independent access ranges.
 	 */
-reg:
-	if (blk_queue_registered(q)) {
-		disk_register_independent_access_ranges(disk, iars);
-	} else {
-		swap(disk->ia_ranges, iars);
-		kfree(iars);
-	}
-
+	disk_unregister_independent_access_ranges(disk);
+	disk->ia_ranges = iars;
+	if (blk_queue_registered(q))
+		disk_register_independent_access_ranges(disk);
 unlock:
 	mutex_unlock(&q->sysfs_lock);
 	mutex_unlock(&q->sysfs_dir_lock);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 85ea43eff094..58cb9cb9f48c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -832,7 +832,7 @@ int blk_register_queue(struct gendisk *disk)
 		blk_mq_debugfs_register(q);
 	mutex_unlock(&q->debugfs_mutex);
 
-	ret = disk_register_independent_access_ranges(disk, NULL);
+	ret = disk_register_independent_access_ranges(disk);
 	if (ret)
 		goto put_dev;
 
diff --git a/block/blk.h b/block/blk.h
index 74d59435870c..58ad50cacd2d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -459,8 +459,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
 extern const struct address_space_operations def_blk_aops;
 
-int disk_register_independent_access_ranges(struct gendisk *disk,
-				struct blk_independent_access_ranges *new_iars);
+int disk_register_independent_access_ranges(struct gendisk *disk);
 void disk_unregister_independent_access_ranges(struct gendisk *disk);
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
-- 
cgit v1.2.3-59-g8ed1b


From 362b8c16f8fc73fddfe4bded25055fa0c9e2bf1e Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 29 Jun 2022 15:09:16 +0800
Subject: blk-cgroup: factor out blkcg_iostat_update()

To reduce some duplicated code, factor out blkcg_iostat_update(). No
functional change.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220629070917.3113016-2-yanaijie@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

(limited to 'block')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 6906981563f8..34a9452f93a7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -846,6 +846,21 @@ static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
 	}
 }
 
+static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
+				struct blkg_iostat *last)
+{
+	struct blkg_iostat delta;
+	unsigned long flags;
+
+	/* propagate percpu delta to global */
+	flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+	blkg_iostat_set(&delta, cur);
+	blkg_iostat_sub(&delta, last);
+	blkg_iostat_add(&blkg->iostat.cur, &delta);
+	blkg_iostat_add(last, &delta);
+	u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
+}
+
 static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
@@ -860,8 +875,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
 		struct blkcg_gq *parent = blkg->parent;
 		struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
-		struct blkg_iostat cur, delta;
-		unsigned long flags;
+		struct blkg_iostat cur;
 		unsigned int seq;
 
 		/* fetch the current per-cpu values */
@@ -870,23 +884,12 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			blkg_iostat_set(&cur, &bisc->cur);
 		} while (u64_stats_fetch_retry(&bisc->sync, seq));
 
-		/* propagate percpu delta to global */
-		flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
-		blkg_iostat_set(&delta, &cur);
-		blkg_iostat_sub(&delta, &bisc->last);
-		blkg_iostat_add(&blkg->iostat.cur, &delta);
-		blkg_iostat_add(&bisc->last, &delta);
-		u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
+		blkcg_iostat_update(blkg, &cur, &bisc->last);
 
 		/* propagate global delta to parent (unless that's root) */
-		if (parent && parent->parent) {
-			flags = u64_stats_update_begin_irqsave(&parent->iostat.sync);
-			blkg_iostat_set(&delta, &blkg->iostat.cur);
-			blkg_iostat_sub(&delta, &blkg->iostat.last);
-			blkg_iostat_add(&parent->iostat.cur, &delta);
-			blkg_iostat_add(&blkg->iostat.last, &delta);
-			u64_stats_update_end_irqrestore(&parent->iostat.sync, flags);
-		}
+		if (parent && parent->parent)
+			blkcg_iostat_update(parent, &blkg->iostat.cur,
+					    &blkg->iostat.last);
 	}
 
 	rcu_read_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From e55cf798140518b900e5254093f1195f65c23026 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 29 Jun 2022 15:09:17 +0800
Subject: blk-cgroup: factor out blkcg_free_all_cpd()

To reduce some duplicated code, factor out blkcg_free_all_cpd(). No
functional change.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220629070917.3113016-3-yanaijie@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

(limited to 'block')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 34a9452f93a7..27a2d0ca0c70 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1533,6 +1533,18 @@ void blkcg_deactivate_policy(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
 
+static void blkcg_free_all_cpd(struct blkcg_policy *pol)
+{
+	struct blkcg *blkcg;
+
+	list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+		if (blkcg->cpd[pol->plid]) {
+			pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+			blkcg->cpd[pol->plid] = NULL;
+		}
+	}
+}
+
 /**
  * blkcg_policy_register - register a blkcg policy
  * @pol: blkcg policy to register
@@ -1597,14 +1609,9 @@ int blkcg_policy_register(struct blkcg_policy *pol)
 	return 0;
 
 err_free_cpds:
-	if (pol->cpd_free_fn) {
-		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
-			if (blkcg->cpd[pol->plid]) {
-				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
-				blkcg->cpd[pol->plid] = NULL;
-			}
-		}
-	}
+	if (pol->cpd_free_fn)
+		blkcg_free_all_cpd(pol);
+
 	blkcg_policy[pol->plid] = NULL;
 err_unlock:
 	mutex_unlock(&blkcg_pol_mutex);
@@ -1621,8 +1628,6 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
  */
 void blkcg_policy_unregister(struct blkcg_policy *pol)
 {
-	struct blkcg *blkcg;
-
 	mutex_lock(&blkcg_pol_register_mutex);
 
 	if (WARN_ON(blkcg_policy[pol->plid] != pol))
@@ -1637,14 +1642,9 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
 	/* remove cpds and unregister */
 	mutex_lock(&blkcg_pol_mutex);
 
-	if (pol->cpd_free_fn) {
-		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
-			if (blkcg->cpd[pol->plid]) {
-				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
-				blkcg->cpd[pol->plid] = NULL;
-			}
-		}
-	}
+	if (pol->cpd_free_fn)
+		blkcg_free_all_cpd(pol);
+
 	blkcg_policy[pol->plid] = NULL;
 
 	mutex_unlock(&blkcg_pol_mutex);
-- 
cgit v1.2.3-59-g8ed1b


From 99e48cd6855e9535488e3c90d65edd46c6e6fc1b Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Wed, 6 Jul 2022 20:03:50 +0800
Subject: blk-mq: Add a flag for reserved requests

Add a flag for reserved requests so that drivers may know this for any
special handling.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/1657109034-206040-3-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 6 ++++++
 include/linux/blk-mq.h | 6 ++++++
 2 files changed, 12 insertions(+)

(limited to 'block')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 15c7c5c4ad22..a00e43cc67e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -475,6 +475,9 @@ retry:
 	if (!(data->rq_flags & RQF_ELV))
 		blk_mq_tag_busy(data->hctx);
 
+	if (data->flags & BLK_MQ_REQ_RESERVED)
+		data->rq_flags |= RQF_RESV;
+
 	/*
 	 * Try batched alloc if we want more than 1 tag.
 	 */
@@ -589,6 +592,9 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 	else
 		data.rq_flags |= RQF_ELV;
 
+	if (flags & BLK_MQ_REQ_RESERVED)
+		data.rq_flags |= RQF_RESV;
+
 	ret = -EWOULDBLOCK;
 	tag = blk_mq_get_tag(&data);
 	if (tag == BLK_MQ_NO_TAG)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 43aad0da3305..7c62b7fabec7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -57,6 +57,7 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
 /* queue has elevator attached */
 #define RQF_ELV			((__force req_flags_t)(1 << 22))
+#define RQF_RESV			((__force req_flags_t)(1 << 23))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -825,6 +826,11 @@ static inline bool blk_mq_need_time_stamp(struct request *rq)
 	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
 }
 
+static inline bool blk_mq_is_reserved_rq(struct request *rq)
+{
+	return rq->rq_flags & RQF_RESV;
+}
+
 /*
  * Batched completions only work when there is no I/O error and no special
  * ->end_io handler.
-- 
cgit v1.2.3-59-g8ed1b


From 9bdb4833dd399cbff82cc20893f52bdec66a9eca Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Wed, 6 Jul 2022 20:03:51 +0800
Subject: blk-mq: Drop blk_mq_ops.timeout 'reserved' arg

With new API blk_mq_is_reserved_rq() we can tell if a request is from
the reserved pool, so stop passing 'reserved' arg. There is actually
only a single user of that arg for all the callback implementations, which
can use blk_mq_is_reserved_rq() instead.

This will also allow us to stop passing the same 'reserved' around the
blk-mq iter functions next.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # For MMC
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/1657109034-206040-4-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c                    | 6 +++---
 block/bsg-lib.c                   | 2 +-
 drivers/block/mtip32xx/mtip32xx.c | 5 ++---
 drivers/block/nbd.c               | 3 +--
 drivers/block/null_blk/main.c     | 2 +-
 drivers/mmc/core/queue.c          | 3 +--
 drivers/nvme/host/apple.c         | 3 +--
 drivers/nvme/host/fc.c            | 3 +--
 drivers/nvme/host/pci.c           | 2 +-
 drivers/nvme/host/rdma.c          | 3 +--
 drivers/nvme/host/tcp.c           | 3 +--
 drivers/s390/block/dasd.c         | 2 +-
 drivers/s390/block/dasd_int.h     | 2 +-
 drivers/scsi/scsi_error.c         | 3 +--
 drivers/scsi/scsi_priv.h          | 3 +--
 include/linux/blk-mq.h            | 2 +-
 16 files changed, 19 insertions(+), 28 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a00e43cc67e5..cedbec36e907 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1427,13 +1427,13 @@ bool blk_mq_queue_inflight(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_queue_inflight);
 
-static void blk_mq_rq_timed_out(struct request *req, bool reserved)
+static void blk_mq_rq_timed_out(struct request *req)
 {
 	req->rq_flags |= RQF_TIMED_OUT;
 	if (req->q->mq_ops->timeout) {
 		enum blk_eh_timer_return ret;
 
-		ret = req->q->mq_ops->timeout(req, reserved);
+		ret = req->q->mq_ops->timeout(req);
 		if (ret == BLK_EH_DONE)
 			return;
 		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
@@ -1482,7 +1482,7 @@ static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved)
 	 * from blk_mq_check_expired().
 	 */
 	if (blk_mq_req_expired(rq, next))
-		blk_mq_rq_timed_out(rq, reserved);
+		blk_mq_rq_timed_out(rq);
 	return true;
 }
 
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index fd4cd5e68282..d6f5dcdce748 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -331,7 +331,7 @@ void bsg_remove_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(bsg_remove_queue);
 
-static enum blk_eh_timer_return bsg_timeout(struct request *rq, bool reserved)
+static enum blk_eh_timer_return bsg_timeout(struct request *rq)
 {
 	struct bsg_set *bset =
 		container_of(rq->q->tag_set, struct bsg_set, tag_set);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index e116c6cf56f5..5073cb407500 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3357,12 +3357,11 @@ static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq,
 	return 0;
 }
 
-static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
-								bool reserved)
+static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req)
 {
 	struct driver_data *dd = req->q->queuedata;
 
-	if (reserved) {
+	if (blk_mq_is_reserved_rq(req)) {
 		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 
 		cmd->status = BLK_STS_TIMEOUT;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5c4c9c45c6ac..028f23c965df 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -393,8 +393,7 @@ static u32 req_to_nbd_cmd_type(struct request *req)
 	}
 }
 
-static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
-						 bool reserved)
+static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct nbd_device *nbd = cmd->nbd;
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index d695ea29efa6..4e03a020ee3c 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1578,7 +1578,7 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 	return nr;
 }
 
-static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
+static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
 {
 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index f824cfdab75a..fefaa901b50f 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -116,8 +116,7 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
 	}
 }
 
-static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
-						 bool reserved)
+static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req)
 {
 	struct request_queue *q = req->q;
 	struct mmc_queue *mq = q->queuedata;
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 2d23b7d41f7e..5c352d5d8ee6 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -862,8 +862,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
 	}
 }
 
-static enum blk_eh_timer_return apple_nvme_timeout(struct request *req,
-						   bool reserved)
+static enum blk_eh_timer_return apple_nvme_timeout(struct request *req)
 {
 	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct apple_nvme_queue *q = iod->q;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index a96aa831684c..07fd6db5869c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2565,8 +2565,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 	nvme_reset_ctrl(&ctrl->ctrl);
 }
 
-static enum blk_eh_timer_return
-nvme_fc_timeout(struct request *rq, bool reserved)
+static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 247a74aba336..4232192e10dd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1344,7 +1344,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
 		 "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n");
 }
 
-static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
+static enum blk_eh_timer_return nvme_timeout(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = iod->nvmeq;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0fb7c8e7ab0b..a6eaf38b9646 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2013,8 +2013,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
 	nvmf_complete_timed_out_request(rq);
 }
 
-static enum blk_eh_timer_return
-nvme_rdma_timeout(struct request *rq, bool reserved)
+static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_queue *queue = req->queue;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b81942fa5f95..ff502172accd 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2321,8 +2321,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
 	nvmf_complete_timed_out_request(rq);
 }
 
-static enum blk_eh_timer_return
-nvme_tcp_timeout(struct request *rq, bool reserved)
+static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index e8489331f12b..4df8bf6505fc 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3145,7 +3145,7 @@ out:
  * BLK_EH_DONE if the request is handled or terminated
  *		      by the driver.
  */
-enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved)
+enum blk_eh_timer_return dasd_times_out(struct request *req)
 {
 	struct dasd_block *block = req->q->queuedata;
 	struct dasd_device *device;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 83b918b84b4a..333a399f754e 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -795,7 +795,7 @@ void dasd_free_device(struct dasd_device *);
 struct dasd_block *dasd_alloc_block(void);
 void dasd_free_block(struct dasd_block *);
 
-enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved);
+enum blk_eh_timer_return dasd_times_out(struct request *req);
 
 void dasd_enable_device(struct dasd_device *);
 void dasd_set_target_state(struct dasd_device *, int);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a8b71b73a5a5..266ce414589c 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -318,7 +318,6 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
 /**
  * scsi_timeout - Timeout function for normal scsi commands.
  * @req:	request that is timing out.
- * @reserved:	whether the request is a reserved request.
  *
  * Notes:
  *     We do not need to lock this.  There is the potential for a race
@@ -326,7 +325,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
  *     normal completion function determines that the timer has already
  *     fired, then it mustn't do anything.
  */
-enum blk_eh_timer_return scsi_timeout(struct request *req, bool reserved)
+enum blk_eh_timer_return scsi_timeout(struct request *req)
 {
 	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
 	enum blk_eh_timer_return rtn = BLK_EH_DONE;
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 695d0c83ffe0..6eeaa0a7f86d 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -72,8 +72,7 @@ extern void scsi_exit_devinfo(void);
 
 /* scsi_error.c */
 extern void scmd_eh_abort_handler(struct work_struct *work);
-extern enum blk_eh_timer_return scsi_timeout(struct request *req,
-					     bool reserved);
+extern enum blk_eh_timer_return scsi_timeout(struct request *req);
 extern int scsi_error_handler(void *host);
 extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd);
 extern void scsi_eh_wakeup(struct Scsi_Host *shost);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 7c62b7fabec7..c84c56d296fe 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -575,7 +575,7 @@ struct blk_mq_ops {
 	/**
 	 * @timeout: Called on request timeout.
 	 */
-	enum blk_eh_timer_return (*timeout)(struct request *, bool);
+	enum blk_eh_timer_return (*timeout)(struct request *);
 
 	/**
 	 * @poll: Called to poll for completion of a specific tag.
-- 
cgit v1.2.3-59-g8ed1b


From 2dd6532e9591f201e7571b30915db807603ab924 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Wed, 6 Jul 2022 20:03:53 +0800
Subject: blk-mq: Drop 'reserved' arg of busy_tag_iter_fn

We no longer use the 'reserved' arg in busy_tag_iter_fn for any iter
function so it may be dropped.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me> #nvme
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/1657109034-206040-6-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c              |  2 +-
 block/blk-mq-tag.c                  |  7 +++----
 block/blk-mq.c                      | 10 ++++------
 drivers/block/mtip32xx/mtip32xx.c   |  4 ++--
 drivers/block/nbd.c                 |  2 +-
 drivers/infiniband/ulp/srp/ib_srp.c |  3 +--
 drivers/nvme/host/core.c            |  2 +-
 drivers/nvme/host/fc.c              |  3 +--
 drivers/nvme/host/nvme.h            |  2 +-
 drivers/scsi/aacraid/comminit.c     |  2 +-
 drivers/scsi/aacraid/linit.c        |  2 +-
 drivers/scsi/fnic/fnic_scsi.c       | 12 ++++--------
 drivers/scsi/hosts.c                | 14 ++++++--------
 drivers/scsi/mpi3mr/mpi3mr_os.c     | 16 ++++------------
 include/linux/blk-mq.h              |  2 +-
 include/scsi/scsi_host.h            |  2 +-
 16 files changed, 33 insertions(+), 52 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index b80fae7ab1d9..b11add9a95e2 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -375,7 +375,7 @@ struct show_busy_params {
  * e.g. due to a concurrent blk_mq_finish_request() call. Returns true to
  * keep iterating requests.
  */
-static bool hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
+static bool hctx_show_busy_rq(struct request *rq, void *data)
 {
 	const struct show_busy_params *params = data;
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 3cfffef1feb3..4e9b8ec55bda 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -283,7 +283,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 		return true;
 
 	if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
-		ret = iter_data->fn(rq, iter_data->data, reserved);
+		ret = iter_data->fn(rq, iter_data->data);
 	blk_mq_put_rq_ref(rq);
 	return ret;
 }
@@ -354,7 +354,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 
 	if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
 	    blk_mq_request_started(rq))
-		ret = iter_data->fn(rq, iter_data->data, reserved);
+		ret = iter_data->fn(rq, iter_data->data);
 	if (!iter_static_rqs)
 		blk_mq_put_rq_ref(rq);
 	return ret;
@@ -444,8 +444,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
 
-static bool blk_mq_tagset_count_completed_rqs(struct request *rq,
-		void *data, bool reserved)
+static bool blk_mq_tagset_count_completed_rqs(struct request *rq, void *data)
 {
 	unsigned *count = data;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index cedbec36e907..63385742b8a8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -129,8 +129,7 @@ struct mq_inflight {
 	unsigned int inflight[2];
 };
 
-static bool blk_mq_check_inflight(struct request *rq, void *priv,
-				  bool reserved)
+static bool blk_mq_check_inflight(struct request *rq, void *priv)
 {
 	struct mq_inflight *mi = priv;
 
@@ -1400,8 +1399,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-static bool blk_mq_rq_inflight(struct request *rq, void *priv,
-			       bool reserved)
+static bool blk_mq_rq_inflight(struct request *rq, void *priv)
 {
 	/*
 	 * If we find a request that isn't idle we know the queue is busy
@@ -1470,7 +1468,7 @@ void blk_mq_put_rq_ref(struct request *rq)
 		__blk_mq_free_request(rq);
 }
 
-static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved)
+static bool blk_mq_check_expired(struct request *rq, void *priv)
 {
 	unsigned long *next = priv;
 
@@ -3289,7 +3287,7 @@ struct rq_iter_data {
 	bool has_rq;
 };
 
-static bool blk_mq_has_request(struct request *rq, void *data, bool reserved)
+static bool blk_mq_has_request(struct request *rq, void *data)
 {
 	struct rq_iter_data *iter_data = data;
 
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 5073cb407500..562725d222a7 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2441,7 +2441,7 @@ static void mtip_softirq_done_fn(struct request *rq)
 	blk_mq_end_request(rq, cmd->status);
 }
 
-static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_abort_cmd(struct request *req, void *data)
 {
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
@@ -2454,7 +2454,7 @@ static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
 	return true;
 }
 
-static bool mtip_queue_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_queue_cmd(struct request *req, void *data)
 {
 	struct driver_data *dd = data;
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 028f23c965df..f5d098a148cb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -879,7 +879,7 @@ static void recv_work(struct work_struct *work)
 	kfree(args);
 }
 
-static bool nbd_clear_req(struct request *req, void *data, bool reserved)
+static bool nbd_clear_req(struct request *req, void *data)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 6058abf42ba7..7720ea270ed8 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1282,8 +1282,7 @@ struct srp_terminate_context {
 	int scsi_result;
 };
 
-static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr,
-			      bool reserved)
+static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr)
 {
 	struct srp_terminate_context *context = context_ptr;
 	struct srp_target_port *target = context->srp_target;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b5b24998a5ab..9031d10c97dc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -418,7 +418,7 @@ blk_status_t nvme_host_path_error(struct request *req)
 }
 EXPORT_SYMBOL_GPL(nvme_host_path_error);
 
-bool nvme_cancel_request(struct request *req, void *data, bool reserved)
+bool nvme_cancel_request(struct request *req, void *data)
 {
 	dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
 				"Cancelling I/O %d", req->tag);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 07fd6db5869c..9987797620b6 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2456,8 +2456,7 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
  * status. The done path will return the io request back to the block
  * layer with an error status.
  */
-static bool
-nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
+static bool nvme_fc_terminate_exchange(struct request *req, void *data)
 {
 	struct nvme_ctrl *nctrl = data;
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0da94b233fed..e4daa57f8bd5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -697,7 +697,7 @@ static __always_inline void nvme_complete_batch(struct io_comp_batch *iob,
 }
 
 blk_status_t nvme_host_path_error(struct request *req);
-bool nvme_cancel_request(struct request *req, void *data, bool reserved);
+bool nvme_cancel_request(struct request *req, void *data);
 void nvme_cancel_tagset(struct nvme_ctrl *ctrl);
 void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 940a6deab38f..bd99c5492b7d 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -272,7 +272,7 @@ static void aac_queue_init(struct aac_dev * dev, struct aac_queue * q, u32 *mem,
 	q->entries = qsize;
 }
 
-static bool wait_for_io_iter(struct scsi_cmnd *cmd, void *data, bool rsvd)
+static bool wait_for_io_iter(struct scsi_cmnd *cmd, void *data)
 {
 	int *active = data;
 
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 9c27bc37e5de..5ba5c18b77b4 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -633,7 +633,7 @@ struct fib_count_data {
 	int krlcnt;
 };
 
-static bool fib_count_iter(struct scsi_cmnd *scmnd, void *data, bool reserved)
+static bool fib_count_iter(struct scsi_cmnd *scmnd, void *data)
 {
 	struct fib_count_data *fib_count = data;
 
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index e7b7f6d73429..77a4d9f8aa83 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1350,8 +1350,7 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
 	return wq_work_done;
 }
 
-static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data,
-				 bool reserved)
+static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data)
 {
 	const int tag = scsi_cmd_to_rq(sc)->tag;
 	struct fnic *fnic = data;
@@ -1548,8 +1547,7 @@ struct fnic_rport_abort_io_iter_data {
 	int term_cnt;
 };
 
-static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data,
-				     bool reserved)
+static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data)
 {
 	struct fnic_rport_abort_io_iter_data *iter_data = data;
 	struct fnic *fnic = iter_data->fnic;
@@ -2003,8 +2001,7 @@ struct fnic_pending_aborts_iter_data {
 	int ret;
 };
 
-static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc,
-				     void *data, bool reserved)
+static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
 {
 	struct fnic_pending_aborts_iter_data *iter_data = data;
 	struct fnic *fnic = iter_data->fnic;
@@ -2668,8 +2665,7 @@ call_fc_exch_mgr_reset:
 
 }
 
-static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data,
-				   bool reserved)
+static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data)
 {
 	struct fnic_pending_aborts_iter_data *iter_data = data;
 	struct fnic *fnic = iter_data->fnic;
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 8352f90d997d..315c7ac730e9 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -566,8 +566,7 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_host_get);
 
-static bool scsi_host_check_in_flight(struct request *rq, void *data,
-				      bool reserved)
+static bool scsi_host_check_in_flight(struct request *rq, void *data)
 {
 	int *count = data;
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
@@ -662,7 +661,7 @@ void scsi_flush_work(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL_GPL(scsi_flush_work);
 
-static bool complete_all_cmds_iter(struct request *rq, void *data, bool rsvd)
+static bool complete_all_cmds_iter(struct request *rq, void *data)
 {
 	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq);
 	enum scsi_host_status status = *(enum scsi_host_status *)data;
@@ -693,17 +692,16 @@ void scsi_host_complete_all_commands(struct Scsi_Host *shost,
 EXPORT_SYMBOL_GPL(scsi_host_complete_all_commands);
 
 struct scsi_host_busy_iter_data {
-	bool (*fn)(struct scsi_cmnd *, void *, bool);
+	bool (*fn)(struct scsi_cmnd *, void *);
 	void *priv;
 };
 
-static bool __scsi_host_busy_iter_fn(struct request *req, void *priv,
-				   bool reserved)
+static bool __scsi_host_busy_iter_fn(struct request *req, void *priv)
 {
 	struct scsi_host_busy_iter_data *iter_data = priv;
 	struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req);
 
-	return iter_data->fn(sc, iter_data->priv, reserved);
+	return iter_data->fn(sc, iter_data->priv);
 }
 
 /**
@@ -716,7 +714,7 @@ static bool __scsi_host_busy_iter_fn(struct request *req, void *priv,
  * ithas to be provided by the caller
  **/
 void scsi_host_busy_iter(struct Scsi_Host *shost,
-			 bool (*fn)(struct scsi_cmnd *, void *, bool),
+			 bool (*fn)(struct scsi_cmnd *, void *),
 			 void *priv)
 {
 	struct scsi_host_busy_iter_data iter_data = {
diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index d8c195b7ca57..59a18769a4fe 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -381,14 +381,12 @@ void mpi3mr_invalidate_devhandles(struct mpi3mr_ioc *mrioc)
  * mpi3mr_print_scmd - print individual SCSI command
  * @rq: Block request
  * @data: Adapter instance reference
- * @reserved: N/A. Currently not used
  *
  * Print the SCSI command details if it is in LLD scope.
  *
  * Return: true always.
  */
-static bool mpi3mr_print_scmd(struct request *rq,
-	void *data, bool reserved)
+static bool mpi3mr_print_scmd(struct request *rq, void *data)
 {
 	struct mpi3mr_ioc *mrioc = (struct mpi3mr_ioc *)data;
 	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq);
@@ -412,7 +410,6 @@ out:
  * mpi3mr_flush_scmd - Flush individual SCSI command
  * @rq: Block request
  * @data: Adapter instance reference
- * @reserved: N/A. Currently not used
  *
  * Return the SCSI command to the upper layers if it is in LLD
  * scope.
@@ -420,8 +417,7 @@ out:
  * Return: true always.
  */
 
-static bool mpi3mr_flush_scmd(struct request *rq,
-	void *data, bool reserved)
+static bool mpi3mr_flush_scmd(struct request *rq, void *data)
 {
 	struct mpi3mr_ioc *mrioc = (struct mpi3mr_ioc *)data;
 	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq);
@@ -451,7 +447,6 @@ out:
  * mpi3mr_count_dev_pending - Count commands pending for a lun
  * @rq: Block request
  * @data: SCSI device reference
- * @reserved: Unused
  *
  * This is an iterator function called for each SCSI command in
  * a host and if the command is pending in the LLD for the
@@ -461,8 +456,7 @@ out:
  * Return: true always.
  */
 
-static bool mpi3mr_count_dev_pending(struct request *rq,
-	void *data, bool reserved)
+static bool mpi3mr_count_dev_pending(struct request *rq, void *data)
 {
 	struct scsi_device *sdev = (struct scsi_device *)data;
 	struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata;
@@ -485,7 +479,6 @@ out:
  * mpi3mr_count_tgt_pending - Count commands pending for target
  * @rq: Block request
  * @data: SCSI target reference
- * @reserved: Unused
  *
  * This is an iterator function called for each SCSI command in
  * a host and if the command is pending in the LLD for the
@@ -495,8 +488,7 @@ out:
  * Return: true always.
  */
 
-static bool mpi3mr_count_tgt_pending(struct request *rq,
-	void *data, bool reserved)
+static bool mpi3mr_count_tgt_pending(struct request *rq, void *data)
 {
 	struct scsi_target *starget = (struct scsi_target *)data;
 	struct mpi3mr_stgt_priv_data *stgt_priv_data = starget->hostdata;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c84c56d296fe..810a24884f7e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -520,7 +520,7 @@ struct blk_mq_queue_data {
 	bool last;
 };
 
-typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
+typedef bool (busy_tag_iter_fn)(struct request *, void *);
 
 /**
  * struct blk_mq_ops - Callback functions that implements block driver
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 667d889b92b5..65082ecdd557 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -786,7 +786,7 @@ extern int scsi_host_block(struct Scsi_Host *shost);
 extern int scsi_host_unblock(struct Scsi_Host *shost, int new_state);
 
 void scsi_host_busy_iter(struct Scsi_Host *,
-			 bool (*fn)(struct scsi_cmnd *, void *, bool), void *priv);
+			 bool (*fn)(struct scsi_cmnd *, void *), void *priv);
 
 struct class_container;
 
-- 
cgit v1.2.3-59-g8ed1b


From 4cf6e6c0106bf6e6d034fa6043b4428ac2f267fc Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Wed, 6 Jul 2022 20:03:54 +0800
Subject: blk-mq: Drop local variable for reserved tag

The local variable is now only referenced once so drop it.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/1657109034-206040-7-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 4e9b8ec55bda..8e3b36d1cb57 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -262,7 +262,6 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
 	struct request_queue *q = iter_data->q;
 	struct blk_mq_tag_set *set = q->tag_set;
-	bool reserved = iter_data->reserved;
 	struct blk_mq_tags *tags;
 	struct request *rq;
 	bool ret = true;
@@ -272,7 +271,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	else
 		tags = hctx->tags;
 
-	if (!reserved)
+	if (!iter_data->reserved)
 		bitnr += tags->nr_reserved_tags;
 	/*
 	 * We can hit rq == NULL here, because the tagging functions
@@ -333,12 +332,11 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
 	struct bt_tags_iter_data *iter_data = data;
 	struct blk_mq_tags *tags = iter_data->tags;
-	bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
 	struct request *rq;
 	bool ret = true;
 	bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
 
-	if (!reserved)
+	if (!(iter_data->flags & BT_TAG_ITER_RESERVED))
 		bitnr += tags->nr_reserved_tags;
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 6cc37a672a1e21245b931722a016b3bd4ae10e2d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:36 +0200
Subject: block: call blk_queue_free_zone_bitmaps from disk_release

The zone bitmaps are only used for non-passthrough I/O, so free them as
soon as the disk is released.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 2 --
 block/genhd.c     | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 58cb9cb9f48c..7590810cf13f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -776,8 +776,6 @@ static void blk_release_queue(struct kobject *kobj)
 	blk_free_queue_stats(q->stats);
 	kfree(q->poll_stat);
 
-	blk_queue_free_zone_bitmaps(q);
-
 	if (queue_is_mq(q))
 		blk_mq_release(q);
 
diff --git a/block/genhd.c b/block/genhd.c
index b1fb7e058b9c..d0bdeb93e922 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1165,6 +1165,7 @@ static void disk_release(struct device *dev)
 
 	disk_release_events(disk);
 	kfree(disk->random);
+	blk_queue_free_zone_bitmaps(disk->queue);
 	xa_destroy(&disk->part_tbl);
 
 	disk->queue->disk = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From edd1dbc83b1de3b98590b76e09b86ddf6887fce7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:37 +0200
Subject: block: use bdev_is_zoned instead of open coding it

Use bdev_is_zoned in all places where a block_device is available instead
of open coding it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c           | 2 +-
 block/blk-core.c      | 6 +++---
 block/blk-mq.h        | 2 +-
 block/blk-zoned.c     | 9 ++++-----
 drivers/md/dm-table.c | 2 +-
 drivers/md/dm-zone.c  | 2 +-
 drivers/md/dm.c       | 2 +-
 7 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'block')

diff --git a/block/bio.c b/block/bio.c
index 933ea3210954..888ee81ea303 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1033,7 +1033,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
 	if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
 		return 0;
 
-	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
+	if (WARN_ON_ONCE(!bdev_is_zoned(bio->bi_bdev)))
 		return 0;
 
 	return bio_add_hw_page(q, bio, page, len, offset,
diff --git a/block/blk-core.c b/block/blk-core.c
index 5ad7bd93077c..6bcca0b686de 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -569,7 +569,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 	int nr_sectors = bio_sectors(bio);
 
 	/* Only applicable to zoned block devices */
-	if (!blk_queue_is_zoned(q))
+	if (!bdev_is_zoned(bio->bi_bdev))
 		return BLK_STS_NOTSUPP;
 
 	/* The bio sector must point to the start of a sequential zone */
@@ -775,11 +775,11 @@ void submit_bio_noacct(struct bio *bio)
 	case REQ_OP_ZONE_OPEN:
 	case REQ_OP_ZONE_CLOSE:
 	case REQ_OP_ZONE_FINISH:
-		if (!blk_queue_is_zoned(q))
+		if (!bdev_is_zoned(bio->bi_bdev))
 			goto not_supported;
 		break;
 	case REQ_OP_ZONE_RESET_ALL:
-		if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+		if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
 			goto not_supported;
 		break;
 	case REQ_OP_WRITE_ZEROES:
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 54e20edf0da3..31d75a83a562 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -317,7 +317,7 @@ static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
 	 * For regular block devices or read operations, use the context plug
 	 * which may be NULL if blk_start_plug() was not executed.
 	 */
-	if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio)))
+	if (!bdev_is_zoned(bio->bi_bdev) || !op_is_write(bio_op(bio)))
 		return current->plug;
 
 	/* Zoned block device write operation case: do not plug the BIO */
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 38cd840d8838..90a5c9cc80ab 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -149,8 +149,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 	struct gendisk *disk = bdev->bd_disk;
 	sector_t capacity = get_capacity(disk);
 
-	if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
-	    WARN_ON_ONCE(!disk->fops->report_zones))
+	if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones))
 		return -EOPNOTSUPP;
 
 	if (!nr_zones || sector >= capacity)
@@ -268,7 +267,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	struct bio *bio = NULL;
 	int ret = 0;
 
-	if (!blk_queue_is_zoned(q))
+	if (!bdev_is_zoned(bdev))
 		return -EOPNOTSUPP;
 
 	if (bdev_read_only(bdev))
@@ -350,7 +349,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!q)
 		return -ENXIO;
 
-	if (!blk_queue_is_zoned(q))
+	if (!bdev_is_zoned(bdev))
 		return -ENOTTY;
 
 	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
@@ -408,7 +407,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!q)
 		return -ENXIO;
 
-	if (!blk_queue_is_zoned(q))
+	if (!bdev_is_zoned(bdev))
 		return -ENOTTY;
 
 	if (!(mode & FMODE_WRITE))
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bd539afbfe88..b36b528e56cf 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1623,7 +1623,7 @@ static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *
 	struct request_queue *q = bdev_get_queue(dev->bdev);
 	unsigned int *zone_sectors = data;
 
-	if (!blk_queue_is_zoned(q))
+	if (!bdev_is_zoned(dev->bdev))
 		return 0;
 
 	return blk_queue_zone_sectors(q) != *zone_sectors;
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 3e7b1fe1580b..ae616b87c91a 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -270,7 +270,7 @@ static int device_not_zone_append_capable(struct dm_target *ti,
 					  struct dm_dev *dev, sector_t start,
 					  sector_t len, void *data)
 {
-	return !blk_queue_is_zoned(bdev_get_queue(dev->bdev));
+	return !bdev_is_zoned(dev->bdev);
 }
 
 static bool dm_table_supports_zone_append(struct dm_table *t)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8872f9c63688..33d3799bb66e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1033,7 +1033,7 @@ static void clone_endio(struct bio *bio)
 	}
 
 	if (static_branch_unlikely(&zoned_enabled) &&
-	    unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev))))
+	    unlikely(bdev_is_zoned(bio->bi_bdev)))
 		dm_zone_endio(io, bio);
 
 	if (endio) {
-- 
cgit v1.2.3-59-g8ed1b


From 6deacb3bfac2b720e707c566549a7041f17db9c8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:38 +0200
Subject: block: simplify blk_mq_plug

Drop the unused q argument, and invert the check to move the exception
into a branch and the regular path as the normal return.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c  |  2 +-
 block/blk-merge.c |  2 +-
 block/blk-mq.c    |  2 +-
 block/blk-mq.h    | 18 ++++++++----------
 4 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 6bcca0b686de..bc16e9bae2dc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -719,7 +719,7 @@ void submit_bio_noacct(struct bio *bio)
 
 	might_sleep();
 
-	plug = blk_mq_plug(q, bio);
+	plug = blk_mq_plug(bio);
 	if (plug && plug->nowait)
 		bio->bi_opf |= REQ_NOWAIT;
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 0f5f42ebd0bb..5abf5aa5a5f0 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -1051,7 +1051,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	struct blk_plug *plug;
 	struct request *rq;
 
-	plug = blk_mq_plug(q, bio);
+	plug = blk_mq_plug(bio);
 	if (!plug || rq_list_empty(plug->mq_list))
 		return false;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 63385742b8a8..f1b84e20b1a9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2808,7 +2808,7 @@ static void bio_set_ioprio(struct bio *bio)
 void blk_mq_submit_bio(struct bio *bio)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	struct blk_plug *plug = blk_mq_plug(q, bio);
+	struct blk_plug *plug = blk_mq_plug(bio);
 	const int is_sync = op_is_sync(bio->bi_opf);
 	struct request *rq;
 	unsigned int nr_segs = 1;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 31d75a83a562..e694ec67d646 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -294,7 +294,6 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
 
 /*
  * blk_mq_plug() - Get caller context plug
- * @q: request queue
  * @bio : the bio being submitted by the caller context
  *
  * Plugging, by design, may delay the insertion of BIOs into the elevator in
@@ -305,23 +304,22 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
  * order. While this is not a problem with regular block devices, this ordering
  * change can cause write BIO failures with zoned block devices as these
  * require sequential write patterns to zones. Prevent this from happening by
- * ignoring the plug state of a BIO issuing context if the target request queue
- * is for a zoned block device and the BIO to plug is a write operation.
+ * ignoring the plug state of a BIO issuing context if it is for a zoned block
+ * device and the BIO to plug is a write operation.
  *
  * Return current->plug if the bio can be plugged and NULL otherwise
  */
-static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
-					   struct bio *bio)
+static inline struct blk_plug *blk_mq_plug( struct bio *bio)
 {
+	/* Zoned block device write operation case: do not plug the BIO */
+	if (bdev_is_zoned(bio->bi_bdev) && op_is_write(bio_op(bio)))
+		return NULL;
+
 	/*
 	 * For regular block devices or read operations, use the context plug
 	 * which may be NULL if blk_start_plug() was not executed.
 	 */
-	if (!bdev_is_zoned(bio->bi_bdev) || !op_is_write(bio_op(bio)))
-		return current->plug;
-
-	/* Zoned block device write operation case: do not plug the BIO */
-	return NULL;
+	return current->plug;
 }
 
 /* Free all requests on the list */
-- 
cgit v1.2.3-59-g8ed1b


From 052e545c9276f97e86368579fda32aa1ac017d51 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:39 +0200
Subject: block: simplify blk_check_zone_append

Use the bdev based helpers instead of open coding them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index bc16e9bae2dc..b530ce7b370c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -565,7 +565,6 @@ static int blk_partition_remap(struct bio *bio)
 static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 						 struct bio *bio)
 {
-	sector_t pos = bio->bi_iter.bi_sector;
 	int nr_sectors = bio_sectors(bio);
 
 	/* Only applicable to zoned block devices */
@@ -573,8 +572,8 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 		return BLK_STS_NOTSUPP;
 
 	/* The bio sector must point to the start of a sequential zone */
-	if (pos & (blk_queue_zone_sectors(q) - 1) ||
-	    !blk_queue_zone_is_seq(q, pos))
+	if (bio->bi_iter.bi_sector & (bdev_zone_sectors(bio->bi_bdev) - 1) ||
+	    !bio_zone_is_seq(bio))
 		return BLK_STS_IOERR;
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 6b2bd274744e6454ba7bbbe6a09b44866f2f414a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:40 +0200
Subject: block: pass a gendisk to blk_queue_set_zoned

Prepare for storing the zone related field in struct gendisk instead
of struct request_queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c           | 9 +++++----
 block/partitions/core.c        | 2 +-
 drivers/block/null_blk/zoned.c | 2 +-
 drivers/nvme/host/zns.c        | 2 +-
 drivers/scsi/sd.c              | 6 +++---
 drivers/scsi/sd_zbc.c          | 2 +-
 include/linux/blkdev.h         | 2 +-
 7 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'block')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 6ccceb421ed2..35b7bba306a8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -893,18 +893,19 @@ static bool disk_has_partitions(struct gendisk *disk)
 }
 
 /**
- * blk_queue_set_zoned - configure a disk queue zoned model.
+ * disk_set_zoned - configure the zoned model for a disk
  * @disk:	the gendisk of the queue to configure
  * @model:	the zoned model to set
  *
- * Set the zoned model of the request queue of @disk according to @model.
+ * Set the zoned model of @disk to @model.
+ *
  * When @model is BLK_ZONED_HM (host managed), this should be called only
  * if zoned block device support is enabled (CONFIG_BLK_DEV_ZONED option).
  * If @model specifies BLK_ZONED_HA (host aware), the effective model used
  * depends on CONFIG_BLK_DEV_ZONED settings and on the existence of partitions
  * on the disk.
  */
-void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
+void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 {
 	struct request_queue *q = disk->queue;
 
@@ -948,7 +949,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 		blk_queue_clear_zone_settings(q);
 	}
 }
-EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
+EXPORT_SYMBOL_GPL(disk_set_zoned);
 
 int bdev_alignment_offset(struct block_device *bdev)
 {
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 7dc487f5b03c..1a45b1dd6491 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -330,7 +330,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
 	case BLK_ZONED_HA:
 		pr_info("%s: disabling host aware zoned block device support due to partitions\n",
 			disk->disk_name);
-		blk_queue_set_zoned(disk, BLK_ZONED_NONE);
+		disk_set_zoned(disk, BLK_ZONED_NONE);
 		break;
 	case BLK_ZONED_NONE:
 		break;
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 2fdd7b20c224..b47bbd114058 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -159,7 +159,7 @@ int null_register_zoned_dev(struct nullb *nullb)
 	struct nullb_device *dev = nullb->dev;
 	struct request_queue *q = nullb->q;
 
-	blk_queue_set_zoned(nullb->disk, BLK_ZONED_HM);
+	disk_set_zoned(nullb->disk, BLK_ZONED_HM);
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
 
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 9f81beb4df4e..0ed15c2fd56d 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -109,7 +109,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
 		goto free_data;
 	}
 
-	blk_queue_set_zoned(ns->disk, BLK_ZONED_HM);
+	disk_set_zoned(ns->disk, BLK_ZONED_HM);
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
 	blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cb587e488601..eb02d939dd44 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2934,15 +2934,15 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
 
 	if (sdkp->device->type == TYPE_ZBC) {
 		/* Host-managed */
-		blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HM);
+		disk_set_zoned(sdkp->disk, BLK_ZONED_HM);
 	} else {
 		sdkp->zoned = zoned;
 		if (sdkp->zoned == 1) {
 			/* Host-aware */
-			blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HA);
+			disk_set_zoned(sdkp->disk, BLK_ZONED_HA);
 		} else {
 			/* Regular disk or drive managed disk */
-			blk_queue_set_zoned(sdkp->disk, BLK_ZONED_NONE);
+			disk_set_zoned(sdkp->disk, BLK_ZONED_NONE);
 		}
 	}
 
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 6acc4f406eb8..0f5823b67468 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -929,7 +929,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
 		/*
 		 * This can happen for a host aware disk with partitions.
 		 * The block device zone model was already cleared by
-		 * blk_queue_set_zoned(). Only free the scsi disk zone
+		 * disk_set_zoned(). Only free the scsi disk zone
 		 * information and exit early.
 		 */
 		sd_zbc_free_zone_info(sdkp);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 270cd0c55292..416faa013782 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -291,7 +291,7 @@ struct queue_limits {
 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
 			       void *data);
 
-void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
+void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
-- 
cgit v1.2.3-59-g8ed1b


From b3c72f8138b5f967a9fa527af84b35018897aba3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:41 +0200
Subject: block: pass a gendisk to blk_queue_clear_zone_settings

Switch to a gendisk based API in preparation for moving all zone related
fields from the request_queue to the gendisk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c | 2 +-
 block/blk-zoned.c    | 4 +++-
 block/blk.h          | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 35b7bba306a8..8bb9eef5310e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -946,7 +946,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 		blk_queue_zone_write_granularity(q,
 						queue_logical_block_size(q));
 	} else {
-		blk_queue_clear_zone_settings(q);
+		disk_clear_zone_settings(disk);
 	}
 }
 EXPORT_SYMBOL_GPL(disk_set_zoned);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 90a5c9cc80ab..82a4fa89678c 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -622,8 +622,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
 }
 EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
 
-void blk_queue_clear_zone_settings(struct request_queue *q)
+void disk_clear_zone_settings(struct gendisk *disk)
 {
+	struct request_queue *q = disk->queue;
+
 	blk_mq_freeze_queue(q);
 
 	blk_queue_free_zone_bitmaps(q);
diff --git a/block/blk.h b/block/blk.h
index 58ad50cacd2d..7482a3a441dd 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -406,10 +406,10 @@ static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
 
 #ifdef CONFIG_BLK_DEV_ZONED
 void blk_queue_free_zone_bitmaps(struct request_queue *q);
-void blk_queue_clear_zone_settings(struct request_queue *q);
+void disk_clear_zone_settings(struct gendisk *disk);
 #else
 static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
-static inline void blk_queue_clear_zone_settings(struct request_queue *q) {}
+static inline void disk_clear_zone_settings(struct gendisk *disk) {}
 #endif
 
 int blk_alloc_ext_minor(void);
-- 
cgit v1.2.3-59-g8ed1b


From 5d40066567a73a67ddb656ad118c6cfa1c4a6d71 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:42 +0200
Subject: block: pass a gendisk to blk_queue_free_zone_bitmaps

Switch to a gendisk based API in preparation for moving all zone related
fields from the request_queue to the gendisk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c | 8 +++++---
 block/blk.h       | 4 ++--
 block/genhd.c     | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 82a4fa89678c..0d431394cf90 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -449,8 +449,10 @@ fail:
 	return ret;
 }
 
-void blk_queue_free_zone_bitmaps(struct request_queue *q)
+void disk_free_zone_bitmaps(struct gendisk *disk)
 {
+	struct request_queue *q = disk->queue;
+
 	kfree(q->conv_zones_bitmap);
 	q->conv_zones_bitmap = NULL;
 	kfree(q->seq_zones_wlock);
@@ -612,7 +614,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
 		ret = 0;
 	} else {
 		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
-		blk_queue_free_zone_bitmaps(q);
+		disk_free_zone_bitmaps(disk);
 	}
 	blk_mq_unfreeze_queue(q);
 
@@ -628,7 +630,7 @@ void disk_clear_zone_settings(struct gendisk *disk)
 
 	blk_mq_freeze_queue(q);
 
-	blk_queue_free_zone_bitmaps(q);
+	disk_free_zone_bitmaps(disk);
 	blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q);
 	q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE;
 	q->nr_zones = 0;
diff --git a/block/blk.h b/block/blk.h
index 7482a3a441dd..b71e22c97d77 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -405,10 +405,10 @@ static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
 #endif
 
 #ifdef CONFIG_BLK_DEV_ZONED
-void blk_queue_free_zone_bitmaps(struct request_queue *q);
+void disk_free_zone_bitmaps(struct gendisk *disk);
 void disk_clear_zone_settings(struct gendisk *disk);
 #else
-static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
+static inline void disk_free_zone_bitmaps(struct gendisk *disk) {}
 static inline void disk_clear_zone_settings(struct gendisk *disk) {}
 #endif
 
diff --git a/block/genhd.c b/block/genhd.c
index d0bdeb93e922..9d30f159c59a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1165,7 +1165,7 @@ static void disk_release(struct device *dev)
 
 	disk_release_events(disk);
 	kfree(disk->random);
-	blk_queue_free_zone_bitmaps(disk->queue);
+	disk_free_zone_bitmaps(disk);
 	xa_destroy(&disk->part_tbl);
 
 	disk->queue->disk = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 1dc0172027b0aa09823b430e395b1116d2745f36 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:43 +0200
Subject: block: remove queue_max_open_zones and queue_max_active_zones

Always use the bdev based helpers instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c      |  4 ++--
 include/linux/blkdev.h | 37 ++++++++++---------------------------
 2 files changed, 12 insertions(+), 29 deletions(-)

(limited to 'block')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7590810cf13f..5ce72345ac66 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -330,12 +330,12 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
 
 static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(queue_max_open_zones(q), page);
+	return queue_var_show(bdev_max_open_zones(q->disk->part0), page);
 }
 
 static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(queue_max_active_zones(q), page);
+	return queue_var_show(bdev_max_active_zones(q->disk->part0), page);
 }
 
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 416faa013782..7d4105d23b0a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -702,21 +702,22 @@ static inline void blk_queue_max_open_zones(struct request_queue *q,
 	q->max_open_zones = max_open_zones;
 }
 
-static inline unsigned int queue_max_open_zones(const struct request_queue *q)
-{
-	return q->max_open_zones;
-}
-
 static inline void blk_queue_max_active_zones(struct request_queue *q,
 		unsigned int max_active_zones)
 {
 	q->max_active_zones = max_active_zones;
 }
 
-static inline unsigned int queue_max_active_zones(const struct request_queue *q)
+static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
+{
+	return bdev->bd_disk->queue->max_open_zones;
+}
+
+static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 {
-	return q->max_active_zones;
+	return bdev->bd_disk->queue->max_active_zones;
 }
+
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 {
@@ -732,11 +733,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 {
 	return 0;
 }
-static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {
 	return 0;
 }
-static inline unsigned int queue_max_active_zones(const struct request_queue *q)
+static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 {
 	return 0;
 }
@@ -1314,24 +1315,6 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
 	return 0;
 }
 
-static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
-{
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return queue_max_open_zones(q);
-	return 0;
-}
-
-static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
-{
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return queue_max_active_zones(q);
-	return 0;
-}
-
 static inline int queue_dma_alignment(const struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
-- 
cgit v1.2.3-59-g8ed1b


From b623e347323f6464b20fb0d899a0a73522ed8f6c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:45 +0200
Subject: block: replace blkdev_nr_zones with bdev_nr_zones

Pass a block_device instead of a request_queue as that is what most
callers have at hand.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Acked-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c              | 15 ++++++++-------
 block/ioctl.c                  |  2 +-
 drivers/block/null_blk/zoned.c |  2 +-
 drivers/md/dm-zone.c           |  2 +-
 drivers/md/dm-zoned-target.c   |  5 ++---
 drivers/nvme/target/zns.c      |  6 +++---
 fs/zonefs/super.c              | 17 ++++++++---------
 include/linux/blkdev.h         |  4 ++--
 8 files changed, 26 insertions(+), 27 deletions(-)

(limited to 'block')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 0d431394cf90..2dec25d8aa3b 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -108,21 +108,22 @@ void __blk_req_zone_write_unlock(struct request *rq)
 EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
 
 /**
- * blkdev_nr_zones - Get number of zones
- * @disk:	Target gendisk
+ * bdev_nr_zones - Get number of zones
+ * @bdev:	Target device
  *
  * Return the total number of zones of a zoned block device.  For a block
  * device without zone capabilities, the number of zones is always 0.
  */
-unsigned int blkdev_nr_zones(struct gendisk *disk)
+unsigned int bdev_nr_zones(struct block_device *bdev)
 {
-	sector_t zone_sectors = blk_queue_zone_sectors(disk->queue);
+	sector_t zone_sectors = bdev_zone_sectors(bdev);
 
-	if (!blk_queue_is_zoned(disk->queue))
+	if (!bdev_is_zoned(bdev))
 		return 0;
-	return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
+	return (bdev_nr_sectors(bdev) + zone_sectors - 1) >>
+		ilog2(zone_sectors);
 }
-EXPORT_SYMBOL_GPL(blkdev_nr_zones);
+EXPORT_SYMBOL_GPL(bdev_nr_zones);
 
 /**
  * blkdev_report_zones - Get zones information
diff --git a/block/ioctl.c b/block/ioctl.c
index 46949f1b0dba..60121e89052b 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -495,7 +495,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
 	case BLKGETZONESZ:
 		return put_uint(argp, bdev_zone_sectors(bdev));
 	case BLKGETNRZONES:
-		return put_uint(argp, blkdev_nr_zones(bdev->bd_disk));
+		return put_uint(argp, bdev_nr_zones(bdev));
 	case BLKROGET:
 		return put_int(argp, bdev_read_only(bdev) != 0);
 	case BLKSSZGET: /* get block device logical block size */
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 576ab3ed082a..e62c52e96425 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -170,7 +170,7 @@ int null_register_zoned_dev(struct nullb *nullb)
 			return ret;
 	} else {
 		blk_queue_chunk_sectors(q, dev->zone_size_sects);
-		q->nr_zones = blkdev_nr_zones(nullb->disk);
+		q->nr_zones = bdev_nr_zones(nullb->disk->part0);
 	}
 
 	blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index ae616b87c91a..6d105abe1241 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -301,7 +301,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 	 * correct value to be exposed in sysfs queue/nr_zones.
 	 */
 	WARN_ON_ONCE(queue_is_mq(q));
-	q->nr_zones = blkdev_nr_zones(md->disk);
+	q->nr_zones = bdev_nr_zones(md->disk->part0);
 
 	/* Check if zone append is natively supported */
 	if (dm_table_supports_zone_append(t)) {
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 0ec5d8b9b1a4..6ba6ef44b00e 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -793,8 +793,7 @@ static int dmz_fixup_devices(struct dm_target *ti)
 			}
 			zone_nr_sectors = blk_queue_zone_sectors(q);
 			zoned_dev->zone_nr_sectors = zone_nr_sectors;
-			zoned_dev->nr_zones =
-				blkdev_nr_zones(zoned_dev->bdev->bd_disk);
+			zoned_dev->nr_zones = bdev_nr_zones(zoned_dev->bdev);
 		}
 	} else {
 		reg_dev = NULL;
@@ -805,7 +804,7 @@ static int dmz_fixup_devices(struct dm_target *ti)
 		}
 		q = bdev_get_queue(zoned_dev->bdev);
 		zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q);
-		zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk);
+		zoned_dev->nr_zones = bdev_nr_zones(zoned_dev->bdev);
 	}
 
 	if (reg_dev) {
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 82b61acf7a72..c4c99b832daf 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -60,7 +60,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
 	if (ns->bdev->bd_disk->queue->conv_zones_bitmap)
 		return false;
 
-	ret = blkdev_report_zones(ns->bdev, 0, blkdev_nr_zones(bd_disk),
+	ret = blkdev_report_zones(ns->bdev, 0, bdev_nr_zones(ns->bdev),
 				  validate_conv_zones_cb, NULL);
 	if (ret < 0)
 		return false;
@@ -241,7 +241,7 @@ static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
 {
 	unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
 
-	return blkdev_nr_zones(req->ns->bdev->bd_disk) -
+	return bdev_nr_zones(req->ns->bdev) -
 		(sect >> ilog2(bdev_zone_sectors(req->ns->bdev)));
 }
 
@@ -386,7 +386,7 @@ static int zmgmt_send_scan_cb(struct blk_zone *z, unsigned i, void *d)
 static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
 {
 	struct block_device *bdev = req->ns->bdev;
-	unsigned int nr_zones = blkdev_nr_zones(bdev->bd_disk);
+	unsigned int nr_zones = bdev_nr_zones(bdev);
 	struct request_queue *q = bdev_get_queue(bdev);
 	struct bio *bio = NULL;
 	sector_t sector = 0;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 053299758deb..9c0eef1ff32a 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1394,7 +1394,7 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
 {
 	struct super_block *sb = parent->i_sb;
 
-	inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1;
+	inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1;
 	inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
 	inode->i_op = &zonefs_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
@@ -1540,7 +1540,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
 	/*
 	 * The first zone contains the super block: skip it.
 	 */
-	end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk);
+	end = zd->zones + bdev_nr_zones(sb->s_bdev);
 	for (zone = &zd->zones[1]; zone < end; zone = next) {
 
 		next = zone + 1;
@@ -1635,8 +1635,8 @@ static int zonefs_get_zone_info(struct zonefs_zone_data *zd)
 	struct block_device *bdev = zd->sb->s_bdev;
 	int ret;
 
-	zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk),
-			     sizeof(struct blk_zone), GFP_KERNEL);
+	zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone),
+			     GFP_KERNEL);
 	if (!zd->zones)
 		return -ENOMEM;
 
@@ -1648,9 +1648,9 @@ static int zonefs_get_zone_info(struct zonefs_zone_data *zd)
 		return ret;
 	}
 
-	if (ret != blkdev_nr_zones(bdev->bd_disk)) {
+	if (ret != bdev_nr_zones(bdev)) {
 		zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
-			   ret, blkdev_nr_zones(bdev->bd_disk));
+			   ret, bdev_nr_zones(bdev));
 		return -EIO;
 	}
 
@@ -1816,8 +1816,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
 	if (ret)
 		goto cleanup;
 
-	zonefs_info(sb, "Mounting %u zones",
-		    blkdev_nr_zones(sb->s_bdev->bd_disk));
+	zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));
 
 	if (!sbi->s_max_wro_seq_files &&
 	    !sbi->s_max_active_seq_files &&
@@ -1833,7 +1832,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!inode)
 		goto cleanup;
 
-	inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk);
+	inode->i_ino = bdev_nr_zones(sb->s_bdev);
 	inode->i_mode = S_IFDIR | 0555;
 	inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
 	inode->i_op = &zonefs_dir_inode_operations;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c05e1cc05c26..fa2757ef4a84 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -298,7 +298,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
 #define BLK_ALL_ZONES  ((unsigned int)-1)
 int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
-unsigned int blkdev_nr_zones(struct gendisk *disk);
+unsigned int bdev_nr_zones(struct block_device *bdev);
 extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 			    sector_t sectors, sector_t nr_sectors,
 			    gfp_t gfp_mask);
@@ -312,7 +312,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 
 #else /* CONFIG_BLK_DEV_ZONED */
 
-static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
 {
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 375c140c199ebd2866f9c50a0b8853ffca3f1b68 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:46 +0200
Subject: block: use bdev based helpers in blkdev_zone_mgmt{,all}

Use the bdev based helpers instead of the queue based ones to clean up
the code a bit and prepare for storing all zone related fields in
struct gendisk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 2dec25d8aa3b..c2d8a38f449a 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -190,8 +190,8 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
 					  gfp_t gfp_mask)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t capacity = get_capacity(bdev->bd_disk);
-	sector_t zone_sectors = blk_queue_zone_sectors(q);
+	sector_t capacity = bdev_nr_sectors(bdev);
+	sector_t zone_sectors = bdev_zone_sectors(bdev);
 	unsigned long *need_reset;
 	struct bio *bio = NULL;
 	sector_t sector = 0;
@@ -262,8 +262,8 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 		     gfp_t gfp_mask)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t zone_sectors = blk_queue_zone_sectors(q);
-	sector_t capacity = get_capacity(bdev->bd_disk);
+	sector_t zone_sectors = bdev_zone_sectors(bdev);
+	sector_t capacity = bdev_nr_sectors(bdev);
 	sector_t end_sector = sector + nr_sectors;
 	struct bio *bio = NULL;
 	int ret = 0;
-- 
cgit v1.2.3-59-g8ed1b


From d86e716aa40643e3eb8c69fab3a198146bf76dd6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 Jul 2022 09:03:50 +0200
Subject: block: move zone related fields to struct gendisk

Move the zone related fields that are currently stored in
struct request_queue to struct gendisk as these are part of the highlevel
block layer API and are only used for non-passthrough I/O that requires
the gendisk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220706070350.1703384-17-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs-zoned.c   |  6 +--
 block/blk-sysfs.c              |  2 +-
 block/blk-zoned.c              | 45 ++++++++++-----------
 drivers/block/null_blk/zoned.c |  2 +-
 drivers/md/dm-zone.c           | 74 ++++++++++++++++------------------
 drivers/nvme/host/multipath.c  |  2 +-
 drivers/nvme/target/zns.c      |  4 +-
 drivers/scsi/sd_zbc.c          |  2 +-
 include/linux/blk-mq.h         |  8 ++--
 include/linux/blkdev.h         | 91 +++++++++++++++++++-----------------------
 10 files changed, 111 insertions(+), 125 deletions(-)

(limited to 'block')

diff --git a/block/blk-mq-debugfs-zoned.c b/block/blk-mq-debugfs-zoned.c
index 038cb627c868..a77b099c34b7 100644
--- a/block/blk-mq-debugfs-zoned.c
+++ b/block/blk-mq-debugfs-zoned.c
@@ -11,11 +11,11 @@ int queue_zone_wlock_show(void *data, struct seq_file *m)
 	struct request_queue *q = data;
 	unsigned int i;
 
-	if (!q->seq_zones_wlock)
+	if (!q->disk->seq_zones_wlock)
 		return 0;
 
-	for (i = 0; i < q->nr_zones; i++)
-		if (test_bit(i, q->seq_zones_wlock))
+	for (i = 0; i < q->disk->nr_zones; i++)
+		if (test_bit(i, q->disk->seq_zones_wlock))
 			seq_printf(m, "%u\n", i);
 
 	return 0;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 5ce72345ac66..c0303026752d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -325,7 +325,7 @@ static ssize_t queue_zoned_show(struct request_queue *q, char *page)
 
 static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(blk_queue_nr_zones(q), page);
+	return queue_var_show(disk_nr_zones(q->disk), page);
 }
 
 static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index c2d8a38f449a..7c017458d5ce 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -57,10 +57,10 @@ EXPORT_SYMBOL_GPL(blk_zone_cond_str);
  */
 bool blk_req_needs_zone_write_lock(struct request *rq)
 {
-	if (!rq->q->seq_zones_wlock)
+	if (blk_rq_is_passthrough(rq))
 		return false;
 
-	if (blk_rq_is_passthrough(rq))
+	if (!rq->q->disk->seq_zones_wlock)
 		return false;
 
 	switch (req_op(rq)) {
@@ -77,7 +77,7 @@ bool blk_req_zone_write_trylock(struct request *rq)
 {
 	unsigned int zno = blk_rq_zone_no(rq);
 
-	if (test_and_set_bit(zno, rq->q->seq_zones_wlock))
+	if (test_and_set_bit(zno, rq->q->disk->seq_zones_wlock))
 		return false;
 
 	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
@@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock);
 void __blk_req_zone_write_lock(struct request *rq)
 {
 	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
-					  rq->q->seq_zones_wlock)))
+					  rq->q->disk->seq_zones_wlock)))
 		return;
 
 	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
@@ -101,9 +101,9 @@ EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
 void __blk_req_zone_write_unlock(struct request *rq)
 {
 	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
-	if (rq->q->seq_zones_wlock)
+	if (rq->q->disk->seq_zones_wlock)
 		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
-						 rq->q->seq_zones_wlock));
+						 rq->q->disk->seq_zones_wlock));
 }
 EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
 
@@ -189,7 +189,7 @@ static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
 static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
 					  gfp_t gfp_mask)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
+	struct gendisk *disk = bdev->bd_disk;
 	sector_t capacity = bdev_nr_sectors(bdev);
 	sector_t zone_sectors = bdev_zone_sectors(bdev);
 	unsigned long *need_reset;
@@ -197,19 +197,18 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
 	sector_t sector = 0;
 	int ret;
 
-	need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones);
+	need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
 	if (!need_reset)
 		return -ENOMEM;
 
-	ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0,
-				q->nr_zones, blk_zone_need_reset_cb,
-				need_reset);
+	ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
+				       blk_zone_need_reset_cb, need_reset);
 	if (ret < 0)
 		goto out_free_need_reset;
 
 	ret = 0;
 	while (sector < capacity) {
-		if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) {
+		if (!test_bit(disk_zone_no(disk, sector), need_reset)) {
 			sector += zone_sectors;
 			continue;
 		}
@@ -452,12 +451,10 @@ fail:
 
 void disk_free_zone_bitmaps(struct gendisk *disk)
 {
-	struct request_queue *q = disk->queue;
-
-	kfree(q->conv_zones_bitmap);
-	q->conv_zones_bitmap = NULL;
-	kfree(q->seq_zones_wlock);
-	q->seq_zones_wlock = NULL;
+	kfree(disk->conv_zones_bitmap);
+	disk->conv_zones_bitmap = NULL;
+	kfree(disk->seq_zones_wlock);
+	disk->seq_zones_wlock = NULL;
 }
 
 struct blk_revalidate_zone_args {
@@ -607,9 +604,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
 	blk_mq_freeze_queue(q);
 	if (ret > 0) {
 		blk_queue_chunk_sectors(q, args.zone_sectors);
-		q->nr_zones = args.nr_zones;
-		swap(q->seq_zones_wlock, args.seq_zones_wlock);
-		swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
+		disk->nr_zones = args.nr_zones;
+		swap(disk->seq_zones_wlock, args.seq_zones_wlock);
+		swap(disk->conv_zones_bitmap, args.conv_zones_bitmap);
 		if (update_driver_data)
 			update_driver_data(disk);
 		ret = 0;
@@ -634,9 +631,9 @@ void disk_clear_zone_settings(struct gendisk *disk)
 	disk_free_zone_bitmaps(disk);
 	blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q);
 	q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE;
-	q->nr_zones = 0;
-	q->max_open_zones = 0;
-	q->max_active_zones = 0;
+	disk->nr_zones = 0;
+	disk->max_open_zones = 0;
+	disk->max_active_zones = 0;
 	q->limits.chunk_sectors = 0;
 	q->limits.zone_write_granularity = 0;
 	q->limits.max_zone_append_sectors = 0;
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index e62c52e96425..64b06caab984 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -170,7 +170,7 @@ int null_register_zoned_dev(struct nullb *nullb)
 			return ret;
 	} else {
 		blk_queue_chunk_sectors(q, dev->zone_size_sects);
-		q->nr_zones = bdev_nr_zones(nullb->disk->part0);
+		nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
 	}
 
 	blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 842c31019b51..2b89cde30c9e 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -139,13 +139,11 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
 
 void dm_cleanup_zoned_dev(struct mapped_device *md)
 {
-	struct request_queue *q = md->queue;
-
-	if (q) {
-		kfree(q->conv_zones_bitmap);
-		q->conv_zones_bitmap = NULL;
-		kfree(q->seq_zones_wlock);
-		q->seq_zones_wlock = NULL;
+	if (md->disk) {
+		kfree(md->disk->conv_zones_bitmap);
+		md->disk->conv_zones_bitmap = NULL;
+		kfree(md->disk->seq_zones_wlock);
+		md->disk->seq_zones_wlock = NULL;
 	}
 
 	kvfree(md->zwp_offset);
@@ -179,31 +177,31 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
 				 void *data)
 {
 	struct mapped_device *md = data;
-	struct request_queue *q = md->queue;
+	struct gendisk *disk = md->disk;
 
 	switch (zone->type) {
 	case BLK_ZONE_TYPE_CONVENTIONAL:
-		if (!q->conv_zones_bitmap) {
-			q->conv_zones_bitmap =
-				kcalloc(BITS_TO_LONGS(q->nr_zones),
+		if (!disk->conv_zones_bitmap) {
+			disk->conv_zones_bitmap =
+				kcalloc(BITS_TO_LONGS(disk->nr_zones),
 					sizeof(unsigned long), GFP_NOIO);
-			if (!q->conv_zones_bitmap)
+			if (!disk->conv_zones_bitmap)
 				return -ENOMEM;
 		}
-		set_bit(idx, q->conv_zones_bitmap);
+		set_bit(idx, disk->conv_zones_bitmap);
 		break;
 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
-		if (!q->seq_zones_wlock) {
-			q->seq_zones_wlock =
-				kcalloc(BITS_TO_LONGS(q->nr_zones),
+		if (!disk->seq_zones_wlock) {
+			disk->seq_zones_wlock =
+				kcalloc(BITS_TO_LONGS(disk->nr_zones),
 					sizeof(unsigned long), GFP_NOIO);
-			if (!q->seq_zones_wlock)
+			if (!disk->seq_zones_wlock)
 				return -ENOMEM;
 		}
 		if (!md->zwp_offset) {
 			md->zwp_offset =
-				kvcalloc(q->nr_zones, sizeof(unsigned int),
+				kvcalloc(disk->nr_zones, sizeof(unsigned int),
 					 GFP_KERNEL);
 			if (!md->zwp_offset)
 				return -ENOMEM;
@@ -228,7 +226,7 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
  */
 static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
 {
-	struct request_queue *q = md->queue;
+	struct gendisk *disk = md->disk;
 	unsigned int noio_flag;
 	int ret;
 
@@ -236,7 +234,7 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
 	 * Check if something changed. If yes, cleanup the current resources
 	 * and reallocate everything.
 	 */
-	if (!q->nr_zones || q->nr_zones != md->nr_zones)
+	if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
 		dm_cleanup_zoned_dev(md);
 	if (md->nr_zones)
 		return 0;
@@ -246,17 +244,17 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
 	 * operations in this context are done as if GFP_NOIO was specified.
 	 */
 	noio_flag = memalloc_noio_save();
-	ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones,
+	ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
 				     dm_zone_revalidate_cb, md);
 	memalloc_noio_restore(noio_flag);
 	if (ret < 0)
 		goto err;
-	if (ret != q->nr_zones) {
+	if (ret != disk->nr_zones) {
 		ret = -EIO;
 		goto err;
 	}
 
-	md->nr_zones = q->nr_zones;
+	md->nr_zones = disk->nr_zones;
 
 	return 0;
 
@@ -301,7 +299,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 	 * correct value to be exposed in sysfs queue/nr_zones.
 	 */
 	WARN_ON_ONCE(queue_is_mq(q));
-	q->nr_zones = bdev_nr_zones(md->disk->part0);
+	md->disk->nr_zones = bdev_nr_zones(md->disk->part0);
 
 	/* Check if zone append is natively supported */
 	if (dm_table_supports_zone_append(t)) {
@@ -466,26 +464,26 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int z
 	}
 }
 
-static inline void dm_zone_lock(struct request_queue *q,
-				unsigned int zno, struct bio *clone)
+static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
+				struct bio *clone)
 {
 	if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
 		return;
 
-	wait_on_bit_lock_io(q->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
+	wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
 	bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
 }
 
-static inline void dm_zone_unlock(struct request_queue *q,
-				  unsigned int zno, struct bio *clone)
+static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
+				  struct bio *clone)
 {
 	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
 		return;
 
-	WARN_ON_ONCE(!test_bit(zno, q->seq_zones_wlock));
-	clear_bit_unlock(zno, q->seq_zones_wlock);
+	WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
+	clear_bit_unlock(zno, disk->seq_zones_wlock);
 	smp_mb__after_atomic();
-	wake_up_bit(q->seq_zones_wlock, zno);
+	wake_up_bit(disk->seq_zones_wlock, zno);
 
 	bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
 }
@@ -520,7 +518,6 @@ int dm_zone_map_bio(struct dm_target_io *tio)
 	struct dm_io *io = tio->io;
 	struct dm_target *ti = tio->ti;
 	struct mapped_device *md = io->md;
-	struct request_queue *q = md->queue;
 	struct bio *clone = &tio->clone;
 	struct orig_bio_details orig_bio_details;
 	unsigned int zno;
@@ -536,7 +533,7 @@ int dm_zone_map_bio(struct dm_target_io *tio)
 
 	/* Lock the target zone */
 	zno = bio_zone_no(clone);
-	dm_zone_lock(q, zno, clone);
+	dm_zone_lock(md->disk, zno, clone);
 
 	orig_bio_details.nr_sectors = bio_sectors(clone);
 	orig_bio_details.op = bio_op(clone);
@@ -546,7 +543,7 @@ int dm_zone_map_bio(struct dm_target_io *tio)
 	 * both valid, and if the bio is a zone append, remap it to a write.
 	 */
 	if (!dm_zone_map_bio_begin(md, zno, clone)) {
-		dm_zone_unlock(q, zno, clone);
+		dm_zone_unlock(md->disk, zno, clone);
 		return DM_MAPIO_KILL;
 	}
 
@@ -570,12 +567,12 @@ int dm_zone_map_bio(struct dm_target_io *tio)
 		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
 					  *tio->len_ptr);
 		if (sts != BLK_STS_OK)
-			dm_zone_unlock(q, zno, clone);
+			dm_zone_unlock(md->disk, zno, clone);
 		break;
 	case DM_MAPIO_REQUEUE:
 	case DM_MAPIO_KILL:
 	default:
-		dm_zone_unlock(q, zno, clone);
+		dm_zone_unlock(md->disk, zno, clone);
 		sts = BLK_STS_IOERR;
 		break;
 	}
@@ -592,7 +589,6 @@ int dm_zone_map_bio(struct dm_target_io *tio)
 void dm_zone_endio(struct dm_io *io, struct bio *clone)
 {
 	struct mapped_device *md = io->md;
-	struct request_queue *q = md->queue;
 	struct gendisk *disk = md->disk;
 	struct bio *orig_bio = io->orig_bio;
 	unsigned int zwp_offset;
@@ -651,5 +647,5 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
 				zwp_offset - bio_sectors(orig_bio);
 	}
 
-	dm_zone_unlock(q, zno, clone);
+	dm_zone_unlock(disk, zno, clone);
 }
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index ccf9a6da8f6e..f26640ccb955 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -830,7 +830,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 				   ns->head->disk->queue);
 #ifdef CONFIG_BLK_DEV_ZONED
 	if (blk_queue_is_zoned(ns->queue) && ns->head->disk)
-		ns->head->disk->queue->nr_zones = ns->queue->nr_zones;
+		ns->head->disk->nr_zones = ns->disk->nr_zones;
 #endif
 }
 
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 9d8717126ab3..c0ee21fcab81 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -57,7 +57,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
 	 * zones, reject the device. Otherwise, use report zones to detect if
 	 * the device has conventional zones.
 	 */
-	if (ns->bdev->bd_disk->queue->conv_zones_bitmap)
+	if (ns->bdev->bd_disk->conv_zones_bitmap)
 		return false;
 
 	ret = blkdev_report_zones(ns->bdev, 0, bdev_nr_zones(ns->bdev),
@@ -414,7 +414,7 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
 	}
 
 	while (sector < bdev_nr_sectors(bdev)) {
-		if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) {
+		if (test_bit(disk_zone_no(bdev->bd_disk, sector), d.zbitmap)) {
 			bio = blk_next_bio(bio, bdev, 0,
 				zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC,
 				GFP_KERNEL);
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index b4106f899734..b8c97456506a 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -855,7 +855,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
 
 	if (sdkp->zone_info.zone_blocks == zone_blocks &&
 	    sdkp->zone_info.nr_zones == nr_zones &&
-	    disk->queue->nr_zones == nr_zones)
+	    disk->nr_zones == nr_zones)
 		goto unlock;
 
 	flags = memalloc_noio_save();
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 810a24884f7e..d74f6a6b7e69 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1129,12 +1129,12 @@ void blk_dump_rq_flags(struct request *, char *);
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
-	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+	return disk_zone_no(rq->q->disk, blk_rq_pos(rq));
 }
 
 static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
 {
-	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
+	return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
 }
 
 bool blk_req_needs_zone_write_lock(struct request *rq);
@@ -1156,8 +1156,8 @@ static inline void blk_req_zone_write_unlock(struct request *rq)
 
 static inline bool blk_req_zone_is_write_locked(struct request *rq)
 {
-	return rq->q->seq_zones_wlock &&
-		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+	return rq->q->disk->seq_zones_wlock &&
+		test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock);
 }
 
 static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 21b97f7115dc..22c477fadc0f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -164,6 +164,29 @@ struct gendisk {
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct kobject integrity_kobj;
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
+
+#ifdef CONFIG_BLK_DEV_ZONED
+	/*
+	 * Zoned block device information for request dispatch control.
+	 * nr_zones is the total number of zones of the device. This is always
+	 * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
+	 * bits which indicates if a zone is conventional (bit set) or
+	 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
+	 * bits which indicates if a zone is write locked, that is, if a write
+	 * request targeting the zone was dispatched.
+	 *
+	 * Reads of this information must be protected with blk_queue_enter() /
+	 * blk_queue_exit(). Modifying this information is only allowed while
+	 * no requests are being processed. See also blk_mq_freeze_queue() and
+	 * blk_mq_unfreeze_queue().
+	 */
+	unsigned int		nr_zones;
+	unsigned int		max_open_zones;
+	unsigned int		max_active_zones;
+	unsigned long		*conv_zones_bitmap;
+	unsigned long		*seq_zones_wlock;
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 #if IS_ENABLED(CONFIG_CDROM)
 	struct cdrom_device_info *cdi;
 #endif
@@ -467,31 +490,6 @@ struct request_queue {
 
 	unsigned int		required_elevator_features;
 
-#ifdef CONFIG_BLK_DEV_ZONED
-	/*
-	 * Zoned block device information for request dispatch control.
-	 * nr_zones is the total number of zones of the device. This is always
-	 * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
-	 * bits which indicates if a zone is conventional (bit set) or
-	 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
-	 * bits which indicates if a zone is write locked, that is, if a write
-	 * request targeting the zone was dispatched. All three fields are
-	 * initialized by the low level device driver (e.g. scsi/sd.c).
-	 * Stacking drivers (device mappers) may or may not initialize
-	 * these fields.
-	 *
-	 * Reads of this information must be protected with blk_queue_enter() /
-	 * blk_queue_exit(). Modifying this information is only allowed while
-	 * no requests are being processed. See also blk_mq_freeze_queue() and
-	 * blk_mq_unfreeze_queue().
-	 */
-	unsigned int		nr_zones;
-	unsigned long		*conv_zones_bitmap;
-	unsigned long		*seq_zones_wlock;
-	unsigned int		max_open_zones;
-	unsigned int		max_active_zones;
-#endif /* CONFIG_BLK_DEV_ZONED */
-
 	int			node;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	struct blk_trace __rcu	*blk_trace;
@@ -668,63 +666,59 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+static inline unsigned int disk_nr_zones(struct gendisk *disk)
 {
-	return blk_queue_is_zoned(q) ? q->nr_zones : 0;
+	return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0;
 }
 
-static inline unsigned int blk_queue_zone_no(struct request_queue *q,
-					     sector_t sector)
+static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 {
-	if (!blk_queue_is_zoned(q))
+	if (!blk_queue_is_zoned(disk->queue))
 		return 0;
-	return sector >> ilog2(q->limits.chunk_sectors);
+	return sector >> ilog2(disk->queue->limits.chunk_sectors);
 }
 
-static inline bool blk_queue_zone_is_seq(struct request_queue *q,
-					 sector_t sector)
+static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
 {
-	if (!blk_queue_is_zoned(q))
+	if (!blk_queue_is_zoned(disk->queue))
 		return false;
-	if (!q->conv_zones_bitmap)
+	if (!disk->conv_zones_bitmap)
 		return true;
-	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
+	return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
 }
 
 static inline void disk_set_max_open_zones(struct gendisk *disk,
 		unsigned int max_open_zones)
 {
-	disk->queue->max_open_zones = max_open_zones;
+	disk->max_open_zones = max_open_zones;
 }
 
 static inline void disk_set_max_active_zones(struct gendisk *disk,
 		unsigned int max_active_zones)
 {
-	disk->queue->max_active_zones = max_active_zones;
+	disk->max_active_zones = max_active_zones;
 }
 
 static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {
-	return bdev->bd_disk->queue->max_open_zones;
+	return bdev->bd_disk->max_open_zones;
 }
 
 static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 {
-	return bdev->bd_disk->queue->max_active_zones;
+	return bdev->bd_disk->max_active_zones;
 }
 
 #else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+static inline unsigned int disk_nr_zones(struct gendisk *disk)
 {
 	return 0;
 }
-static inline bool blk_queue_zone_is_seq(struct request_queue *q,
-					 sector_t sector)
+static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
 {
 	return false;
 }
-static inline unsigned int blk_queue_zone_no(struct request_queue *q,
-					     sector_t sector)
+static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 {
 	return 0;
 }
@@ -732,6 +726,7 @@ static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {
 	return 0;
 }
+
 static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 {
 	return 0;
@@ -900,14 +895,12 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond);
 
 static inline unsigned int bio_zone_no(struct bio *bio)
 {
-	return blk_queue_zone_no(bdev_get_queue(bio->bi_bdev),
-				 bio->bi_iter.bi_sector);
+	return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
 }
 
 static inline unsigned int bio_zone_is_seq(struct bio *bio)
 {
-	return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev),
-				     bio->bi_iter.bi_sector);
+	return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From f3ec5d11554778c24ac8915e847223ed71d104fc Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 11 Jul 2022 17:08:08 +0800
Subject: blk-mq: don't create hctx debugfs dir until q->debugfs_dir is created

blk_mq_debugfs_register_hctx() can be called by blk_mq_update_nr_hw_queues
when gendisk isn't added yet, such as nvme tcp.

Fixes the warning of 'debugfs: Directory 'hctx0' with parent '/' already present!'
which can be observed reliably when running blktests nvme/005.

Fixes: 6cfc0081b046 ("blk-mq: no need to check return value of debugfs_create functions")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220711090808.259682-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'block')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index b11add9a95e2..7ee1b13380d0 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -728,6 +728,9 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
 	char name[20];
 	int i;
 
+	if (!q->debugfs_dir)
+		return;
+
 	snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
 	hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir);
 
-- 
cgit v1.2.3-59-g8ed1b


From f4b1e27db49c8b985b116aa99481b4c6a4342ed4 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Tue, 12 Jul 2022 17:05:47 +0200
Subject: block/rq_qos: Use atomic_try_cmpxchg in atomic_inc_below

Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old in
atomic_inc_below. x86 CMPXCHG instruction returns success in ZF flag,
so this change saves a compare after cmpxchg (and related move instruction
in front of cmpxchg).

Also, atomic_try_cmpxchg implicitly assigns old *ptr value to "old" when
cmpxchg fails, enabling further code simplifications.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220712150547.5786-1-ubizjak@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-rq-qos.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'block')

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index d3a75693adbf..88f0fe7dcf54 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -10,16 +10,10 @@ static bool atomic_inc_below(atomic_t *v, unsigned int below)
 {
 	unsigned int cur = atomic_read(v);
 
-	for (;;) {
-		unsigned int old;
-
+	do {
 		if (cur >= below)
 			return false;
-		old = atomic_cmpxchg(v, cur, cur + 1);
-		if (old == cur)
-			break;
-		cur = old;
-	}
+	} while (!atomic_try_cmpxchg(v, &cur, cur + 1));
 
 	return true;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 939f9dd040fe1063d884f8f0f89b037093fe2341 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Tue, 12 Jul 2022 17:27:41 +0200
Subject: block: Use try_cmpxchg in update_io_ticks

Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in
update_io_ticks. x86 CMPXCHG instruction returns success in ZF flag,
so this change saves a compare after cmpxchg (and related
move instruction in front of cmpxchg).

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220712152741.7324-1-ubizjak@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index b530ce7b370c..8365996a8ef8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -943,7 +943,7 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
 again:
 	stamp = READ_ONCE(part->bd_stamp);
 	if (unlikely(time_after(now, stamp))) {
-		if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp))
+		if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now)))
 			__part_stat_add(part, io_ticks, end ? now - stamp : 1);
 	}
 	if (part->bd_partno) {
-- 
cgit v1.2.3-59-g8ed1b


From aee8960c2eae12636040dbf0f04e135273b1612d Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Tue, 12 Jul 2022 17:19:47 +0200
Subject: blk-iolatency: Use atomic{,64}_try_cmpxchg

Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old
in check_scale_change and atomic64_try_cmpxchg in blkcg_iolatency_done_bio.
x86 CMPXCHG instruction returns success in ZF flag, so this change saves a
compare after cmpxchg (and related move instruction in front of cmpxchg).

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220712151947.6783-1-ubizjak@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iolatency.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'block')

diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 9568bf8dfe82..79745c6d8e15 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -401,7 +401,6 @@ static void check_scale_change(struct iolatency_grp *iolat)
 	unsigned int cur_cookie;
 	unsigned int our_cookie = atomic_read(&iolat->scale_cookie);
 	u64 scale_lat;
-	unsigned int old;
 	int direction = 0;
 
 	if (lat_to_blkg(iolat)->parent == NULL)
@@ -422,11 +421,10 @@ static void check_scale_change(struct iolatency_grp *iolat)
 	else
 		return;
 
-	old = atomic_cmpxchg(&iolat->scale_cookie, our_cookie, cur_cookie);
-
-	/* Somebody beat us to the punch, just bail. */
-	if (old != our_cookie)
+	if (!atomic_try_cmpxchg(&iolat->scale_cookie, &our_cookie, cur_cookie)) {
+		/* Somebody beat us to the punch, just bail. */
 		return;
+	}
 
 	if (direction < 0 && iolat->min_lat_nsec) {
 		u64 samples_thresh;
@@ -633,8 +631,8 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 			window_start = atomic64_read(&iolat->window_start);
 			if (now > window_start &&
 			    (now - window_start) >= iolat->cur_win_nsec) {
-				if (atomic64_cmpxchg(&iolat->window_start,
-					     window_start, now) == window_start)
+				if (atomic64_try_cmpxchg(&iolat->window_start,
+							 &window_start, now))
 					iolatency_check_latencies(iolat, now);
 			}
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 96388f57d2aad9836b2c589181fa1dbaba4066b4 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Tue, 12 Jul 2022 17:44:55 +0200
Subject: blk-cgroup: Use atomic{,64}_try_cmpxchg

Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old
in blkcg_unuse_delay, blkcg_set_delay and blkcg_clear_delay and
atomic64_try_cmpxchg in blkcg_scale_delay.  x86 CMPXCHG instruction
returns success in ZF flag, so this change saves a compare after cmpxchg
(and related move instruction in front of cmpxchg).

Also, atomic_try_cmpxchg implicitly assigns old *ptr value to "old" when
cmpxchg fails, enabling further code simplifications.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220712154455.66868-1-ubizjak@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c |  2 +-
 block/blk-cgroup.h | 12 ++++--------
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'block')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 27a2d0ca0c70..869af9d72bcf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1700,7 +1700,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
 	 * everybody is happy with their IO latencies.
 	 */
 	if (time_before64(old + NSEC_PER_SEC, now) &&
-	    atomic64_cmpxchg(&blkg->delay_start, old, now) == old) {
+	    atomic64_try_cmpxchg(&blkg->delay_start, &old, now)) {
 		u64 cur = atomic64_read(&blkg->delay_nsec);
 		u64 sub = min_t(u64, blkg->last_delay, now - old);
 		int cur_use = atomic_read(&blkg->use_delay);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index d4de0a35e066..d2724d1dd7c9 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -430,12 +430,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
 	 * then check to see if we were the last delay so we can drop the
 	 * congestion count on the cgroup.
 	 */
-	while (old) {
-		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
-		if (cur == old)
-			break;
-		old = cur;
-	}
+	while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old - 1))
+		;
 
 	if (old == 0)
 		return 0;
@@ -458,7 +454,7 @@ static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
 	int old = atomic_read(&blkg->use_delay);
 
 	/* We only want 1 person setting the congestion count for this blkg. */
-	if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
+	if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1))
 		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
 
 	atomic64_set(&blkg->delay_nsec, delay);
@@ -475,7 +471,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
 	int old = atomic_read(&blkg->use_delay);
 
 	/* We only want 1 person clearing the congestion count for this blkg. */
-	if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
+	if (old && atomic_try_cmpxchg(&blkg->use_delay, &old, 0))
 		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 5bf83e9a14ddae994d783dee96b91bf46f04839c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Jul 2022 07:53:09 +0200
Subject: block: stop using bdevname in bdev_write_inode

Just use the %pg format specifier instead.  Also reformat the
printk statement to be more readable.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220713055317.1888500-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bdev.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'block')

diff --git a/block/bdev.c b/block/bdev.c
index 5fe06c1f2def..ce05175e71ce 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -54,12 +54,10 @@ static void bdev_write_inode(struct block_device *bdev)
 	while (inode->i_state & I_DIRTY) {
 		spin_unlock(&inode->i_lock);
 		ret = write_inode_now(inode, true);
-		if (ret) {
-			char name[BDEVNAME_SIZE];
-			pr_warn_ratelimited("VFS: Dirty inode writeback failed "
-					    "for block device %s (err=%d).\n",
-					    bdevname(bdev, name), ret);
-		}
+		if (ret)
+			pr_warn_ratelimited(
+	"VFS: Dirty inode writeback failed for block device %pg (err=%d).\n",
+				bdev, ret);
 		spin_lock(&inode->i_lock);
 	}
 	spin_unlock(&inode->i_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 02ff3dd20f512cf811ae8028c44fdb212b5f2bf7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Jul 2022 07:53:10 +0200
Subject: block: stop using bdevname in __blkdev_issue_discard

Just use the %pg format specifier instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220713055317.1888500-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 09b7e1200c0f..67e6dbc1ae81 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -48,10 +48,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	/* In case the discard granularity isn't set by buggy device driver */
 	if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
-		char dev_name[BDEVNAME_SIZE];
-
-		bdevname(bdev, dev_name);
-		pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", dev_name);
+		pr_err_ratelimited("%pg: Error: discard_granularity is 0.\n",
+				   bdev);
 		return -EOPNOTSUPP;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 900d156bac2bc474cf7c7bee4efbc6c83ec5ae58 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Jul 2022 07:53:17 +0200
Subject: block: remove bdevname

Replace the remaining calls of bdevname with snprintf using the %pg
format specifier.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220713055317.1888500-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c           | 23 -----------------------
 drivers/md/md.c         |  2 +-
 drivers/md/raid1.c      |  2 +-
 drivers/md/raid10.c     |  2 +-
 fs/ext4/mmp.c           |  3 ++-
 fs/jbd2/journal.c       |  6 ++++--
 include/linux/blkdev.h  |  1 -
 kernel/trace/blktrace.c |  4 ++--
 8 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'block')

diff --git a/block/genhd.c b/block/genhd.c
index 9d30f159c59a..44dfcf67ed96 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -101,29 +101,6 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
 }
 EXPORT_SYMBOL_GPL(set_capacity_and_notify);
 
-/*
- * Format the device name of the indicated block device into the supplied buffer
- * and return a pointer to that same buffer for convenience.
- *
- * Note: do not use this in new code, use the %pg specifier to sprintf and
- * printk insted.
- */
-const char *bdevname(struct block_device *bdev, char *buf)
-{
-	struct gendisk *hd = bdev->bd_disk;
-	int partno = bdev->bd_partno;
-
-	if (!partno)
-		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
-	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
-		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
-	else
-		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
-
-	return buf;
-}
-EXPORT_SYMBOL(bdevname);
-
 static void part_stat_read_all(struct block_device *part,
 		struct disk_stats *stat)
 {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 076255ec9ba1..4be9d8173071 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2438,7 +2438,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 			mdname(mddev), mddev->max_disks);
 		return -EBUSY;
 	}
-	bdevname(rdev->bdev,b);
+	snprintf(b, sizeof(b), "%pg", rdev->bdev);
 	strreplace(b, '/', '!');
 
 	rdev->mddev = mddev;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 258d4eb2d63c..65cd90f0b2a8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1240,7 +1240,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 		rcu_read_lock();
 		rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
 		if (rdev)
-			bdevname(rdev->bdev, b);
+			snprintf(b, sizeof(b), "%pg", rdev->bdev);
 		else
 			strcpy(b, "???");
 		rcu_read_unlock();
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d589f823feb1..a7dcb1bf6b0a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1164,7 +1164,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 		disk = r10_bio->devs[slot].devnum;
 		err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (err_rdev)
-			bdevname(err_rdev->bdev, b);
+			snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
 		else {
 			strcpy(b, "???");
 			/* This never gets dereferenced */
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index b7a850b0070b..b221f313ded6 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -371,7 +371,8 @@ skip:
 	EXT4_SB(sb)->s_mmp_bh = bh;
 
 	BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
-	bdevname(bh->b_bdev, mmp->mmp_bdevname);
+	snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname),
+		 "%pg", bh->b_bdev);
 
 	/*
 	 * Start a kernel thread to update the MMP block periodically.
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c0cbeeaec2d1..9015f5fa2862 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1465,7 +1465,8 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev,
 	if (!journal)
 		return NULL;
 
-	bdevname(journal->j_dev, journal->j_devname);
+	snprintf(journal->j_devname, sizeof(journal->j_devname),
+		 "%pg", journal->j_dev);
 	strreplace(journal->j_devname, '/', '!');
 	jbd2_stats_proc_init(journal);
 
@@ -1507,7 +1508,8 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
 		return NULL;
 
 	journal->j_inode = inode;
-	bdevname(journal->j_dev, journal->j_devname);
+	snprintf(journal->j_devname, sizeof(journal->j_devname),
+		 "%pg", journal->j_dev);
 	p = strreplace(journal->j_devname, '/', '!');
 	sprintf(p, "-%lu", journal->j_inode->i_ino);
 	jbd2_stats_proc_init(journal);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 22c477fadc0f..2775763c51b9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1457,7 +1457,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
 int bdev_read_only(struct block_device *bdev);
 int set_blocksize(struct block_device *bdev, int size);
 
-const char *bdevname(struct block_device *bdev, char *buffer);
 int lookup_bdev(const char *pathname, dev_t *dev);
 
 void blkdev_show(struct seq_file *seqf, off_t offset);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c584effe5fe9..4752bda1b1a0 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -736,12 +736,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 
 	switch (cmd) {
 	case BLKTRACESETUP:
-		bdevname(bdev, b);
+		snprintf(b, sizeof(b), "%pg", bdev);
 		ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
 		break;
 #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
 	case BLKTRACESETUP32:
-		bdevname(bdev, b);
+		snprintf(b, sizeof(b), "%pg", bdev);
 		ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
 		break;
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From ff07a02e9e8e6489db841e0c48a5c78e7e78d572 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Jul 2022 11:06:27 -0700
Subject: treewide: Rename enum req_opf into enum req_op

The type name enum req_opf is misleading since it suggests that values of
this type include both an operation type and flags. Since values of this
type represent an operation only, change the type name into enum req_op.

Convert the enum req_op documentation into kernel-doc format. Move a few
definitions such that the enum req_op documentation occurs just above
the enum req_op definition.

The name "req_opf" was introduced by commit ef295ecf090d ("block: better op
and flags encoding").

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220714180729.1065367-2-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c                 |  7 +++----
 drivers/block/null_blk/main.c     |  9 ++++-----
 drivers/block/null_blk/null_blk.h | 12 +++++-------
 drivers/block/null_blk/trace.h    |  2 +-
 drivers/block/null_blk/zoned.c    |  4 ++--
 drivers/md/dm-integrity.c         |  2 +-
 drivers/nvme/target/zns.c         |  4 ++--
 drivers/scsi/sd_zbc.c             |  2 +-
 drivers/ufs/core/ufshpb.c         |  5 ++---
 fs/zonefs/super.c                 |  5 ++---
 fs/zonefs/trace.h                 |  2 +-
 include/linux/blk_types.h         | 16 ++++++++--------
 include/linux/blkdev.h            |  2 +-
 13 files changed, 33 insertions(+), 39 deletions(-)

(limited to 'block')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 7c017458d5ce..a264621d4905 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -256,9 +256,8 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
  *    The operation to execute on each zone can be a zone reset, open, close
  *    or finish request.
  */
-int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
-		     sector_t sector, sector_t nr_sectors,
-		     gfp_t gfp_mask)
+int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
+		     sector_t sector, sector_t nr_sectors, gfp_t gfp_mask)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 	sector_t zone_sectors = bdev_zone_sectors(bdev);
@@ -397,7 +396,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 	void __user *argp = (void __user *)arg;
 	struct request_queue *q;
 	struct blk_zone_range zrange;
-	enum req_opf op;
+	enum req_op op;
 	int ret;
 
 	if (!argp)
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 4e03a020ee3c..8b224ede2e33 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1310,7 +1310,7 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
 }
 
 static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
-						     enum req_opf op,
+						     enum req_op op,
 						     sector_t sector,
 						     sector_t nr_sectors)
 {
@@ -1381,9 +1381,8 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
 	}
 }
 
-blk_status_t null_process_cmd(struct nullb_cmd *cmd,
-			      enum req_opf op, sector_t sector,
-			      unsigned int nr_sectors)
+blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
+			      sector_t sector, unsigned int nr_sectors)
 {
 	struct nullb_device *dev = cmd->nq->dev;
 	blk_status_t ret;
@@ -1401,7 +1400,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd,
 }
 
 static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
-				    sector_t nr_sectors, enum req_opf op)
+				    sector_t nr_sectors, enum req_op op)
 {
 	struct nullb_device *dev = cmd->nq->dev;
 	struct nullb *nullb = dev->nullb;
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 8359b43842f2..6fbf0a1b2622 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -136,9 +136,8 @@ struct nullb {
 
 blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
 				 sector_t nr_sectors);
-blk_status_t null_process_cmd(struct nullb_cmd *cmd,
-			      enum req_opf op, sector_t sector,
-			      unsigned int nr_sectors);
+blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
+			      sector_t sector, unsigned int nr_sectors);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
@@ -146,9 +145,8 @@ int null_register_zoned_dev(struct nullb *nullb);
 void null_free_zoned_dev(struct nullb_device *dev);
 int null_report_zones(struct gendisk *disk, sector_t sector,
 		      unsigned int nr_zones, report_zones_cb cb, void *data);
-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
-				    enum req_opf op, sector_t sector,
-				    sector_t nr_sectors);
+blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
+				    sector_t sector, sector_t nr_sectors);
 size_t null_zone_valid_read_len(struct nullb *nullb,
 				sector_t sector, unsigned int len);
 #else
@@ -164,7 +162,7 @@ static inline int null_register_zoned_dev(struct nullb *nullb)
 }
 static inline void null_free_zoned_dev(struct nullb_device *dev) {}
 static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
-			enum req_opf op, sector_t sector, sector_t nr_sectors)
+			enum req_op op, sector_t sector, sector_t nr_sectors)
 {
 	return BLK_STS_NOTSUPP;
 }
diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h
index 86d6c12c603c..6b2b370e786f 100644
--- a/drivers/block/null_blk/trace.h
+++ b/drivers/block/null_blk/trace.h
@@ -36,7 +36,7 @@ TRACE_EVENT(nullb_zone_op,
 	    TP_ARGS(cmd, zone_no, zone_cond),
 	    TP_STRUCT__entry(
 		__array(char, disk, DISK_NAME_LEN)
-		__field(enum req_opf, op)
+		__field(enum req_op, op)
 		__field(unsigned int, zone_no)
 		__field(unsigned int, zone_cond)
 	    ),
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 64b06caab984..55a69e48ef8b 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -600,7 +600,7 @@ static blk_status_t null_reset_zone(struct nullb_device *dev,
 	return BLK_STS_OK;
 }
 
-static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op,
 				   sector_t sector)
 {
 	struct nullb_device *dev = cmd->nq->dev;
@@ -653,7 +653,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
 	return ret;
 }
 
-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
+blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
 				    sector_t sector, sector_t nr_sectors)
 {
 	struct nullb_device *dev;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 3d5a0ce123c9..148978ad03a8 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -298,7 +298,7 @@ struct dm_integrity_io {
 	struct work_struct work;
 
 	struct dm_integrity_c *ic;
-	enum req_opf op;
+	enum req_op op;
 	bool fua;
 
 	struct dm_integrity_range range;
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index c0ee21fcab81..b233c0943fec 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -308,7 +308,7 @@ void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
 	queue_work(zbd_wq, &req->z.zmgmt_work);
 }
 
-static inline enum req_opf zsa_req_op(u8 zsa)
+static inline enum req_op zsa_req_op(u8 zsa)
 {
 	switch (zsa) {
 	case NVME_ZONE_OPEN:
@@ -465,7 +465,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
 {
 	struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work);
 	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
-	enum req_opf op = zsa_req_op(req->cmd->zms.zsa);
+	enum req_op op = zsa_req_op(req->cmd->zms.zsa);
 	struct block_device *bdev = req->ns->bdev;
 	sector_t zone_sectors = bdev_zone_sectors(bdev);
 	u16 status = NVME_SC_SUCCESS;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index b8c97456506a..bd15624c6322 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -529,7 +529,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
 	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
 	unsigned int zno = blk_rq_zone_no(rq);
-	enum req_opf op = req_op(rq);
+	enum req_op op = req_op(rq);
 	unsigned long flags;
 
 	/*
diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c
index de2bb8401bc4..24f1ee82c215 100644
--- a/drivers/ufs/core/ufshpb.c
+++ b/drivers/ufs/core/ufshpb.c
@@ -433,9 +433,8 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 	return 0;
 }
 
-static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb,
-					 int rgn_idx, enum req_opf dir,
-					 bool atomic)
+static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb, int rgn_idx,
+					 enum req_op dir, bool atomic)
 {
 	struct ufshpb_req *rq;
 	struct request *req;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 9c0eef1ff32a..a221ddb12da6 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -60,8 +60,7 @@ static void zonefs_account_active(struct inode *inode)
 	}
 }
 
-static inline int zonefs_zone_mgmt(struct inode *inode,
-				   enum req_opf op)
+static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
 	int ret;
@@ -525,7 +524,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
 	loff_t old_isize;
-	enum req_opf op;
+	enum req_op op;
 	int ret = 0;
 
 	/*
diff --git a/fs/zonefs/trace.h b/fs/zonefs/trace.h
index f369d7d50303..21501da764bd 100644
--- a/fs/zonefs/trace.h
+++ b/fs/zonefs/trace.h
@@ -20,7 +20,7 @@
 #define show_dev(dev) MAJOR(dev), MINOR(dev)
 
 TRACE_EVENT(zonefs_zone_mgmt,
-	    TP_PROTO(struct inode *inode, enum req_opf op),
+	    TP_PROTO(struct inode *inode, enum req_op op),
 	    TP_ARGS(inode, op),
 	    TP_STRUCT__entry(
 			     __field(dev_t, dev)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a24d4078fb21..0e6a2af7ed3d 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -337,8 +337,12 @@ enum {
 
 typedef __u32 __bitwise blk_mq_req_flags_t;
 
-/*
- * Operations and flags common to the bio and request structures.
+#define REQ_OP_BITS	8
+#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
+#define REQ_FLAG_BITS	24
+
+/**
+ * enum req_op - Operations common to the bio and request structures.
  * We use 8 bits for encoding the operation, and the remaining 24 for flags.
  *
  * The least significant bit of the operation number indicates the data
@@ -350,11 +354,7 @@ typedef __u32 __bitwise blk_mq_req_flags_t;
  * If a operation does not transfer data the least significant bit has no
  * meaning.
  */
-#define REQ_OP_BITS	8
-#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
-#define REQ_FLAG_BITS	24
-
-enum req_opf {
+enum req_op {
 	/* read sectors from the device */
 	REQ_OP_READ		= 0,
 	/* write sectors to the device */
@@ -509,7 +509,7 @@ static inline bool op_is_discard(unsigned int op)
  * due to its different handling in the block layer and device response in
  * case of command failure.
  */
-static inline bool op_is_zone_mgmt(enum req_opf op)
+static inline bool op_is_zone_mgmt(enum req_op op)
 {
 	switch (op & REQ_OP_MASK) {
 	case REQ_OP_ZONE_RESET:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2775763c51b9..ec072a5129bf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -322,7 +322,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
 int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
 unsigned int bdev_nr_zones(struct block_device *bdev);
-extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
 			    sector_t sectors, sector_t nr_sectors,
 			    gfp_t gfp_mask);
 int blk_revalidate_disk_zones(struct gendisk *disk,
-- 
cgit v1.2.3-59-g8ed1b


From 77e7ffd7ad3952909be6a9c599b7d164c8866fec Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Jul 2022 11:06:28 -0700
Subject: block: Use enum req_op where appropriate

Change the type of the arguments that are used to pass a REQ_OP_* value
from int or unsigned int into enum req_op to improve static type
checking.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220714180729.1065367-3-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          | 6 +++---
 block/blk-mq-debugfs.c    | 2 +-
 block/blk-throttle.c      | 7 ++++---
 block/blk-wbt.c           | 2 +-
 block/blk.h               | 2 +-
 include/linux/blk_types.h | 2 +-
 include/linux/blkdev.h    | 6 +++---
 7 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 8365996a8ef8..67b8bcfa27f0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -136,7 +136,7 @@ static const char *const blk_op_name[] = {
  * string format. Useful in the debugging and tracing bio or request. For
  * invalid REQ_OP_XXX it returns string "UNKNOWN".
  */
-inline const char *blk_op_str(unsigned int op)
+inline const char *blk_op_str(enum req_op op)
 {
 	const char *op_str = "UNKNOWN";
 
@@ -953,7 +953,7 @@ again:
 }
 
 unsigned long bdev_start_io_acct(struct block_device *bdev,
-				 unsigned int sectors, unsigned int op,
+				 unsigned int sectors, enum req_op op,
 				 unsigned long start_time)
 {
 	const int sgrp = op_stat_group(op);
@@ -994,7 +994,7 @@ unsigned long bio_start_io_acct(struct bio *bio)
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct);
 
-void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
 		      unsigned long start_time)
 {
 	const int sgrp = op_stat_group(op);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 7ee1b13380d0..6cc2411e2d26 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -304,7 +304,7 @@ static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
 int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
 {
 	const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
-	const unsigned int op = req_op(rq);
+	const enum req_op op = req_op(rq);
 	const char *op_str = blk_op_str(op);
 
 	seq_printf(m, "%p {.op=", rq);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 139b2d7a99e2..9f5fe62afff9 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2203,8 +2203,9 @@ out_unlock:
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 static void throtl_track_latency(struct throtl_data *td, sector_t size,
-	int op, unsigned long time)
+				 enum req_op op, unsigned long time)
 {
+	const bool rw = op_is_write(op);
 	struct latency_bucket *latency;
 	int index;
 
@@ -2215,10 +2216,10 @@ static void throtl_track_latency(struct throtl_data *td, sector_t size,
 
 	index = request_bucket_index(size);
 
-	latency = get_cpu_ptr(td->latency_buckets[op]);
+	latency = get_cpu_ptr(td->latency_buckets[rw]);
 	latency[index].total_latency += time;
 	latency[index].samples++;
-	put_cpu_ptr(td->latency_buckets[op]);
+	put_cpu_ptr(td->latency_buckets[rw]);
 }
 
 void blk_throtl_stat_add(struct request *rq, u64 time_ns)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 0c119be0e813..7bf09ae06577 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -670,7 +670,7 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
 
 static int wbt_data_dir(const struct request *rq)
 {
-	const int op = req_op(rq);
+	const enum req_op op = req_op(rq);
 
 	if (op == REQ_OP_READ)
 		return READ;
diff --git a/block/blk.h b/block/blk.h
index b71e22c97d77..c4b084bfe87c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -160,7 +160,7 @@ static inline bool blk_discard_mergable(struct request *req)
 }
 
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
-						     int op)
+						     enum req_op op)
 {
 	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
 		return min(q->limits.max_discard_sectors,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0e6a2af7ed3d..cce8768bc00b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -522,7 +522,7 @@ static inline bool op_is_zone_mgmt(enum req_op op)
 	}
 }
 
-static inline int op_stat_group(unsigned int op)
+static inline int op_stat_group(enum req_op op)
 {
 	if (op_is_discard(op))
 		return STAT_DISCARD;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ec072a5129bf..2f13f0062192 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -872,7 +872,7 @@ extern void blk_queue_exit(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 
 /* Helper to convert REQ_OP_XXX to its string format XXX */
-extern const char *blk_op_str(unsigned int op);
+extern const char *blk_op_str(enum req_op op);
 
 int blk_status_to_errno(blk_status_t status);
 blk_status_t errno_to_blk_status(int errno);
@@ -1434,9 +1434,9 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
 }
 
 unsigned long bdev_start_io_acct(struct block_device *bdev,
-				 unsigned int sectors, unsigned int op,
+				 unsigned int sectors, enum req_op op,
 				 unsigned long start_time);
-void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
 		unsigned long start_time);
 
 void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
-- 
cgit v1.2.3-59-g8ed1b


From 2d9b02be73ba8efba406b399a722b4e33614dd0e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Jul 2022 11:06:30 -0700
Subject: block: Change the type of req_op() and bio_op() into enum req_op

Improve static type checking by changing the type of the value returned by
req_op() and bio_op() from unsigned int into enum req_op. Insert
'default: break;' in switch statements on the enum req_op type to prevent
that the compiler warns about these switch statements.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Alasdair Kergon <agk@redhat.com>
Cc: Mike Snitzer <snitzer@kernel.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220714180729.1065367-5-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c         | 2 ++
 drivers/block/paride/pd.c | 2 ++
 drivers/md/dm.c           | 2 ++
 include/linux/blk-mq.h    | 6 ++++--
 include/linux/blk_types.h | 6 ++++--
 5 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5abf5aa5a5f0..de178a8b4c82 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -405,6 +405,8 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
 		return 1;
 	case REQ_OP_WRITE_ZEROES:
 		return 0;
+	default:
+		break;
 	}
 
 	rq_for_each_bvec(bv, rq, iter)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index c8c14c6f5c3a..f8a75bc90f70 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -501,6 +501,8 @@ static enum action do_pd_io_start(void)
 			return do_pd_read_start();
 		else
 			return do_pd_write_start();
+	default:
+		break;
 	}
 	return Fail;
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 33d3799bb66e..6c21922b87d0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1542,6 +1542,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
 	case REQ_OP_WRITE_ZEROES:
 		num_bios = ti->num_write_zeroes_bios;
 		break;
+	default:
+		break;
 	}
 
 	/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d74f6a6b7e69..677195de0663 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -198,8 +198,10 @@ struct request {
 	void *end_io_data;
 };
 
-#define req_op(req) \
-	((req)->cmd_flags & REQ_OP_MASK)
+static inline enum req_op req_op(const struct request *req)
+{
+	return req->cmd_flags & REQ_OP_MASK;
+}
 
 static inline bool blk_rq_is_passthrough(struct request *rq)
 {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index cce8768bc00b..e66cbe377ae8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -463,8 +463,10 @@ enum stat_group {
 	NR_STAT_GROUPS
 };
 
-#define bio_op(bio) \
-	((bio)->bi_opf & REQ_OP_MASK)
+static inline enum req_op bio_op(const struct bio *bio)
+{
+	return bio->bi_opf & REQ_OP_MASK;
+}
 
 /* obsolete, don't use in new code */
 static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
-- 
cgit v1.2.3-59-g8ed1b


From 16458cf3bd15e5624205df6e8a76b9a5363555f3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Jul 2022 11:06:32 -0700
Subject: block: Use the new blk_opf_t type

Use the new blk_opf_t type for arguments and variables that represent
request flags or a bitwise combination of a request operation and
request flags. Rename the function arguments and also a structure member
that hold a request operation and flags from 'rw' into 'opf'.

This patch does not change any functionality.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220714180729.1065367-7-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               | 10 +++++-----
 block/blk-cgroup-rwstat.h |  8 ++++----
 block/blk-core.c          |  2 +-
 block/blk-flush.c         |  6 +++---
 block/blk-merge.c         |  6 +++---
 block/blk-mq-debugfs.c    |  4 ++--
 block/blk-mq.c            | 15 ++++++++-------
 block/blk-mq.h            |  6 +++---
 block/blk-wbt.c           | 16 ++++++++--------
 block/elevator.h          |  2 +-
 block/fops.c              | 12 ++++++------
 include/linux/bio.h       | 10 +++++-----
 include/linux/blk-mq.h    |  6 +++---
 include/linux/blkdev.h    |  2 +-
 14 files changed, 53 insertions(+), 52 deletions(-)

(limited to 'block')

diff --git a/block/bio.c b/block/bio.c
index 888ee81ea303..6f9f883f9a65 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -239,7 +239,7 @@ static void bio_free(struct bio *bio)
  * when IO has completed, or when the bio is released.
  */
 void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
-	      unsigned short max_vecs, unsigned int opf)
+	      unsigned short max_vecs, blk_opf_t opf)
 {
 	bio->bi_next = NULL;
 	bio->bi_bdev = bdev;
@@ -292,7 +292,7 @@ EXPORT_SYMBOL(bio_init);
  *   preserved are the ones that are initialized by bio_alloc_bioset(). See
  *   comment in struct bio.
  */
-void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
+void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf)
 {
 	bio_uninit(bio);
 	memset(bio, 0, BIO_RESET_BYTES);
@@ -341,7 +341,7 @@ void bio_chain(struct bio *bio, struct bio *parent)
 EXPORT_SYMBOL(bio_chain);
 
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
-		unsigned int nr_pages, unsigned int opf, gfp_t gfp)
+		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp)
 {
 	struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
 
@@ -409,7 +409,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 }
 
 static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
-		unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
+		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
 		struct bio_set *bs)
 {
 	struct bio_alloc_cache *cache;
@@ -468,7 +468,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
  * Returns: Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-			     unsigned int opf, gfp_t gfp_mask,
+			     blk_opf_t opf, gfp_t gfp_mask,
 			     struct bio_set *bs)
 {
 	gfp_t saved_gfp = gfp_mask;
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
index 9f2723b34b75..022527b0b043 100644
--- a/block/blk-cgroup-rwstat.h
+++ b/block/blk-cgroup-rwstat.h
@@ -59,20 +59,20 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
  * caller is responsible for synchronizing calls to this function.
  */
 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
-				   unsigned int op, uint64_t val)
+				   blk_opf_t opf, uint64_t val)
 {
 	struct percpu_counter *cnt;
 
-	if (op_is_discard(op))
+	if (op_is_discard(opf))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
-	else if (op_is_write(op))
+	else if (op_is_write(opf))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
 
 	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
 
-	if (op_is_sync(op))
+	if (op_is_sync(opf))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
diff --git a/block/blk-core.c b/block/blk-core.c
index 67b8bcfa27f0..123468b9d2e4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1203,7 +1203,7 @@ EXPORT_SYMBOL_GPL(blk_io_schedule);
 
 int __init blk_dev_init(void)
 {
-	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
+	BUILD_BUG_ON((__force u32)REQ_OP_LAST >= (1 << REQ_OP_BITS));
 	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
 			sizeof_field(struct request, cmd_flags));
 	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c68968724870..d20a0c6b2c66 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -94,7 +94,7 @@ enum {
 };
 
 static void blk_kick_flush(struct request_queue *q,
-			   struct blk_flush_queue *fq, unsigned int flags);
+			   struct blk_flush_queue *fq, blk_opf_t flags);
 
 static inline struct blk_flush_queue *
 blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
@@ -173,7 +173,7 @@ static void blk_flush_complete_seq(struct request *rq,
 {
 	struct request_queue *q = rq->q;
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
-	unsigned int cmd_flags;
+	blk_opf_t cmd_flags;
 
 	BUG_ON(rq->flush.seq & seq);
 	rq->flush.seq |= seq;
@@ -290,7 +290,7 @@ bool is_flush_rq(struct request *rq)
  *
  */
 static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
-			   unsigned int flags)
+			   blk_opf_t flags)
 {
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
 	struct request *first_rq =
diff --git a/block/blk-merge.c b/block/blk-merge.c
index de178a8b4c82..3c3f785f558a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -712,7 +712,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
  */
 void blk_rq_set_mixed_merge(struct request *rq)
 {
-	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
+	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
 	struct bio *bio;
 
 	if (rq->rq_flags & RQF_MIXED_MERGE)
@@ -928,7 +928,7 @@ enum bio_merge_status {
 static enum bio_merge_status bio_attempt_back_merge(struct request *req,
 		struct bio *bio, unsigned int nr_segs)
 {
-	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
+	const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK;
 
 	if (!ll_back_merge_fn(req, bio, nr_segs))
 		return BIO_MERGE_FAILED;
@@ -952,7 +952,7 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req,
 static enum bio_merge_status bio_attempt_front_merge(struct request *req,
 		struct bio *bio, unsigned int nr_segs)
 {
-	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
+	const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK;
 
 	if (!ll_front_merge_fn(req, bio, nr_segs))
 		return BIO_MERGE_FAILED;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 6cc2411e2d26..8559cea7f300 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -313,8 +313,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
 	else
 		seq_printf(m, "%s", op_str);
 	seq_puts(m, ", .cmd_flags=");
-	blk_flags_show(m, rq->cmd_flags & ~REQ_OP_MASK, cmd_flag_name,
-		       ARRAY_SIZE(cmd_flag_name));
+	blk_flags_show(m, (__force unsigned int)(rq->cmd_flags & ~REQ_OP_MASK),
+		       cmd_flag_name, ARRAY_SIZE(cmd_flag_name));
 	seq_puts(m, ", .rq_flags=");
 	blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
 		       ARRAY_SIZE(rqf_name));
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f1b84e20b1a9..d716b7f3763f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -510,13 +510,13 @@ retry:
 					alloc_time_ns);
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
+struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
 		blk_mq_req_flags_t flags)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
 		.flags		= flags,
-		.cmd_flags	= op,
+		.cmd_flags	= opf,
 		.nr_tags	= 1,
 	};
 	struct request *rq;
@@ -540,12 +540,12 @@ out_queue_exit:
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
 struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
-	unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
+	blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
 		.flags		= flags,
-		.cmd_flags	= op,
+		.cmd_flags	= opf,
 		.nr_tags	= 1,
 	};
 	u64 alloc_time_ns = 0;
@@ -660,7 +660,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 {
 	printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
 		rq->q->disk ? rq->q->disk->disk_name : "?",
-		(unsigned long long) rq->cmd_flags);
+		(__force unsigned long long) rq->cmd_flags);
 
 	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
 	       (unsigned long long)blk_rq_pos(rq),
@@ -713,8 +713,9 @@ static void blk_print_req_error(struct request *req, blk_status_t status)
 		"phys_seg %u prio class %u\n",
 		blk_status_to_str(status),
 		req->q->disk ? req->q->disk->disk_name : "?",
-		blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)),
-		req->cmd_flags & ~REQ_OP_MASK,
+		blk_rq_pos(req), (__force u32)req_op(req),
+		blk_op_str(req_op(req)),
+		(__force u32)(req->cmd_flags & ~REQ_OP_MASK),
 		req->nr_phys_segments,
 		IOPRIO_PRIO_CLASS(req->ioprio));
 }
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e694ec67d646..8ca453ac243d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -86,7 +86,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
 	return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]);
 }
 
-static inline enum hctx_type blk_mq_get_hctx_type(unsigned int opf)
+static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf)
 {
 	enum hctx_type type = HCTX_TYPE_DEFAULT;
 
@@ -107,7 +107,7 @@ static inline enum hctx_type blk_mq_get_hctx_type(unsigned int opf)
  * @ctx: software queue cpu ctx
  */
 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
-						     unsigned int opf,
+						     blk_opf_t opf,
 						     struct blk_mq_ctx *ctx)
 {
 	return ctx->hctxs[blk_mq_get_hctx_type(opf)];
@@ -152,7 +152,7 @@ struct blk_mq_alloc_data {
 	struct request_queue *q;
 	blk_mq_req_flags_t flags;
 	unsigned int shallow_depth;
-	unsigned int cmd_flags;
+	blk_opf_t cmd_flags;
 	req_flags_t rq_flags;
 
 	/* allocate multiple requests/tags in one go */
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 7bf09ae06577..f2e4bf1dca47 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -451,7 +451,7 @@ static bool close_io(struct rq_wb *rwb)
 
 #define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)
 
-static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
+static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
 {
 	unsigned int limit;
 
@@ -462,7 +462,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	if (!rwb_enabled(rwb))
 		return UINT_MAX;
 
-	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
+	if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
 		return rwb->wb_background;
 
 	/*
@@ -473,9 +473,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	 * the idle limit, or go to normal if we haven't had competing
 	 * IO for a bit.
 	 */
-	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+	if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
 		limit = rwb->rq_depth.max_depth;
-	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
+	else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
 		/*
 		 * If less than 100ms since we completed unrelated IO,
 		 * limit us to half the depth for background writeback.
@@ -490,13 +490,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 struct wbt_wait_data {
 	struct rq_wb *rwb;
 	enum wbt_flags wb_acct;
-	unsigned long rw;
+	blk_opf_t opf;
 };
 
 static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
 {
 	struct wbt_wait_data *data = private_data;
-	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
+	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf));
 }
 
 static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
@@ -510,13 +510,13 @@ static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
  * the timer to kick off queuing again.
  */
 static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
-		       unsigned long rw)
+		       blk_opf_t opf)
 {
 	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
 	struct wbt_wait_data data = {
 		.rwb = rwb,
 		.wb_acct = wb_acct,
-		.rw = rw,
+		.opf = opf,
 	};
 
 	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
diff --git a/block/elevator.h b/block/elevator.h
index 16cd8bdedb7e..3f0593b3bf9d 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -34,7 +34,7 @@ struct elevator_mq_ops {
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+	void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *);
 	void (*prepare_request)(struct request *);
 	void (*finish_request)(struct request *);
 	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
diff --git a/block/fops.c b/block/fops.c
index 86d3cab9bf93..29066ac5a2fa 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -32,14 +32,14 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
-static unsigned int dio_bio_write_op(struct kiocb *iocb)
+static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
 {
-	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+	blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
 
 	/* avoid the need for a I/O completion work item */
 	if (iocb->ki_flags & IOCB_DSYNC)
-		op |= REQ_FUA;
-	return op;
+		opf |= REQ_FUA;
+	return opf;
 }
 
 static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
@@ -175,7 +175,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
-	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
@@ -297,7 +297,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 {
 	struct block_device *bdev = iocb->ki_filp->private_data;
 	bool is_read = iov_iter_rw(iter) == READ;
-	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	loff_t pos = iocb->ki_pos;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 992ee987f273..ca22b06700a9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -405,7 +405,7 @@ extern void bioset_exit(struct bio_set *);
 extern int biovec_init_pool(mempool_t *pool, int pool_entries);
 
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-			     unsigned int opf, gfp_t gfp_mask,
+			     blk_opf_t opf, gfp_t gfp_mask,
 			     struct bio_set *bs);
 struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
 extern void bio_put(struct bio *);
@@ -418,7 +418,7 @@ int bio_init_clone(struct block_device *bdev, struct bio *bio,
 extern struct bio_set fs_bio_set;
 
 static inline struct bio *bio_alloc(struct block_device *bdev,
-		unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask)
+		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask)
 {
 	return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set);
 }
@@ -456,9 +456,9 @@ struct request_queue;
 
 extern int submit_bio_wait(struct bio *bio);
 void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
-	      unsigned short max_vecs, unsigned int opf);
+	      unsigned short max_vecs, blk_opf_t opf);
 extern void bio_uninit(struct bio *);
-void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf);
+void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
 void bio_chain(struct bio *, struct bio *);
 
 int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off);
@@ -789,6 +789,6 @@ static inline void bio_clear_polled(struct bio *bio)
 }
 
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
-		unsigned int nr_pages, unsigned int opf, gfp_t gfp);
+		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
 
 #endif /* __LINUX_BIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 677195de0663..effee1dc715a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -80,7 +80,7 @@ struct request {
 	struct blk_mq_ctx *mq_ctx;
 	struct blk_mq_hw_ctx *mq_hctx;
 
-	unsigned int cmd_flags;		/* op and common flags */
+	blk_opf_t cmd_flags;		/* op and common flags */
 	req_flags_t rq_flags;
 
 	int tag;
@@ -715,10 +715,10 @@ enum {
 	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
 };
 
-struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
+struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
 		blk_mq_req_flags_t flags);
 struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
-		unsigned int op, blk_mq_req_flags_t flags,
+		blk_opf_t opf, blk_mq_req_flags_t flags,
 		unsigned int hctx_idx);
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca2ff113ea00..d04bdf549efa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -250,7 +250,7 @@ static inline int blk_validate_block_size(unsigned long bsize)
 	return 0;
 }
 
-static inline bool blk_op_is_passthrough(unsigned int op)
+static inline bool blk_op_is_passthrough(blk_opf_t op)
 {
 	op &= REQ_OP_MASK;
 	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
-- 
cgit v1.2.3-59-g8ed1b


From dc469ba2e790cb0a335e2650b701639752ff65cd Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Jul 2022 11:06:33 -0700
Subject: block/bfq: Use the new blk_opf_t type

Use the new blk_opf_t type for arguments and variables that represent
request flags or a bitwise combination of a request operation and
request flags. Rename those variables from 'op' into 'opf'.

This patch does not change any functionality.

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220714180729.1065367-8-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c  | 26 +++++++++++++-------------
 block/bfq-iosched.c | 16 ++++++++--------
 block/bfq-iosched.h |  8 ++++----
 3 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'block')

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 9fc605791b1e..30b15a9a47c4 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -220,46 +220,46 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
 }
 
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op)
+			      blk_opf_t opf)
 {
-	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
+	blkg_rwstat_add(&bfqg->stats.queued, opf, 1);
 	bfqg_stats_end_empty_time(&bfqg->stats);
 	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
 		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
 }
 
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf)
 {
-	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
+	blkg_rwstat_add(&bfqg->stats.queued, opf, -1);
 }
 
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf)
 {
-	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
+	blkg_rwstat_add(&bfqg->stats.merged, opf, 1);
 }
 
 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
-				  u64 io_start_time_ns, unsigned int op)
+				  u64 io_start_time_ns, blk_opf_t opf)
 {
 	struct bfqg_stats *stats = &bfqg->stats;
 	u64 now = ktime_get_ns();
 
 	if (now > io_start_time_ns)
-		blkg_rwstat_add(&stats->service_time, op,
+		blkg_rwstat_add(&stats->service_time, opf,
 				now - io_start_time_ns);
 	if (io_start_time_ns > start_time_ns)
-		blkg_rwstat_add(&stats->wait_time, op,
+		blkg_rwstat_add(&stats->wait_time, opf,
 				io_start_time_ns - start_time_ns);
 }
 
 #else /* CONFIG_BFQ_CGROUP_DEBUG */
 
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op) { }
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
+			      blk_opf_t opf) { }
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { }
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { }
 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
-				  u64 io_start_time_ns, unsigned int op) { }
+				  u64 io_start_time_ns, blk_opf_t opf) { }
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e6d7e6b01a05..c740b41fe0a4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -668,19 +668,19 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
  * significantly affect service guarantees coming from the BFQ scheduling
  * algorithm.
  */
-static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
+static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
 	struct bfq_data *bfqd = data->q->elevator->elevator_data;
 	struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
-	struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(op)) : NULL;
+	struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL;
 	int depth;
 	unsigned limit = data->q->nr_requests;
 
 	/* Sync reads have full depth available */
-	if (op_is_sync(op) && !op_is_write(op)) {
+	if (op_is_sync(opf) && !op_is_write(opf)) {
 		depth = 0;
 	} else {
-		depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
+		depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
 		limit = (limit * depth) >> bfqd->full_depth_shift;
 	}
 
@@ -693,7 +693,7 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 		depth = 1;
 
 	bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
-		__func__, bfqd->wr_busy_queues, op_is_sync(op), depth);
+		__func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
 	if (depth)
 		data->shallow_depth = depth;
 }
@@ -6104,7 +6104,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 static void bfq_update_insert_stats(struct request_queue *q,
 				    struct bfq_queue *bfqq,
 				    bool idle_timer_disabled,
-				    unsigned int cmd_flags)
+				    blk_opf_t cmd_flags)
 {
 	if (!bfqq)
 		return;
@@ -6129,7 +6129,7 @@ static void bfq_update_insert_stats(struct request_queue *q,
 static inline void bfq_update_insert_stats(struct request_queue *q,
 					   struct bfq_queue *bfqq,
 					   bool idle_timer_disabled,
-					   unsigned int cmd_flags) {}
+					   blk_opf_t cmd_flags) {}
 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
 
 static struct bfq_queue *bfq_init_rq(struct request *rq);
@@ -6141,7 +6141,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct bfq_queue *bfqq;
 	bool idle_timer_disabled = false;
-	unsigned int cmd_flags;
+	blk_opf_t cmd_flags;
 	LIST_HEAD(free);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ca8177d7bf7c..ad8e513d7e87 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -994,11 +994,11 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
 
 void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq);
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
-			      unsigned int op);
-void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
-void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
+			      blk_opf_t opf);
+void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf);
+void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf);
 void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
-				  u64 io_start_time_ns, unsigned int op);
+				  u64 io_start_time_ns, blk_opf_t opf);
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
-- 
cgit v1.2.3-59-g8ed1b


From f8359efe4742a39b4ece554ab9d7e5f03c4fff83 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Jul 2022 11:06:34 -0700
Subject: block/mq-deadline: Use the new blk_opf_t type

Use the new blk_opf_t type for an argument that represents a bitwise
combination of a request operation and request flags. Rename that
argument from 'op' into 'opf'.

This patch does not change any functionality.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220714180729.1065367-9-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 1a9e835e816c..5639921dfa92 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -543,12 +543,12 @@ unlock:
  * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
  * function is used by __blk_mq_get_tag().
  */
-static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
+static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
 	struct deadline_data *dd = data->q->elevator->elevator_data;
 
 	/* Do not throttle synchronous reads. */
-	if (op_is_sync(op) && !op_is_write(op))
+	if (op_is_sync(opf) && !op_is_write(opf))
 		return;
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From d625fecd8af84ac669075caf1941ff0d1995de56 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Jul 2022 11:06:35 -0700
Subject: block/kyber: Use the new blk_opf_t type

Use the new blk_opf_t type for arguments that represent a bitwise
combination of a request operation and request flags. Rename those
arguments from 'op' into 'opf'.

This patch does not change any functionality.

Cc: Omar Sandoval <osandov@fb.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220714180729.1065367-10-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/kyber-iosched.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 8f7c745b4a57..b05357bced99 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -195,9 +195,9 @@ struct kyber_hctx_data {
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 			     void *key);
 
-static unsigned int kyber_sched_domain(unsigned int op)
+static unsigned int kyber_sched_domain(blk_opf_t opf)
 {
-	switch (op & REQ_OP_MASK) {
+	switch (opf & REQ_OP_MASK) {
 	case REQ_OP_READ:
 		return KYBER_READ;
 	case REQ_OP_WRITE:
@@ -553,13 +553,13 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
 	}
 }
 
-static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
+static void kyber_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
 	/*
 	 * We use the scheduler tags as per-hardware queue queueing tokens.
 	 * Async requests can be limited at this stage.
 	 */
-	if (!op_is_sync(op)) {
+	if (!op_is_sync(opf)) {
 		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
 
 		data->shallow_depth = kqd->async_depth;
-- 
cgit v1.2.3-59-g8ed1b


From 14a6e2eb7df5c7897c15b109cba29ab0c4a791b6 Mon Sep 17 00:00:00 2001
From: Jinke Han <hanjinke.666@bytedance.com>
Date: Wed, 20 Jul 2022 17:36:16 +0800
Subject: block: don't allow the same type rq_qos add more than once

In our test of iocost, we encountered some list add/del corruptions of
inner_walk list in ioc_timer_fn.

The reason can be described as follows:

cpu 0					cpu 1
ioc_qos_write				ioc_qos_write

ioc = q_to_ioc(queue);
if (!ioc) {
        ioc = kzalloc();
					ioc = q_to_ioc(queue);
					if (!ioc) {
						ioc = kzalloc();
						...
						rq_qos_add(q, rqos);
					}
        ...
        rq_qos_add(q, rqos);
        ...
}

When the io.cost.qos file is written by two cpus concurrently, rq_qos may
be added to one disk twice. In that case, there will be two iocs enabled
and running on one disk. They own different iocgs on their active list. In
the ioc_timer_fn function, because of the iocgs from two iocs have the
same root iocg, the root iocg's walk_list may be overwritten by each other
and this leads to list add/del corruptions in building or destroying the
inner_walk list.

And so far, the blk-rq-qos framework works in case that one instance for
one type rq_qos per queue by default. This patch make this explicit and
also fix the crash above.

Signed-off-by: Jinke Han <hanjinke.666@bytedance.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20220720093616.70584-1-hanjinke.666@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iocost.c    | 20 +++++++++++++-------
 block/blk-iolatency.c | 18 +++++++++++-------
 block/blk-rq-qos.h    | 11 ++++++++++-
 block/blk-wbt.c       | 12 +++++++++++-
 4 files changed, 45 insertions(+), 16 deletions(-)

(limited to 'block')

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index b7082f2aed9c..7936e5f5821c 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2886,15 +2886,21 @@ static int blk_iocost_init(struct request_queue *q)
 	 * called before policy activation completion, can't assume that the
 	 * target bio has an iocg associated and need to test for NULL iocg.
 	 */
-	rq_qos_add(q, rqos);
+	ret = rq_qos_add(q, rqos);
+	if (ret)
+		goto err_free_ioc;
+
 	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
-	if (ret) {
-		rq_qos_del(q, rqos);
-		free_percpu(ioc->pcpu_stat);
-		kfree(ioc);
-		return ret;
-	}
+	if (ret)
+		goto err_del_qos;
 	return 0;
+
+err_del_qos:
+	rq_qos_del(q, rqos);
+err_free_ioc:
+	free_percpu(ioc->pcpu_stat);
+	kfree(ioc);
+	return ret;
 }
 
 static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp)
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 79745c6d8e15..e285152345a2 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -771,19 +771,23 @@ int blk_iolatency_init(struct request_queue *q)
 	rqos->ops = &blkcg_iolatency_ops;
 	rqos->q = q;
 
-	rq_qos_add(q, rqos);
-
+	ret = rq_qos_add(q, rqos);
+	if (ret)
+		goto err_free;
 	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
-	if (ret) {
-		rq_qos_del(q, rqos);
-		kfree(blkiolat);
-		return ret;
-	}
+	if (ret)
+		goto err_qos_del;
 
 	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
 	INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
 
 	return 0;
+
+err_qos_del:
+	rq_qos_del(q, rqos);
+err_free:
+	kfree(blkiolat);
+	return ret;
 }
 
 static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 0e46052b018a..08b856570ad1 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 	init_waitqueue_head(&rq_wait->wait);
 }
 
-static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 {
 	/*
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
@@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 	blk_mq_freeze_queue(q);
 
 	spin_lock_irq(&q->queue_lock);
+	if (rq_qos_id(q, rqos->id))
+		goto ebusy;
 	rqos->next = q->rq_qos;
 	q->rq_qos = rqos;
 	spin_unlock_irq(&q->queue_lock);
@@ -109,6 +111,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 		blk_mq_debugfs_register_rqos(rqos);
 		mutex_unlock(&q->debugfs_mutex);
 	}
+
+	return 0;
+ebusy:
+	spin_unlock_irq(&q->queue_lock);
+	blk_mq_unfreeze_queue(q);
+	return -EBUSY;
+
 }
 
 static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index f2e4bf1dca47..a9982000b667 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -820,6 +820,7 @@ int wbt_init(struct request_queue *q)
 {
 	struct rq_wb *rwb;
 	int i;
+	int ret;
 
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
@@ -846,7 +847,10 @@ int wbt_init(struct request_queue *q)
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	rq_qos_add(q, &rwb->rqos);
+	ret = rq_qos_add(q, &rwb->rqos);
+	if (ret)
+		goto err_free;
+
 	blk_stat_add_callback(q, rwb->cb);
 
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
@@ -855,4 +859,10 @@ int wbt_init(struct request_queue *q)
 	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 
 	return 0;
+
+err_free:
+	blk_stat_free_callback(rwb->cb);
+	kfree(rwb);
+	return ret;
+
 }
-- 
cgit v1.2.3-59-g8ed1b


From 0a3e5cc7bbfcd571a2e53779ef7d7aa3c57d5432 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 20 Jul 2022 15:05:40 +0200
Subject: blk-mq: fix error handling in __blk_mq_alloc_disk

To fully clean up the queue if the disk allocation fails we need to
call blk_mq_destroy_queue and not just blk_put_queue.

Fixes: 6f8191fdf41d ("block: simplify disk shutdown")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220720130541.1323531-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d716b7f3763f..70177ee74295 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3960,7 +3960,7 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
 
 	disk = __alloc_disk_node(q, set->numa_node, lkclass);
 	if (!disk) {
-		blk_put_queue(q);
+		blk_mq_destroy_queue(q);
 		return ERR_PTR(-ENOMEM);
 	}
 	set_bit(GD_OWNS_QUEUE, &disk->state);
-- 
cgit v1.2.3-59-g8ed1b


From c5db2cfc6274692d821d33b59acb6ff615e350c1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 20 Jul 2022 15:05:41 +0200
Subject: block: call blk_mq_exit_queue from disk_release for never added disks

To undo the all initialization from blk_mq_init_allocated_queue in case
of a probe failure where add_disk is never called we have to call
blk_mq_exit_queue from put_disk.

This relies on the fact that drivers always call blk_mq_free_tag_set
after calling put_disk in the probe error path if they have a gendisk
at all.

We should be doing this in general, but can't do it for the normal
teardown case (yet) as the tagset can be gone by the time the disk is
released once it was added.  I hope to sort this out properly eventually
but for now this isolated hack will do it.

Fixes: 6f8191fdf41d ("block: simplify disk shutdown")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220720130541.1323531-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'block')

diff --git a/block/genhd.c b/block/genhd.c
index 44dfcf67ed96..e1d5b10ac193 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1138,6 +1138,18 @@ static void disk_release(struct device *dev)
 	might_sleep();
 	WARN_ON_ONCE(disk_live(disk));
 
+	/*
+	 * To undo the all initialization from blk_mq_init_allocated_queue in
+	 * case of a probe failure where add_disk is never called we have to
+	 * call blk_mq_exit_queue here. We can't do this for the more common
+	 * teardown case (yet) as the tagset can be gone by the time the disk
+	 * is released once it was added.
+	 */
+	if (queue_is_mq(disk->queue) &&
+	    test_bit(GD_OWNS_QUEUE, &disk->state) &&
+	    !test_bit(GD_ADDED, &disk->state))
+		blk_mq_exit_queue(disk->queue);
+
 	blkcg_exit_queue(disk->queue);
 
 	disk_release_events(disk);
@@ -1403,6 +1415,9 @@ EXPORT_SYMBOL(__blk_alloc_disk);
  * This decrements the refcount for the struct gendisk. When this reaches 0
  * we'll have disk_release() called.
  *
+ * Note: for blk-mq disk put_disk must be called before freeing the tag_set
+ * when handling probe errors (that is before add_disk() is called).
+ *
  * Context: Any context, but the last reference must not be dropped from
  *          atomic context.
  */
-- 
cgit v1.2.3-59-g8ed1b


From 828b5f017d9d5d0491cd2e71d48f4f2139078e2c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 21 Jul 2022 08:34:32 +0200
Subject: block: remove __blk_get_queue

__blk_get_queue is only called by blk_get_queue, so merge the two.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220721063432.1714609-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 10 ++++------
 block/blk.h      |  5 -----
 2 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 123468b9d2e4..3d286a256d3d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -461,12 +461,10 @@ fail_q:
  */
 bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!blk_queue_dying(q))) {
-		__blk_get_queue(q);
-		return true;
-	}
-
-	return false;
+	if (unlikely(blk_queue_dying(q)))
+		return false;
+	kobject_get(&q->kobj);
+	return true;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
diff --git a/block/blk.h b/block/blk.h
index c4b084bfe87c..1d83b1d41cd1 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -31,11 +31,6 @@ extern struct kmem_cache *blk_requestq_srcu_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
-static inline void __blk_get_queue(struct request_queue *q)
-{
-	kobject_get(&q->kobj);
-}
-
 bool is_flush_rq(struct request *req);
 
 struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
-- 
cgit v1.2.3-59-g8ed1b