Merge tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe: - NVMe pull requests via Christoph: - handle number of queue changes in the TCP and RDMA drivers (Daniel Wagner) - allow changing the number of queues in nvmet (Daniel Wagner) - also consider host_iface when checking ip options (Daniel Wagner) - don't map pages which can't come from HIGHMEM (Fabio M. De Francesco) - avoid unnecessary flush bios in nvmet (Guixin Liu) - shrink and better pack the nvme_iod structure (Keith Busch) - add comment for unaligned "fake" nqn (Linjun Bao) - print actual source IP address through sysfs "address" attr (Martin Belanger) - various cleanups (Jackie Liu, Wolfram Sang, Genjian Zhang) - handle effects after freeing the request (Keith Busch) - copy firmware_rev on each init (Keith Busch) - restrict management ioctls to admin (Keith Busch) - ensure subsystem reset is single threaded (Keith Busch) - report the actual number of tagset maps in nvme-pci (Keith Busch) - small fabrics authentication fixups (Christoph Hellwig) - add common code for tagset allocation and freeing (Christoph Hellwig) - stop using the request_queue in nvmet (Christoph Hellwig) - set min_align_mask before calculating max_hw_sectors (Rishabh Bhatnagar) - send a rediscover uevent when a persistent discovery controller reconnects (Sagi Grimberg) - misc nvmet-tcp fixes (Varun Prakash, zhenwei pi) - MD pull request via Song: - Various raid5 fix and clean up, by Logan Gunthorpe and David Sloan. - Raid10 performance optimization, by Yu Kuai. - sbitmap wakeup hang fixes (Hugh, Keith, Jan, Yu) - IO scheduler switching quisce fix (Keith) - s390/dasd block driver updates (Stefan) - support for recovery for the ublk driver (ZiyangZhang) - rnbd drivers fixes and updates (Guoqing, Santosh, ye, Christoph) - blk-mq and null_blk map fixes (Bart) - various bcache fixes (Coly, Jilin, Jules) - nbd signal hang fix (Shigeru) - block writeback throttling fix (Yu) - optimize the passthrough mapping handling (me) - prepare block cgroups to being gendisk based (Christoph) - get rid of an old PSI hack in the block layer, moving it to the callers instead where it belongs (Christoph) - blk-throttle fixes and cleanups (Yu) - misc fixes and cleanups (Liu Shixin, Liu Song, Miaohe, Pankaj, Ping-Xiang, Wolfram, Saurabh, Li Jinlin, Li Lei, Lin, Li zeming, Miaohe, Bart, Coly, Gaosheng * tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux: (162 commits) sbitmap: fix lockup while swapping block: add rationale for not using blk_mq_plug() when applicable block: adapt blk_mq_plug() to not plug for writes that require a zone lock s390/dasd: use blk_mq_alloc_disk blk-cgroup: don't update the blkg lookup hint in blkg_conf_prep nvmet: don't look at the request_queue in nvmet_bdev_set_limits nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all blk-mq: use quiesced elevator switch when reinitializing queues block: replace blk_queue_nowait with bdev_nowait nvme: remove nvme_ctrl_init_connect_q nvme-loop: use the tagset alloc/free helpers nvme-loop: store the generic nvme_ctrl in set->driver_data nvme-loop: initialize sqsize later nvme-fc: use the tagset alloc/free helpers nvme-fc: store the generic nvme_ctrl in set->driver_data nvme-fc: keep ctrl->sqsize in sync with opts->queue_size nvme-rdma: use the tagset alloc/free helpers nvme-rdma: store the generic nvme_ctrl in set->driver_data nvme-tcp: use the tagset alloc/free helpers nvme-tcp: store the generic nvme_ctrl in set->driver_data ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-10-07 09:19:14 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-10-07 09:19:14 -0700
commit: 513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch)
tree: c71e478fab1568da4706868b14eb67a75c148a8b /drivers/nvme/host/rdma.c
parent: Merge tag 'for-6.1/io_uring-2022-10-03' of git://git.kernel.dk/linux (diff)
parent: sbitmap: fix lockup while swapping (diff)
download: wireguard-linux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.tar.xz
wireguard-linux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.zip
1 files changed, 60 insertions, 111 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 3100643be299..5ad0ab2853a4 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -295,7 +295,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx,
 		unsigned int numa_node)
 {
-	struct nvme_rdma_ctrl *ctrl = set->driver_data;
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data);
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
@@ -320,7 +320,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 		unsigned int hctx_idx)
 {
-	struct nvme_rdma_ctrl *ctrl = data;
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data);
 	struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];
 
 	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
@@ -332,7 +332,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 static int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 		unsigned int hctx_idx)
 {
-	struct nvme_rdma_ctrl *ctrl = data;
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data);
 	struct nvme_rdma_queue *queue = &ctrl->queues[0];
 
 	BUG_ON(hctx_idx != 0);
@@ -696,11 +696,12 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
 	return ret;
 }
 
-static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
+static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
+				     int first, int last)
 {
 	int i, ret = 0;
 
-	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
+	for (i = first; i < last; i++) {
 		ret = nvme_rdma_start_queue(ctrl, i);
 		if (ret)
 			goto out_stop_queues;
@@ -709,7 +710,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
 	return 0;
 
 out_stop_queues:
-	for (i--; i >= 1; i--)
+	for (i--; i >= first; i--)
 		nvme_rdma_stop_queue(&ctrl->queues[i]);
 	return ret;
 }
@@ -787,64 +788,21 @@ out_free_queues:
 	return ret;
 }
 
-static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl)
+static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl)
 {
-	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-	struct blk_mq_tag_set *set = &ctrl->admin_tag_set;
-	int ret;
+	unsigned int cmd_size = sizeof(struct nvme_rdma_request) +
+				NVME_RDMA_DATA_SGL_SIZE;
 
-	memset(set, 0, sizeof(*set));
-	set->ops = &nvme_rdma_admin_mq_ops;
-	set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-	set->reserved_tags = NVMF_RESERVED_TAGS;
-	set->numa_node = nctrl->numa_node;
-	set->cmd_size = sizeof(struct nvme_rdma_request) +
-			NVME_RDMA_DATA_SGL_SIZE;
-	set->driver_data = ctrl;
-	set->nr_hw_queues = 1;
-	set->timeout = NVME_ADMIN_TIMEOUT;
-	set->flags = BLK_MQ_F_NO_SCHED;
-	ret = blk_mq_alloc_tag_set(set);
-	if (!ret)
-		ctrl->ctrl.admin_tagset = set;
-	return ret;
-}
-
-static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl)
-{
-	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-	struct blk_mq_tag_set *set = &ctrl->tag_set;
-	int ret;
+	if (ctrl->max_integrity_segments)
+		cmd_size += sizeof(struct nvme_rdma_sgl) +
+			    NVME_RDMA_METADATA_SGL_SIZE;
 
-	memset(set, 0, sizeof(*set));
-	set->ops = &nvme_rdma_mq_ops;
-	set->queue_depth = nctrl->sqsize + 1;
-	set->reserved_tags = NVMF_RESERVED_TAGS;
-	set->numa_node = nctrl->numa_node;
-	set->flags = BLK_MQ_F_SHOULD_MERGE;
-	set->cmd_size = sizeof(struct nvme_rdma_request) +
-			NVME_RDMA_DATA_SGL_SIZE;
-	if (nctrl->max_integrity_segments)
-		set->cmd_size += sizeof(struct nvme_rdma_sgl) +
-				 NVME_RDMA_METADATA_SGL_SIZE;
-	set->driver_data = ctrl;
-	set->nr_hw_queues = nctrl->queue_count - 1;
-	set->timeout = NVME_IO_TIMEOUT;
-	set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
-	ret = blk_mq_alloc_tag_set(set);
-	if (!ret)
-		ctrl->ctrl.tagset = set;
-	return ret;
+	return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set,
+			&nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, cmd_size);
 }
 
-static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
-		bool remove)
+static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
 {
-	if (remove) {
-		blk_mq_destroy_queue(ctrl->ctrl.admin_q);
-		blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
-		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
-	}
 	if (ctrl->async_event_sqe.data) {
 		cancel_work_sync(&ctrl->ctrl.async_event_work);
 		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
@@ -886,26 +844,19 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		goto out_free_queue;
 
 	if (new) {
-		error = nvme_rdma_alloc_admin_tag_set(&ctrl->ctrl);
+		error = nvme_alloc_admin_tag_set(&ctrl->ctrl,
+				&ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops,
+				BLK_MQ_F_NO_SCHED,
+				sizeof(struct nvme_rdma_request) +
+				NVME_RDMA_DATA_SGL_SIZE);
 		if (error)
 			goto out_free_async_qe;
 
-		ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
-		if (IS_ERR(ctrl->ctrl.fabrics_q)) {
-			error = PTR_ERR(ctrl->ctrl.fabrics_q);
-			goto out_free_tagset;
-		}
-
-		ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
-		if (IS_ERR(ctrl->ctrl.admin_q)) {
-			error = PTR_ERR(ctrl->ctrl.admin_q);
-			goto out_cleanup_fabrics_q;
-		}
 	}
 
 	error = nvme_rdma_start_queue(ctrl, 0);
 	if (error)
-		goto out_cleanup_queue;
+		goto out_remove_admin_tag_set;
 
 	error = nvme_enable_ctrl(&ctrl->ctrl);
 	if (error)
@@ -932,15 +883,9 @@ out_quiesce_queue:
 out_stop_queue:
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	nvme_cancel_admin_tagset(&ctrl->ctrl);
-out_cleanup_queue:
-	if (new)
-		blk_mq_destroy_queue(ctrl->ctrl.admin_q);
-out_cleanup_fabrics_q:
+out_remove_admin_tag_set:
 	if (new)
-		blk_mq_destroy_queue(ctrl->ctrl.fabrics_q);
-out_free_tagset:
-	if (new)
-		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
+		nvme_remove_admin_tag_set(&ctrl->ctrl);
 out_free_async_qe:
 	if (ctrl->async_event_sqe.data) {
 		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
@@ -952,19 +897,9 @@ out_free_queue:
 	return error;
 }
 
-static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
-		bool remove)
-{
-	if (remove) {
-		blk_mq_destroy_queue(ctrl->ctrl.connect_q);
-		blk_mq_free_tag_set(ctrl->ctrl.tagset);
-	}
-	nvme_rdma_free_io_queues(ctrl);
-}
-
 static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 {
-	int ret;
+	int ret, nr_queues;
 
 	ret = nvme_rdma_alloc_io_queues(ctrl);
 	if (ret)
@@ -974,15 +909,17 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 		ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl);
 		if (ret)
 			goto out_free_io_queues;
-
-		ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
-		if (ret)
-			goto out_free_tag_set;
 	}
 
-	ret = nvme_rdma_start_io_queues(ctrl);
+	/*
+	 * Only start IO queues for which we have allocated the tagset
+	 * and limitted it to the available queues. On reconnects, the
+	 * queue number might have changed.
+	 */
+	nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count);
+	ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues);
 	if (ret)
-		goto out_cleanup_connect_q;
+		goto out_cleanup_tagset;
 
 	if (!new) {
 		nvme_start_queues(&ctrl->ctrl);
@@ -1000,19 +937,25 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 		nvme_unfreeze(&ctrl->ctrl);
 	}
 
+	/*
+	 * If the number of queues has increased (reconnect case)
+	 * start all new queues now.
+	 */
+	ret = nvme_rdma_start_io_queues(ctrl, nr_queues,
+					ctrl->tag_set.nr_hw_queues + 1);
+	if (ret)
+		goto out_wait_freeze_timed_out;
+
 	return 0;
 
 out_wait_freeze_timed_out:
 	nvme_stop_queues(&ctrl->ctrl);
 	nvme_sync_io_queues(&ctrl->ctrl);
 	nvme_rdma_stop_io_queues(ctrl);
-out_cleanup_connect_q:
+out_cleanup_tagset:
 	nvme_cancel_tagset(&ctrl->ctrl);
 	if (new)
-		blk_mq_destroy_queue(ctrl->ctrl.connect_q);
-out_free_tag_set:
-	if (new)
-		blk_mq_free_tag_set(ctrl->ctrl.tagset);
+		nvme_remove_io_tag_set(&ctrl->ctrl);
 out_free_io_queues:
 	nvme_rdma_free_io_queues(ctrl);
 	return ret;
@@ -1025,9 +968,11 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	blk_sync_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	nvme_cancel_admin_tagset(&ctrl->ctrl);
-	if (remove)
+	if (remove) {
 		nvme_start_admin_queue(&ctrl->ctrl);
-	nvme_rdma_destroy_admin_queue(ctrl, remove);
+		nvme_remove_admin_tag_set(&ctrl->ctrl);
+	}
+	nvme_rdma_destroy_admin_queue(ctrl);
 }
 
 static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
@@ -1039,9 +984,11 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 		nvme_sync_io_queues(&ctrl->ctrl);
 		nvme_rdma_stop_io_queues(ctrl);
 		nvme_cancel_tagset(&ctrl->ctrl);
-		if (remove)
+		if (remove) {
 			nvme_start_queues(&ctrl->ctrl);
-		nvme_rdma_destroy_io_queues(ctrl, remove);
+			nvme_remove_io_tag_set(&ctrl->ctrl);
+		}
+		nvme_rdma_free_io_queues(ctrl);
 	}
 }
 
@@ -1163,14 +1110,18 @@ destroy_io:
 		nvme_sync_io_queues(&ctrl->ctrl);
 		nvme_rdma_stop_io_queues(ctrl);
 		nvme_cancel_tagset(&ctrl->ctrl);
-		nvme_rdma_destroy_io_queues(ctrl, new);
+		if (new)
+			nvme_remove_io_tag_set(&ctrl->ctrl);
+		nvme_rdma_free_io_queues(ctrl);
 	}
 destroy_admin:
 	nvme_stop_admin_queue(&ctrl->ctrl);
 	blk_sync_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	nvme_cancel_admin_tagset(&ctrl->ctrl);
-	nvme_rdma_destroy_admin_queue(ctrl, new);
+	if (new)
+		nvme_remove_admin_tag_set(&ctrl->ctrl);
+	nvme_rdma_destroy_admin_queue(ctrl);
 	return ret;
 }
 
@@ -2188,9 +2139,9 @@ static void nvme_rdma_complete_rq(struct request *rq)
 	nvme_complete_rq(rq);
 }
 
-static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
+static void nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 {
-	struct nvme_rdma_ctrl *ctrl = set->driver_data;
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data);
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 
 	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
@@ -2231,8 +2182,6 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 		ctrl->io_queues[HCTX_TYPE_DEFAULT],
 		ctrl->io_queues[HCTX_TYPE_READ],
 		ctrl->io_queues[HCTX_TYPE_POLL]);
-
-	return 0;
 }
 
 static const struct blk_mq_ops nvme_rdma_mq_ops = {
author	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-07 09:19:14 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-07 09:19:14 -0700
commit	513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch)
tree	c71e478fab1568da4706868b14eb67a75c148a8b /drivers/nvme/host/rdma.c
parent	Merge tag 'for-6.1/io_uring-2022-10-03' of git://git.kernel.dk/linux (diff)
parent	sbitmap: fix lockup while swapping (diff)
download	wireguard-linux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.tar.xz wireguard-linux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.zip