diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-07 09:19:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-07 09:19:14 -0700 |
commit | 513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch) | |
tree | c71e478fab1568da4706868b14eb67a75c148a8b /drivers/nvme/host/rdma.c | |
parent | Merge tag 'for-6.1/io_uring-2022-10-03' of git://git.kernel.dk/linux (diff) | |
parent | sbitmap: fix lockup while swapping (diff) | |
download | wireguard-linux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.tar.xz wireguard-linux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.zip |
Merge tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull requests via Christoph:
- handle number of queue changes in the TCP and RDMA drivers
(Daniel Wagner)
- allow changing the number of queues in nvmet (Daniel Wagner)
- also consider host_iface when checking ip options (Daniel
Wagner)
- don't map pages which can't come from HIGHMEM (Fabio M. De
Francesco)
- avoid unnecessary flush bios in nvmet (Guixin Liu)
- shrink and better pack the nvme_iod structure (Keith Busch)
- add comment for unaligned "fake" nqn (Linjun Bao)
- print actual source IP address through sysfs "address" attr
(Martin Belanger)
- various cleanups (Jackie Liu, Wolfram Sang, Genjian Zhang)
- handle effects after freeing the request (Keith Busch)
- copy firmware_rev on each init (Keith Busch)
- restrict management ioctls to admin (Keith Busch)
- ensure subsystem reset is single threaded (Keith Busch)
- report the actual number of tagset maps in nvme-pci (Keith
Busch)
- small fabrics authentication fixups (Christoph Hellwig)
- add common code for tagset allocation and freeing (Christoph
Hellwig)
- stop using the request_queue in nvmet (Christoph Hellwig)
- set min_align_mask before calculating max_hw_sectors (Rishabh
Bhatnagar)
- send a rediscover uevent when a persistent discovery controller
reconnects (Sagi Grimberg)
- misc nvmet-tcp fixes (Varun Prakash, zhenwei pi)
- MD pull request via Song:
- Various raid5 fixes and cleanups, by Logan Gunthorpe and David
Sloan.
- Raid10 performance optimization, by Yu Kuai.
- sbitmap wakeup hang fixes (Hugh, Keith, Jan, Yu)
- IO scheduler switching quiesce fix (Keith)
- s390/dasd block driver updates (Stefan)
- support for recovery for the ublk driver (ZiyangZhang)
- rnbd drivers fixes and updates (Guoqing, Santosh, ye, Christoph)
- blk-mq and null_blk map fixes (Bart)
- various bcache fixes (Coly, Jilin, Jules)
- nbd signal hang fix (Shigeru)
- block writeback throttling fix (Yu)
- optimize the passthrough mapping handling (me)
- prepare block cgroups for being gendisk based (Christoph)
- get rid of an old PSI hack in the block layer, moving it to the
callers instead where it belongs (Christoph)
- blk-throttle fixes and cleanups (Yu)
- misc fixes and cleanups (Liu Shixin, Liu Song, Miaohe, Pankaj,
Ping-Xiang, Wolfram, Saurabh, Li Jinlin, Li Lei, Lin, Li zeming,
Miaohe, Bart, Coly, Gaosheng)
* tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux: (162 commits)
sbitmap: fix lockup while swapping
block: add rationale for not using blk_mq_plug() when applicable
block: adapt blk_mq_plug() to not plug for writes that require a zone lock
s390/dasd: use blk_mq_alloc_disk
blk-cgroup: don't update the blkg lookup hint in blkg_conf_prep
nvmet: don't look at the request_queue in nvmet_bdev_set_limits
nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all
blk-mq: use quiesced elevator switch when reinitializing queues
block: replace blk_queue_nowait with bdev_nowait
nvme: remove nvme_ctrl_init_connect_q
nvme-loop: use the tagset alloc/free helpers
nvme-loop: store the generic nvme_ctrl in set->driver_data
nvme-loop: initialize sqsize later
nvme-fc: use the tagset alloc/free helpers
nvme-fc: store the generic nvme_ctrl in set->driver_data
nvme-fc: keep ctrl->sqsize in sync with opts->queue_size
nvme-rdma: use the tagset alloc/free helpers
nvme-rdma: store the generic nvme_ctrl in set->driver_data
nvme-tcp: use the tagset alloc/free helpers
nvme-tcp: store the generic nvme_ctrl in set->driver_data
...
Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r-- | drivers/nvme/host/rdma.c | 171 |
1 files changed, 60 insertions, 111 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3100643be299..5ad0ab2853a4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -295,7 +295,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; @@ -320,7 +320,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1]; BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); @@ -332,7 +332,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[0]; BUG_ON(hctx_idx != 0); @@ -696,11 +696,12 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) return ret; } -static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl, + int first, int last) { int i, ret = 0; - for (i = 1; i < ctrl->ctrl.queue_count; i++) { + for (i = first; i < last; i++) { ret = nvme_rdma_start_queue(ctrl, i); if (ret) goto out_stop_queues; @@ -709,7 +710,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_stop_queues: - for (i--; i >= 1; i--) + for (i--; i >= first; i--) nvme_rdma_stop_queue(&ctrl->queues[i]); return ret; } @@ -787,64 
+788,21 @@ out_free_queues: return ret; } -static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl) +static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl) { - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->admin_tag_set; - int ret; + unsigned int cmd_size = sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_admin_mq_ops; - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - set->driver_data = ctrl; - set->nr_hw_queues = 1; - set->timeout = NVME_ADMIN_TIMEOUT; - set->flags = BLK_MQ_F_NO_SCHED; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.admin_tagset = set; - return ret; -} - -static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->tag_set; - int ret; + if (ctrl->max_integrity_segments) + cmd_size += sizeof(struct nvme_rdma_sgl) + + NVME_RDMA_METADATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_mq_ops; - set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - if (nctrl->max_integrity_segments) - set->cmd_size += sizeof(struct nvme_rdma_sgl) + - NVME_RDMA_METADATA_SGL_SIZE; - set->driver_data = ctrl; - set->nr_hw_queues = nctrl->queue_count - 1; - set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = nctrl->opts->nr_poll_queues ? 
HCTX_MAX_TYPES : 2; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.tagset = set; - return ret; + return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set, + &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, cmd_size); } -static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, - bool remove) +static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) { - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); - } if (ctrl->async_event_sqe.data) { cancel_work_sync(&ctrl->ctrl.async_event_work); nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -886,26 +844,19 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, goto out_free_queue; if (new) { - error = nvme_rdma_alloc_admin_tag_set(&ctrl->ctrl); + error = nvme_alloc_admin_tag_set(&ctrl->ctrl, + &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops, + BLK_MQ_F_NO_SCHED, + sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE); if (error) goto out_free_async_qe; - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - error = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_tagset; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } } error = nvme_rdma_start_queue(ctrl, 0); if (error) - goto out_cleanup_queue; + goto out_remove_admin_tag_set; error = nvme_enable_ctrl(&ctrl->ctrl); if (error) @@ -932,15 +883,9 @@ out_quiesce_queue: out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); -out_cleanup_queue: - if (new) - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: +out_remove_admin_tag_set: if (new) - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_tagset: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); + 
nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_async_qe: if (ctrl->async_event_sqe.data) { nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -952,19 +897,9 @@ out_free_queue: return error; } -static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, - bool remove) -{ - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(ctrl->ctrl.tagset); - } - nvme_rdma_free_io_queues(ctrl); -} - static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) { - int ret; + int ret, nr_queues; ret = nvme_rdma_alloc_io_queues(ctrl); if (ret) @@ -974,15 +909,17 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl); if (ret) goto out_free_io_queues; - - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tag_set; } - ret = nvme_rdma_start_io_queues(ctrl); + /* + * Only start IO queues for which we have allocated the tagset + * and limitted it to the available queues. On reconnects, the + * queue number might have changed. + */ + nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count); + ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues); if (ret) - goto out_cleanup_connect_q; + goto out_cleanup_tagset; if (!new) { nvme_start_queues(&ctrl->ctrl); @@ -1000,19 +937,25 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) nvme_unfreeze(&ctrl->ctrl); } + /* + * If the number of queues has increased (reconnect case) + * start all new queues now. 
+ */ + ret = nvme_rdma_start_io_queues(ctrl, nr_queues, + ctrl->tag_set.nr_hw_queues + 1); + if (ret) + goto out_wait_freeze_timed_out; + return 0; out_wait_freeze_timed_out: nvme_stop_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); -out_cleanup_connect_q: +out_cleanup_tagset: nvme_cancel_tagset(&ctrl->ctrl); if (new) - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.tagset); + nvme_remove_io_tag_set(&ctrl->ctrl); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; @@ -1025,9 +968,11 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_admin_queue(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, remove); + nvme_remove_admin_tag_set(&ctrl->ctrl); + } + nvme_rdma_destroy_admin_queue(ctrl); } static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, @@ -1039,9 +984,11 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_queues(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, remove); + nvme_remove_io_tag_set(&ctrl->ctrl); + } + nvme_rdma_free_io_queues(ctrl); } } @@ -1163,14 +1110,18 @@ destroy_io: nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, new); + if (new) + nvme_remove_io_tag_set(&ctrl->ctrl); + nvme_rdma_free_io_queues(ctrl); } destroy_admin: nvme_stop_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, new); + if (new) + nvme_remove_admin_tag_set(&ctrl->ctrl); + 
nvme_rdma_destroy_admin_queue(ctrl); return ret; } @@ -2188,9 +2139,9 @@ static void nvme_rdma_complete_rq(struct request *rq) nvme_complete_rq(rq); } -static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) +static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { @@ -2231,8 +2182,6 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) ctrl->io_queues[HCTX_TYPE_DEFAULT], ctrl->io_queues[HCTX_TYPE_READ], ctrl->io_queues[HCTX_TYPE_POLL]); - - return 0; } static const struct blk_mq_ops nvme_rdma_mq_ops = { |