diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/Kconfig | 1 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 12 | ||||
-rw-r--r-- | drivers/nvme/host/hwmon.c | 13 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 46 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 2 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 9 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 80 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 15 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-bdev.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-file.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 1 |
11 files changed, 110 insertions, 73 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index c6439638a419..b9358db83e96 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config NVME_CORE tristate + select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY config BLK_DEV_NVME tristate "NVM Express block device" diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5dc32b72e7fa..ada59df642d2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -66,8 +66,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); * nvme_reset_wq - hosts nvme reset works * nvme_delete_wq - hosts nvme delete works * - * nvme_wq will host works such are scan, aen handling, fw activation, - * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq + * nvme_wq will host works such as scan, aen handling, fw activation, + * keep-alive, periodic reconnects etc. nvme_reset_wq * runs reset works which also flush works hosted on nvme_wq for * serialization purposes. nvme_delete_wq host controller deletion * works which flush reset works for serialization. @@ -976,7 +976,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) startka = true; spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); } static int nvme_keep_alive(struct nvme_ctrl *ctrl) @@ -1006,7 +1006,7 @@ static void nvme_keep_alive_work(struct work_struct *work) dev_dbg(ctrl->device, "reschedule traffic based keep-alive timer\n"); ctrl->comp_seen = false; - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); return; } @@ -1023,7 +1023,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) if (unlikely(ctrl->kato == 0)) return; - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); } void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) @@ -3867,7 +3867,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) if (!log) return; - if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log, + if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log, sizeof(*log), 0)) dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); kfree(log); diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index a5af21f5d370..2e6477ed420f 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -5,14 +5,11 @@ */ #include <linux/hwmon.h> +#include <linux/units.h> #include <asm/unaligned.h> #include "nvme.h" -/* These macros should be moved to linux/temperature.h */ -#define MILLICELSIUS_TO_KELVIN(t) DIV_ROUND_CLOSEST((t) + 273150, 1000) -#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150) - struct nvme_hwmon_data { struct nvme_ctrl *ctrl; struct nvme_smart_log log; @@ -35,7 +32,7 @@ static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, return -EIO; if (ret < 0) return ret; - *temp = KELVIN_TO_MILLICELSIUS(status & NVME_TEMP_THRESH_MASK); + *temp = kelvin_to_millicelsius(status & NVME_TEMP_THRESH_MASK); return 0; } @@ -46,7 +43,7 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT; int ret; - temp = MILLICELSIUS_TO_KELVIN(temp); + temp = millicelsius_to_kelvin(temp); threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK); if (under) @@ -88,7 +85,7 @@ static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_temp_min: return nvme_get_temp_thresh(data->ctrl, channel, true, val); case hwmon_temp_crit: - *val = KELVIN_TO_MILLICELSIUS(data->ctrl->cctemp); + *val = kelvin_to_millicelsius(data->ctrl->cctemp); return 0; default: break; @@ -105,7 +102,7 @@ static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, temp = get_unaligned_le16(log->temperature); else temp = le16_to_cpu(log->temp_sensor[channel - 1]); - *val = KELVIN_TO_MILLICELSIUS(temp); + *val = kelvin_to_millicelsius(temp); break; case hwmon_temp_alarm: *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 365a2ddbeaa7..9c80f9f08149 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -167,7 +167,6 @@ struct nvme_queue { /* only used for poll queues: */ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; - struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -376,29 +375,17 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, WARN_ON(hctx_idx != 0); WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); - WARN_ON(nvmeq->tags); hctx->driver_data = nvmeq; - nvmeq->tags = &dev->admin_tagset.tags[0]; return 0; } -static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nvme_queue *nvmeq = hctx->driver_data; - - nvmeq->tags = NULL; -} - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; - if (!nvmeq->tags) - nvmeq->tags = &dev->tagset.tags[hctx_idx]; - WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); hctx->driver_data = nvmeq; return 0; @@ -948,6 +935,13 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) writel(head, nvmeq->q_db + nvmeq->dev->db_stride); } +static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return nvmeq->dev->admin_tagset.tags[0]; + return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; +} + static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; @@ -972,7 +966,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) return; } - req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); nvme_end_request(req, cqe->status, cqe->result); } @@ -1407,6 +1401,23 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) nvme_poll_irqdisable(nvmeq, -1); } +/* + * Called only on a device that has been disabled and after all other threads + * that can check this device's completion queues have synced. This is the + * last chance for the driver to see a natural completion before + * nvme_cancel_request() terminates all incomplete requests. + */ +static void nvme_reap_pending_cqes(struct nvme_dev *dev) +{ + u16 start, end; + int i; + + for (i = dev->ctrl.queue_count - 1; i > 0; i--) { + nvme_process_cq(&dev->queues[i], &start, &end, -1); + nvme_complete_cqes(&dev->queues[i], start, end); + } +} + static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, int entry_size) { @@ -1572,7 +1583,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, - .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, }; @@ -2242,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) if (timeout == 0) return false; - /* handle any remaining CQEs */ - if (opcode == nvme_admin_delete_cq && - !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags)) - nvme_poll_irqdisable(nvmeq, -1); - sent--; if (nr_queues) goto retry; @@ -2435,6 +2440,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_suspend_io_queues(dev); nvme_suspend_queue(&dev->queues[0]); nvme_pci_disable(dev); + nvme_reap_pending_cqes(dev); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2a47c6c5007e..3e85c5cacefd 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1088,7 +1088,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) return; - queue_work(nvme_wq, &ctrl->err_work); + queue_work(nvme_reset_wq, &ctrl->err_work); } static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc, diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 6d43b23a0fc8..49d4373b84eb 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -422,7 +422,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) return; - queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work); + queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work); } static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, @@ -1054,7 +1054,12 @@ static void nvme_tcp_io_work(struct work_struct *w) } else if (unlikely(result < 0)) { dev_err(queue->ctrl->ctrl.device, "failed to send request %d\n", result); - if (result != -EPIPE) + + /* + * Fail the request unless peer closed the connection, + * in which case error recovery flow will complete all. + */ + if ((result != -EPIPE) && (result != -ECONNRESET)) nvme_tcp_fail_request(queue->request); nvme_tcp_done_send_req(queue); return; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 28438b833c1b..576de773b4db 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -129,27 +129,8 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen) return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); } -static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) -{ - struct nvmet_req *req; - - while (1) { - mutex_lock(&ctrl->lock); - if (!ctrl->nr_async_event_cmds) { - mutex_unlock(&ctrl->lock); - return; - } - - req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); - } -} - -static void nvmet_async_event_work(struct work_struct *work) +static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) { - struct nvmet_ctrl *ctrl = - container_of(work, struct nvmet_ctrl, async_event_work); struct nvmet_async_event *aen; struct nvmet_req *req; @@ -159,18 +140,41 @@ static void nvmet_async_event_work(struct work_struct *work) struct nvmet_async_event, entry); if (!aen || !ctrl->nr_async_event_cmds) { mutex_unlock(&ctrl->lock); - return; + break; } req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - nvmet_set_result(req, nvmet_async_event_result(aen)); + if (status == 0) + nvmet_set_result(req, nvmet_async_event_result(aen)); list_del(&aen->entry); kfree(aen); mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, 0); + nvmet_req_complete(req, status); + } +} + +static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) +{ + struct nvmet_req *req; + + mutex_lock(&ctrl->lock); + while (ctrl->nr_async_event_cmds) { + req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; + mutex_unlock(&ctrl->lock); + nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); + mutex_lock(&ctrl->lock); } + mutex_unlock(&ctrl->lock); +} + +static void nvmet_async_event_work(struct work_struct *work) +{ + struct nvmet_ctrl *ctrl = + container_of(work, struct nvmet_ctrl, async_event_work); + + nvmet_async_events_process(ctrl, 0); } void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, @@ -555,7 +559,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) } else { struct nvmet_ns *old; - list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) { + list_for_each_entry_rcu(old, &subsys->namespaces, dev_link, + lockdep_is_held(&subsys->lock)) { BUG_ON(ns->nsid == old->nsid); if (ns->nsid < old->nsid) break; @@ -752,19 +757,24 @@ static void nvmet_confirm_sq(struct percpu_ref *ref) void nvmet_sq_destroy(struct nvmet_sq *sq) { + u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; + struct nvmet_ctrl *ctrl = sq->ctrl; + /* * If this is the admin queue, complete all AERs so that our * queue doesn't have outstanding requests on it. */ - if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) - nvmet_async_events_free(sq->ctrl); + if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) { + nvmet_async_events_process(ctrl, status); + nvmet_async_events_free(ctrl); + } percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); - if (sq->ctrl) { - nvmet_ctrl_put(sq->ctrl); + if (ctrl) { + nvmet_ctrl_put(ctrl); sq->ctrl = NULL; /* allows reusing the queue later */ } } @@ -938,6 +948,17 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) } EXPORT_SYMBOL_GPL(nvmet_check_data_len); +bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) +{ + if (unlikely(data_len > req->transfer_len)) { + req->error_loc = offsetof(struct nvme_common_command, dptr); + nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); + return false; + } + + return true; +} + int nvmet_req_alloc_sgl(struct nvmet_req *req) { struct pci_dev *p2p_dev = NULL; @@ -1172,7 +1193,8 @@ static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl, ctrl->p2p_client = get_device(req->p2p_client); - list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) + list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link, + lockdep_is_held(&ctrl->subsys->lock)) nvmet_p2pmem_ns_add_p2p(ctrl, ns); } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f7297473d9eb..feef15c38ec9 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -109,6 +109,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 sqsize = le16_to_cpu(c->sqsize); struct nvmet_ctrl *old; + u16 ret; old = cmpxchg(&req->sq->ctrl, NULL, ctrl); if (old) { @@ -119,7 +120,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (!sqsize) { pr_warn("queue size zero!\n"); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); - return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + goto err; } /* note: convert queue size from 0's-based value to 1's-based value */ @@ -132,16 +134,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) } if (ctrl->ops->install_queue) { - u16 ret = ctrl->ops->install_queue(req->sq); - + ret = ctrl->ops->install_queue(req->sq); if (ret) { pr_err("failed to install queue %d cntlid %d ret %x\n", - qid, ret, ctrl->cntlid); - return ret; + qid, ctrl->cntlid, ret); + goto err; } } return 0; + +err: + req->sq->ctrl = NULL; + return ret; } static void nvmet_execute_admin_connect(struct nvmet_req *req) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index b6fca0e421ef..ea0e596be15d 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -280,7 +280,7 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req) static void nvmet_bdev_execute_dsm(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; switch (le32_to_cpu(req->cmd->dsm.attributes)) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index caebfce06605..cd5670b83118 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -336,7 +336,7 @@ static void nvmet_file_dsm_work(struct work_struct *w) static void nvmet_file_execute_dsm(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); schedule_work(&req->f.work); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 46df45e837c9..eda28b22a2c8 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -374,6 +374,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); +bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); void nvmet_req_free_sgl(struct nvmet_req *req); |