From 4fb135ad154060bf289ccf35734e3d3b6e5029a7 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 19 Apr 2018 19:43:42 +0200 Subject: nvme: fc: provide a descriptive error Provide a descriptive error in case an lport to rport association isn't found when creating the FC-NVME controller. Currently it's very hard to debug the reason for a failed connect attempt without a look at the source. Signed-off-by: Johannes Thumshirn Reviewed-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 6cb26bcf6ec0..8b66879b4ebf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3284,6 +3284,8 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) } spin_unlock_irqrestore(&nvme_fc_lock, flags); + pr_warn("%s: %s - %s combination not found\n", + __func__, opts->traddr, opts->host_traddr); return ERR_PTR(-ENOENT); } -- cgit v1.2.3-59-g8ed1b From cde6bf4f440631b2735be83c39426f4f469b0b21 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 26 Apr 2018 14:25:15 -0600 Subject: nvme: change order of qid and cmdid in completion trace Keith reported that command submission and command completion tracepoints have the order of the cmdid and qid fields swapped. While it isn't easily possible to change the command submission tracepoint, as there is a regression test parsing it in blktests, we can swap the command completion tracepoint to have the fields aligned. Signed-off-by: Johannes Thumshirn Reported-by: Keith Busch Reviewed-by: Jens Axboe Signed-off-by: Keith Busch --- drivers/nvme/host/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index ea91fccd1bc0..01390f0e1671 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -148,8 +148,8 @@ TRACE_EVENT(nvme_complete_rq, __entry->flags = nvme_req(req)->flags; __entry->status = nvme_req(req)->status; ), - TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u", - __entry->cid, __entry->qid, __entry->result, + TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u", + __entry->qid, __entry->cid, __entry->result, __entry->retries, __entry->flags, __entry->status) ); -- cgit v1.2.3-59-g8ed1b From 0302ae60fc2368de5c561be49e2d9ea26dd462de Mon Sep 17 00:00:00 2001 From: Micah Parrish Date: Thu, 12 Apr 2018 13:25:25 -0600 Subject: NVMe: Add Quirk Delay before CHK RDY for Seagate Nytro Flash Storage Add Seagate Nytro Flash Storage nvme drive to quirk list for NVME_QUIRK_DELAY_BEFORE_CHK_RDY, which solves a bug where the drive is probed on hot-add before the firmware is ready, I/O errors are generated while reading sector 0, and Linux is "unable to read partition table". Signed-off-by: Micah Parrish Reviewed-by: Martin K.
Petersen Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fbc71fac6f1e..af88f3fe2cc0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2704,6 +2704,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, + { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ -- cgit v1.2.3-59-g8ed1b From ea48e877994f086af481427bac110aa63686c3ce Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Thu, 26 Apr 2018 14:59:19 -0600 Subject: nvme: lightnvm: add granby support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new lightnvm quirk to identify CNEX’s Granby controller. Signed-off-by: Wei Xu Reviewed-by: Javier González Reviewed-by: Matias Bjørling Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index af88f3fe2cc0..8a86dbb0583d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2720,6 +2720,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ .driver_data = NVME_QUIRK_LIGHTNVM, }, + { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, -- cgit v1.2.3-59-g8ed1b From 1811977568e0f59d145da087e45f3dca09dab1c3 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 27 Apr 2018 13:42:52 -0600 Subject: nvme/pci: Use async_schedule for initial reset work This patch schedules the initial controller reset in an async_domain so that it can be synchronized from wait_for_device_probe(). This way the kernel waits for the initial nvme controller scan to complete for all devices before proceeding with the boot sequence, which may have nvme dependencies. 
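The same pattern applies beyond nvme: any long-running probe work scheduled into the default async domain is waited on by wait_for_device_probe(), which calls async_synchronize_full(). A minimal sketch under that assumption, using only the generic API from <linux/async.h> — the foo_* names are illustrative, not the driver's:

#include <linux/async.h>
#include <linux/device.h>

struct foo_dev {
	struct device *dev;
};

/* Runs in the default async domain, off the probe path. */
static void foo_async_probe(void *data, async_cookie_t cookie)
{
	struct foo_dev *foo = data;

	/* slow controller bring-up / initial scan goes here */
	dev_info(foo->dev, "async init complete\n");
}

static int foo_probe(struct foo_dev *foo)
{
	async_schedule(foo_async_probe, foo);
	return 0;	/* probe returns at once; boot still waits via
			 * wait_for_device_probe() -> async_synchronize_full() */
}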
Reported-by: Mikulas Patocka Tested-by: Mikulas Patocka Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8a86dbb0583d..dcd1be005eef 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -2488,6 +2489,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return 0; } +static void nvme_async_probe(void *data, async_cookie_t cookie) +{ + struct nvme_dev *dev = data; + nvme_reset_ctrl_sync(&dev->ctrl); + flush_work(&dev->ctrl.scan_work); +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; @@ -2532,7 +2540,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); - nvme_reset_ctrl(&dev->ctrl); + async_schedule(nvme_async_probe, dev); return 0; -- cgit v1.2.3-59-g8ed1b From 3831761eb859f5599a522f064007b31100510c3a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 May 2018 11:06:54 -0600 Subject: nvme: only reconfigure discard if necessary Currently nvme reconfigures discard for every disk revalidation. This is problematic because any O_WRONLY or O_RDWR open will trigger a partition scan through udev/systemd, and we will reconfigure discard. This blows away any user settings, like discard_max_bytes. Only re-configure the user settable settings if we need to. Signed-off-by: Jens Axboe [removed redundant queue flag setting] Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9df4f71e58ca..62262fac7a5d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1347,13 +1347,19 @@ static void nvme_set_chunk_size(struct nvme_ns *ns) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } -static void nvme_config_discard(struct nvme_ctrl *ctrl, - unsigned stream_alignment, struct request_queue *queue) +static void nvme_config_discard(struct nvme_ns *ns) { + struct nvme_ctrl *ctrl = ns->ctrl; + struct request_queue *queue = ns->queue; u32 size = queue_logical_block_size(queue); - if (stream_alignment) - size *= stream_alignment; + if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); + return; + } + + if (ctrl->nr_streams && ns->sws && ns->sgs) + size *= ns->sws * ns->sgs; BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); @@ -1361,9 +1367,12 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; + /* If discard is already enabled, don't reset queue limits */ + if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) + return; + blk_queue_max_discard_sectors(queue, UINT_MAX); blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -1407,10 +1416,6 @@ static void nvme_update_disk_info(struct gendisk *disk, { sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); unsigned short bs = 1 << ns->lba_shift; - unsigned stream_alignment = 0; - - if 
(ns->ctrl->nr_streams && ns->sws && ns->sgs) - stream_alignment = ns->sws * ns->sgs; blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); @@ -1424,10 +1429,9 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_init_integrity(disk, ns->ms, ns->pi_type); if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) capacity = 0; - set_capacity(disk, capacity); - if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) - nvme_config_discard(ns->ctrl, stream_alignment, disk->queue); + set_capacity(disk, capacity); + nvme_config_discard(ns); blk_mq_unfreeze_queue(disk->queue); } -- cgit v1.2.3-59-g8ed1b From 80f513b5056d0bf127653d2327b7b24e322dc7e3 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 7 May 2018 08:30:24 -0600 Subject: nvme/pci: Hold controller reference during async probe It is possible the driver's remove callback may have freed the controller if it is invoked prior to async_schedule starting the reset_work. This patch fixes that by holding a reference on the controller. Reported-by: Mikulas Patocka Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index dcd1be005eef..7acecdf25621 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2492,8 +2492,10 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) static void nvme_async_probe(void *data, async_cookie_t cookie) { struct nvme_dev *dev = data; + nvme_reset_ctrl_sync(&dev->ctrl); flush_work(&dev->ctrl.scan_work); + nvme_put_ctrl(&dev->ctrl); } static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -2540,6 +2542,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); + nvme_get_ctrl(&dev->ctrl); async_schedule(nvme_async_probe, dev); return 0; -- cgit v1.2.3-59-g8ed1b From cc1d5e749a2e1cf59fa940b976181e631d6985e1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 May 2018 08:34:20 -0600 Subject: nvme/pci: Sync controller reset for AER slot_reset AER handling expects a successful return from slot_reset to mean the driver made the device functional again. The nvme driver had been using an asynchronous reset to recover the device, so the device may still be initializing after control is returned to the AER handler. This creates problems for subsequent event handling, causing the initialization to fail. This patch fixes that by syncing the controller reset before returning to the AER driver, and reporting the true state of the reset. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199657 Reported-by: Alex Gagniuc Cc: Sinan Kaya Cc: Bjorn Helgaas Cc: stable@vger.kernel.org Tested-by: Alex Gagniuc Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7acecdf25621..b58aebb347a0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2681,8 +2681,15 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - nvme_reset_ctrl(&dev->ctrl); - return PCI_ERS_RESULT_RECOVERED; + nvme_reset_ctrl_sync(&dev->ctrl); + + switch (dev->ctrl.state) { + case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: + return PCI_ERS_RESULT_RECOVERED; + default: + return PCI_ERS_RESULT_DISCONNECT; + } } static void nvme_error_resume(struct pci_dev *pdev) -- cgit v1.2.3-59-g8ed1b From 287a63ebbe7c0c1e1c16f7462b4129edf5cd08df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 May 2018 18:31:46 +0200 Subject: nvme: mark the result argument to nvme_complete_async_event volatile We'll need that in the PCIe driver soon as we'll read it straight off the CQ. Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/nvme.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 62262fac7a5d..b070c659391f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3344,7 +3344,7 @@ static void nvme_fw_act_work(struct work_struct *work) } void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, - union nvme_result *res) + volatile union nvme_result *res) { u32 result = le32_to_cpu(res->u32); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 061fecfd44f5..7a872b0d53a7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -400,7 +400,7 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send); void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, - union nvme_result *res); + volatile union nvme_result *res); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); -- cgit v1.2.3-59-g8ed1b From 750dde4472e48eadf28221b7eb02d493db1bcfd0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 May 2018 08:37:04 -0600 Subject: nvme-pci: simplify nvme_cqe_valid We always look at the current CQ head and phase, so don't pass these as separate arguments, and rename the function to nvme_cqe_pending. 
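For readers unfamiliar with the NVMe completion-queue phase tag: the controller flips the phase bit of the entries it writes on each pass through the ring, so an entry is new exactly when its phase matches the host's expected phase. A toy model of that invariant (simplified stand-in types, not the driver's structures):

struct toy_cqe {
	unsigned short status;		/* bit 0 is the phase tag */
};

struct toy_cq {
	struct toy_cqe *cqes;
	unsigned short head;
	unsigned short depth;
	unsigned char phase;		/* expected phase, starts at 1 */
};

/* Equivalent of nvme_cqe_pending(): the entry at head is new iff
 * its phase tag matches what the host currently expects. */
static int toy_cqe_pending(struct toy_cq *cq)
{
	return (cq->cqes[cq->head].status & 1) == cq->phase;
}

static void toy_advance_head(struct toy_cq *cq)
{
	if (++cq->head == cq->depth) {
		cq->head = 0;
		cq->phase = !cq->phase;	/* controller writes the
					 * opposite phase after a wrap */
	}
}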
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b58aebb347a0..cf36dd39f2a5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -915,10 +915,10 @@ static void nvme_pci_complete_rq(struct request *req) } /* We read the CQE phase first to check if the rest of the entry is valid */ -static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, - u16 phase) +static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { - return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; + return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == + nvmeq->cq_phase; } static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) @@ -965,7 +965,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, static inline bool nvme_read_cqe(struct nvme_queue *nvmeq, struct nvme_completion *cqe) { - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { + if (nvme_cqe_pending(nvmeq)) { *cqe = nvmeq->cqes[nvmeq->cq_head]; if (++nvmeq->cq_head == nvmeq->q_depth) { @@ -1006,7 +1006,7 @@ static irqreturn_t nvme_irq(int irq, void *data) static irqreturn_t nvme_irq_check(int irq, void *data) { struct nvme_queue *nvmeq = data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) + if (nvme_cqe_pending(nvmeq)) return IRQ_WAKE_THREAD; return IRQ_NONE; } @@ -1016,7 +1016,7 @@ static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) struct nvme_completion cqe; int found = 0, consumed = 0; - if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) + if (!nvme_cqe_pending(nvmeq)) return 0; spin_lock_irq(&nvmeq->q_lock); -- cgit v1.2.3-59-g8ed1b From f9dde187fa921c12a8680089a77595b866e65455 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 May 2018 18:31:48 +0200 Subject: nvme-pci: remove cq check after submission We always check the completion queue after submitting, but in my testing this isn't a win even on DRAM/xpoint devices. In some cases it's actually worse. Kill it. Signed-off-by: Jens Axboe Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cf36dd39f2a5..577570d3e1f4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -69,7 +69,6 @@ MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); struct nvme_dev; struct nvme_queue; -static void nvme_process_cq(struct nvme_queue *nvmeq); static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); /* @@ -896,7 +895,6 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto out_cleanup_iod; } __nvme_submit_cmd(nvmeq, &cmnd); - nvme_process_cq(nvmeq); spin_unlock_irq(&nvmeq->q_lock); return BLK_STS_OK; out_cleanup_iod: -- cgit v1.2.3-59-g8ed1b From d1f06f4ae0410f8e5025f3c9129a52b86579e174 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 May 2018 18:31:49 +0200 Subject: nvme-pci: move ->cq_vector == -1 check outside of ->q_lock We only clear it dynamically in nvme_suspend_queue(). When we do, ensure to do a full flush so that any nvme_queue_rq() invocation will see it. Ideally we'd kill this check completely, but we're using it to flush requests on a dying queue. 
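The full flush mentioned above pairs a plain store with lockless readers. A simplified kernel-style model of that ordering — not the driver code, and omitting the READ_ONCE()/WRITE_ONCE() annotations a production version might add:

static int cq_vector;		/* >= 0: queue live, -1: queue dying */

/* teardown path: publish the dying state, then force visibility
 * before draining outstanding requests */
static void toy_suspend_queue(void)
{
	cq_vector = -1;
	mb();
}

/* submission path: cheap lockless check, as in nvme_queue_rq() */
static int toy_queue_rq(void)
{
	if (cq_vector < 0)
		return -EIO;	/* queue is being torn down */
	/* ... take the queue lock and submit the command ... */
	return 0;
}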
Signed-off-by: Jens Axboe Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 577570d3e1f4..3dfedc84a921 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -872,6 +872,13 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_command cmnd; blk_status_t ret; + /* + * We should not need to do this, but we're still using this to + * ensure we can drain requests on a dying queue. + */ + if (unlikely(nvmeq->cq_vector < 0)) + return BLK_STS_IOERR; + ret = nvme_setup_cmd(ns, req, &cmnd); if (ret) return ret; @@ -889,11 +896,6 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(req); spin_lock_irq(&nvmeq->q_lock); - if (unlikely(nvmeq->cq_vector < 0)) { - ret = BLK_STS_IOERR; - spin_unlock_irq(&nvmeq->q_lock); - goto out_cleanup_iod; - } __nvme_submit_cmd(nvmeq, &cmnd); spin_unlock_irq(&nvmeq->q_lock); return BLK_STS_OK; @@ -1321,6 +1323,12 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) nvmeq->cq_vector = -1; spin_unlock_irq(&nvmeq->q_lock); + /* + * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without + * having to grab the lock. + */ + mb(); + if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); -- cgit v1.2.3-59-g8ed1b From 5cb525c8315f1dd9232b59cd1cf1e0f19ff1a5df Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 May 2018 18:31:50 +0200 Subject: nvme-pci: handle completions outside of the queue lock Split the completion of events into a two part process: 1) Reap the events inside the queue lock 2) Complete the events outside the queue lock Since we never wrap the queue, we can access it locklessly after we've updated the completion queue head. This patch started off with batching events on the stack, but with this trick we don't have to. Keith Busch came up with that idea. Note that this kills the ->cqe_seen as well. I haven't been able to trigger any ill effects of this. If we do race with polling every so often, it should be rare enough NOT to trigger any issues. 
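Stripped of driver specifics, the two-phase scheme reduces to: consume ring indices under the lock, run per-entry completion work after dropping it. A sketch reusing the toy ring above, with a lock field and a hypothetical per-entry handler assumed:

/* Phase 1: under the CQ lock, only move the head and record the
 * window of entries we claimed. */
static void toy_reap(struct toy_cq *cq, u16 *start, u16 *end)
{
	spin_lock(&cq->lock);		/* assumes a lock field on toy_cq */
	*start = cq->head;
	while (toy_cqe_pending(cq))
		toy_advance_head(cq);
	*end = cq->head;
	/* ring the CQ doorbell here if the head moved */
	spin_unlock(&cq->lock);
}

/* Phase 2: lock-free.  Per the commit message, this is safe because
 * the window [start, end) was claimed by exactly one consumer and
 * the queue never wraps onto entries still being processed. */
static void toy_complete(struct toy_cq *cq, u16 start, u16 end)
{
	while (start != end) {
		toy_handle_one(&cq->cqes[start]);	/* hypothetical */
		if (++start == cq->depth)
			start = 0;
	}
}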
Signed-off-by: Jens Axboe Signed-off-by: Keith Busch [hch: refactored, restored poll early exit optimization] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 87 +++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 42 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3dfedc84a921..7fbb6f94b561 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -161,7 +161,6 @@ struct nvme_queue { u16 cq_head; u16 qid; u8 cq_phase; - u8 cqe_seen; u32 *dbbuf_sq_db; u32 *dbbuf_cq_db; u32 *dbbuf_sq_ei; @@ -932,9 +931,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) } } -static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, - struct nvme_completion *cqe) +static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { + volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; if (unlikely(cqe->command_id >= nvmeq->q_depth)) { @@ -957,50 +956,58 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, return; } - nvmeq->cqe_seen = 1; req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); nvme_end_request(req, cqe->status, cqe->result); } -static inline bool nvme_read_cqe(struct nvme_queue *nvmeq, - struct nvme_completion *cqe) +static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) { - if (nvme_cqe_pending(nvmeq)) { - *cqe = nvmeq->cqes[nvmeq->cq_head]; + while (start != end) { + nvme_handle_cqe(nvmeq, start); + if (++start == nvmeq->q_depth) + start = 0; + } +} - if (++nvmeq->cq_head == nvmeq->q_depth) { - nvmeq->cq_head = 0; - nvmeq->cq_phase = !nvmeq->cq_phase; - } - return true; +static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) +{ + if (++nvmeq->cq_head == nvmeq->q_depth) { + nvmeq->cq_head = 0; + nvmeq->cq_phase = !nvmeq->cq_phase; } - return false; } -static void nvme_process_cq(struct nvme_queue *nvmeq) +static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, + u16 *end, int tag) { - struct nvme_completion cqe; - int consumed = 0; + bool found = false; - while (nvme_read_cqe(nvmeq, &cqe)) { - nvme_handle_cqe(nvmeq, &cqe); - consumed++; + *start = nvmeq->cq_head; + while (!found && nvme_cqe_pending(nvmeq)) { + if (nvmeq->cqes[nvmeq->cq_head].command_id == tag) + found = true; + nvme_update_cq_head(nvmeq); } + *end = nvmeq->cq_head; - if (consumed) + if (*start != *end) nvme_ring_cq_doorbell(nvmeq); + return found; } static irqreturn_t nvme_irq(int irq, void *data) { - irqreturn_t result; struct nvme_queue *nvmeq = data; + u16 start, end; + spin_lock(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - result = nvmeq->cqe_seen ? 
IRQ_HANDLED : IRQ_NONE; - nvmeq->cqe_seen = 0; + nvme_process_cq(nvmeq, &start, &end, -1); spin_unlock(&nvmeq->q_lock); - return result; + + if (start == end) + return IRQ_NONE; + nvme_complete_cqes(nvmeq, start, end); + return IRQ_HANDLED; } static irqreturn_t nvme_irq_check(int irq, void *data) @@ -1013,27 +1020,17 @@ static irqreturn_t nvme_irq_check(int irq, void *data) static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_completion cqe; - int found = 0, consumed = 0; + u16 start, end; + bool found; if (!nvme_cqe_pending(nvmeq)) return 0; spin_lock_irq(&nvmeq->q_lock); - while (nvme_read_cqe(nvmeq, &cqe)) { - nvme_handle_cqe(nvmeq, &cqe); - consumed++; - - if (tag == cqe.command_id) { - found = 1; - break; - } - } - - if (consumed) - nvme_ring_cq_doorbell(nvmeq); + found = nvme_process_cq(nvmeq, &start, &end, tag); spin_unlock_irq(&nvmeq->q_lock); + nvme_complete_cqes(nvmeq, start, end); return found; } @@ -1340,6 +1337,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; + u16 start, end; if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); @@ -1347,8 +1345,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); spin_lock_irq(&nvmeq->q_lock); - nvme_process_cq(nvmeq); + nvme_process_cq(nvmeq, &start, &end, -1); spin_unlock_irq(&nvmeq->q_lock); + + nvme_complete_cqes(nvmeq, start, end); } static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, @@ -1995,6 +1995,7 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error) static void nvme_del_cq_end(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; + u16 start, end; if (!error) { unsigned long flags; @@ -2006,8 +2007,10 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error) */ spin_lock_irqsave_nested(&nvmeq->q_lock, flags, SINGLE_DEPTH_NESTING); - nvme_process_cq(nvmeq); + nvme_process_cq(nvmeq, &start, &end, -1); spin_unlock_irqrestore(&nvmeq->q_lock, flags); + + nvme_complete_cqes(nvmeq, start, end); } nvme_del_queue_end(req, error); -- cgit v1.2.3-59-g8ed1b From 1ab0cd6966fc4a7e9dfbd7c6eda917ae9c977f42 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 May 2018 18:31:51 +0200 Subject: nvme-pci: split the nvme queue lock into submission and completion locks This is now feasible. We protect the submission queue ring with ->sq_lock, and the completion side with ->cq_lock. 
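A design note on the hunk below: ____cacheline_aligned_in_smp starts the completion-side state on its own cache line, so the submitting CPU and the completing CPU don't false-share a line while taking their now-independent locks. Schematically (field names illustrative, not the driver's):

struct toy_sqe;

struct toy_queue {
	/* producer side: touched only by the submission path */
	spinlock_t sq_lock;
	struct toy_sqe *sq_cmds;
	u16 sq_tail;

	/* consumer side: begins a fresh cache line so CQ traffic
	 * doesn't bounce the producer's line between CPUs */
	spinlock_t cq_lock ____cacheline_aligned_in_smp;
	volatile struct toy_cqe *cqes;
	u16 cq_head;
};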
Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7fbb6f94b561..1b49b694a57a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -147,9 +147,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) struct nvme_queue { struct device *q_dmadev; struct nvme_dev *dev; - spinlock_t q_lock; + spinlock_t sq_lock; struct nvme_command *sq_cmds; struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; @@ -894,9 +895,9 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(req); - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->sq_lock); __nvme_submit_cmd(nvmeq, &cmnd); - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->sq_lock); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -1000,9 +1001,9 @@ static irqreturn_t nvme_irq(int irq, void *data) struct nvme_queue *nvmeq = data; u16 start, end; - spin_lock(&nvmeq->q_lock); + spin_lock(&nvmeq->cq_lock); nvme_process_cq(nvmeq, &start, &end, -1); - spin_unlock(&nvmeq->q_lock); + spin_unlock(&nvmeq->cq_lock); if (start == end) return IRQ_NONE; @@ -1026,9 +1027,9 @@ static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) if (!nvme_cqe_pending(nvmeq)) return 0; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); found = nvme_process_cq(nvmeq, &start, &end, tag); - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); nvme_complete_cqes(nvmeq, start, end); return found; @@ -1051,9 +1052,9 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->sq_lock); __nvme_submit_cmd(nvmeq, &c); - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->sq_lock); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -1310,15 +1311,15 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { int vector; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); if (nvmeq->cq_vector == -1) { - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); return 1; } vector = nvmeq->cq_vector; nvmeq->dev->online_queues--; nvmeq->cq_vector = -1; - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); /* * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without @@ -1344,9 +1345,9 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) else nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); nvme_process_cq(nvmeq, &start, &end, -1); - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); nvme_complete_cqes(nvmeq, start, end); } @@ -1406,7 +1407,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; - spin_lock_init(&nvmeq->q_lock); + spin_lock_init(&nvmeq->sq_lock); + spin_lock_init(&nvmeq->cq_lock); nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -1442,7 +1444,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) { struct 
nvme_dev *dev = nvmeq->dev; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); nvmeq->sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; @@ -1450,7 +1452,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); } static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) @@ -2001,14 +2003,14 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error) unsigned long flags; /* - * We might be called with the AQ q_lock held - * and the I/O queue q_lock should always + * We might be called with the AQ cq_lock held + * and the I/O queue cq_lock should always * nest inside the AQ one. */ - spin_lock_irqsave_nested(&nvmeq->q_lock, flags, + spin_lock_irqsave_nested(&nvmeq->cq_lock, flags, SINGLE_DEPTH_NESTING); nvme_process_cq(nvmeq, &start, &end, -1); - spin_unlock_irqrestore(&nvmeq->q_lock, flags); + spin_unlock_irqrestore(&nvmeq->cq_lock, flags); nvme_complete_cqes(nvmeq, start, end); } -- cgit v1.2.3-59-g8ed1b From 1eae349d18fc7b8e7d88673f7194d7a542fdb25c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 May 2018 18:31:52 +0200 Subject: nvme-pci: drop IRQ disabling on submission queue lock Since we aren't sharing the lock for completions now, we don't have to make it IRQ safe. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1b49b694a57a..133fc063d10c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -895,9 +895,9 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(req); - spin_lock_irq(&nvmeq->sq_lock); + spin_lock(&nvmeq->sq_lock); __nvme_submit_cmd(nvmeq, &cmnd); - spin_unlock_irq(&nvmeq->sq_lock); + spin_unlock(&nvmeq->sq_lock); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -1052,9 +1052,9 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - spin_lock_irq(&nvmeq->sq_lock); + spin_lock(&nvmeq->sq_lock); __nvme_submit_cmd(nvmeq, &c); - spin_unlock_irq(&nvmeq->sq_lock); + spin_unlock(&nvmeq->sq_lock); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) -- cgit v1.2.3-59-g8ed1b From 68fa9dbe0823e4f833ad8ddbb59c9ca279e84d57 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 21 May 2018 08:41:52 -0600 Subject: nvme-pci: fix race between poll and IRQ completions If polling completions are racing with the IRQ triggered by a completion, the IRQ handler will find no work and return IRQ_NONE. 
This can trigger complaints about spurious interrupts: [ 560.169153] irq 630: nobody cared (try booting with the "irqpoll" option) [ 560.175988] CPU: 40 PID: 0 Comm: swapper/40 Not tainted 4.17.0-rc2+ #65 [ 560.175990] Hardware name: Intel Corporation S2600STB/S2600STB, BIOS SE5C620.86B.00.01.0010.010920180151 01/09/2018 [ 560.175991] Call Trace: [ 560.175994] [ 560.176005] dump_stack+0x5c/0x7b [ 560.176010] __report_bad_irq+0x30/0xc0 [ 560.176013] note_interrupt+0x235/0x280 [ 560.176020] handle_irq_event_percpu+0x51/0x70 [ 560.176023] handle_irq_event+0x27/0x50 [ 560.176026] handle_edge_irq+0x6d/0x180 [ 560.176031] handle_irq+0xa5/0x110 [ 560.176036] do_IRQ+0x41/0xc0 [ 560.176042] common_interrupt+0xf/0xf [ 560.176043] [ 560.176050] RIP: 0010:cpuidle_enter_state+0x9b/0x2b0 [ 560.176052] RSP: 0018:ffffa0ed4659fe98 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd [ 560.176055] RAX: ffff9527beb20a80 RBX: 000000826caee491 RCX: 000000000000001f [ 560.176056] RDX: 000000826caee491 RSI: 00000000335206ee RDI: 0000000000000000 [ 560.176057] RBP: 0000000000000001 R08: 00000000ffffffff R09: 0000000000000008 [ 560.176059] R10: ffffa0ed4659fe78 R11: 0000000000000001 R12: ffff9527beb29358 [ 560.176060] R13: ffffffffa235d4b8 R14: 0000000000000000 R15: 000000826caed593 [ 560.176065] ? cpuidle_enter_state+0x8b/0x2b0 [ 560.176071] do_idle+0x1f4/0x260 [ 560.176075] cpu_startup_entry+0x6f/0x80 [ 560.176080] start_secondary+0x184/0x1d0 [ 560.176085] secondary_startup_64+0xa5/0xb0 [ 560.176088] handlers: [ 560.178387] [<00000000efb612be>] nvme_irq [nvme] [ 560.183019] Disabling IRQ #630 A previous commit removed ->cqe_seen that was handling this case, but we need to handle this a bit differently due to completions now running outside the queue lock. Return IRQ_HANDLED from the IRQ handler, if the completion ring head was moved since we last saw it. 
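The logic of the fix, in terms of the toy ring from earlier (a last_head field and locked-reap/complete helpers are assumed): even if this handler finds the ring empty, the interrupt was genuine whenever the head has moved since the last run, because a poller may have consumed the work that raised it.

static irqreturn_t toy_irq(int irq, void *data)
{
	struct toy_cq *cq = data;
	irqreturn_t ret = IRQ_NONE;
	u16 start, end;

	spin_lock(&cq->lock);
	/* a racing poller may have emptied the ring already */
	if (cq->head != cq->last_head)
		ret = IRQ_HANDLED;
	toy_reap_locked(cq, &start, &end);	/* reap with lock held */
	cq->last_head = cq->head;
	spin_unlock(&cq->lock);

	if (start != end) {
		toy_complete(cq, start, end);
		return IRQ_HANDLED;
	}
	return ret;
}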
Fixes: 5cb525c8315f ("nvme-pci: handle completions outside of the queue lock") Reported-by: Keith Busch Reviewed-by: Keith Busch Tested-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 133fc063d10c..917e1714f7d9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -160,6 +160,7 @@ struct nvme_queue { s16 cq_vector; u16 sq_tail; u16 cq_head; + u16 last_cq_head; u16 qid; u8 cq_phase; u32 *dbbuf_sq_db; @@ -999,16 +1000,22 @@ static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, static irqreturn_t nvme_irq(int irq, void *data) { struct nvme_queue *nvmeq = data; + irqreturn_t ret = IRQ_NONE; u16 start, end; spin_lock(&nvmeq->cq_lock); + if (nvmeq->cq_head != nvmeq->last_cq_head) + ret = IRQ_HANDLED; nvme_process_cq(nvmeq, &start, &end, -1); + nvmeq->last_cq_head = nvmeq->cq_head; spin_unlock(&nvmeq->cq_lock); - if (start == end) - return IRQ_NONE; - nvme_complete_cqes(nvmeq, start, end); - return IRQ_HANDLED; + if (start != end) { + nvme_complete_cqes(nvmeq, start, end); + return IRQ_HANDLED; + } + + return ret; } static irqreturn_t nvme_irq_check(int irq, void *data) -- cgit v1.2.3-59-g8ed1b From 978628ec795b18179e74ef12cfcd35bae569d13e Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 17 May 2018 13:52:50 +0200 Subject: nvme: fix lockdep warning in nvme_mpath_clear_current_path When running blktests' nvme/005 with a lockdep-enabled kernel the test case fails due to the following lockdep splat in dmesg: ============================= WARNING: suspicious RCU usage 4.17.0-rc5 #881 Not tainted ----------------------------- drivers/nvme/host/nvme.h:457 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by kworker/u32:5/1102: #0: (ptrval) ((wq_completion)"nvme-wq"){+.+.}, at: process_one_work+0x152/0x5c0 #1: (ptrval) ((work_completion)(&ctrl->scan_work)){+.+.}, at: process_one_work+0x152/0x5c0 #2: (ptrval) (&subsys->lock#2){+.+.}, at: nvme_ns_remove+0x43/0x1c0 [nvme_core] The only caller of nvme_mpath_clear_current_path() is nvme_ns_remove() which holds the subsys lock, so it's likely a false positive, but when using rcu_access_pointer(), we're telling RCU and lockdep that we're only after the pointer value. Fixes: 32acab3181c7 ("nvme: implement multipath access to nvme subsystems") Signed-off-by: Johannes Thumshirn Suggested-by: Paul E.
McKenney Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 35d9915829e2..ec6e4acc4d48 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -22,6 +22,7 @@ #include #include #include +#include extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -449,7 +450,7 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; - if (head && ns == srcu_dereference(head->current_path, &head->srcu)) + if (head && ns == rcu_access_pointer(head->current_path)) rcu_assign_pointer(head->current_path, NULL); } struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); -- cgit v1.2.3-59-g8ed1b From e9a9853c23c13a37546397b61b270999fd0fb759 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Wed, 23 May 2018 17:56:11 +0300 Subject: nvme: host: core: fix precedence of ternary operator The ternary operator has lower precedence than bitwise OR, so 'cdw10' was calculated incorrectly. Signed-off-by: Ivan Bornyakov Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5e1d6d69b307..dc8aa2c1c22a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1578,7 +1578,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -1590,7 +1590,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; + u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } -- cgit v1.2.3-59-g8ed1b From a8e3e0bb749c0893a2cfd24652a77f45eea338d3 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Thu, 24 May 2018 17:51:33 +0800 Subject: nvme-pci: set nvmeq->cq_vector after alloc cq/sq Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will invoke free_irq for it and cause a 'Trying to free already-free IRQ xxx' warning if the create CQ/SQ command times out.
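The general rule the patch applies: don't publish a resource handle — here the IRQ vector stored in ->cq_vector, which teardown uses to decide whether to free_irq() — until everything behind it exists. A hedged sketch with made-up helpers and an assumed signed cq_vector field on the toy queue from earlier:

static int toy_create_queue(struct toy_queue *q, s16 vector)
{
	int ret;

	ret = toy_alloc_cq(q, vector);	/* vector passed by value */
	if (ret)
		return ret;

	ret = toy_alloc_sq(q);
	if (ret)
		goto delete_cq;

	/* only now can teardown paths see a vector worth freeing */
	q->cq_vector = vector;
	return 0;

delete_cq:
	toy_delete_cq(q);
	return ret;
}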
Signed-off-by: Jianchao Wang Reviewed-by: Keith Busch [hch: fixed to pass a s16 and clean up the comment] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 917e1714f7d9..41880cb9e502 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1076,7 +1076,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, - struct nvme_queue *nvmeq) + struct nvme_queue *nvmeq, s16 vector) { struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; @@ -1091,7 +1091,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); + c.create_cq.irq_vector = cpu_to_le16(vector); return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1466,6 +1466,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) { struct nvme_dev *dev = nvmeq->dev; int result; + s16 vector; if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth), @@ -1478,15 +1479,21 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) * A queue's vector matches the queue identifier unless the controller * has only one vector available. */ - nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid; - result = adapter_alloc_cq(dev, qid, nvmeq); + vector = dev->num_vecs == 1 ? 0 : qid; + result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result < 0) - goto release_vector; + goto out; result = adapter_alloc_sq(dev, qid, nvmeq); if (result < 0) goto release_cq; + /* + * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will + * invoke free_irq for it and cause a 'Trying to free already-free IRQ + * xxx' warning if the create CQ/SQ command times out. + */ + nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); result = queue_request_irq(nvmeq); if (result < 0) @@ -1494,13 +1501,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; - release_sq: +release_sq: + nvmeq->cq_vector = -1; dev->online_queues--; adapter_delete_sq(dev, qid); - release_cq: +release_cq: adapter_delete_cq(dev, qid); - release_vector: - nvmeq->cq_vector = -1; +out: return result; } -- cgit v1.2.3-59-g8ed1b From 72cd4cc28e234ed7189ee508ed65ab60c80a97c8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 May 2018 16:16:04 -0600 Subject: nvme-pci: Fix AER reset handling The nvme timeout handling doesn't do anything if the pci channel is offline, which is the case when recovering from a PCI error event, so it was a bad idea to sync the controller reset in this state. This patch flushes the reset work in the error_resume callback instead, once the channel is back online. This keeps AER handling serialized and allows it to recover from timeouts.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199757 Fixes: cc1d5e749a2e ("nvme/pci: Sync controller reset for AER slot_reset") Reported-by: Alex Gagniuc Tested-by: Alex Gagniuc Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 41880cb9e502..79b48052ea11 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2706,19 +2706,15 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - nvme_reset_ctrl_sync(&dev->ctrl); - - switch (dev->ctrl.state) { - case NVME_CTRL_LIVE: - case NVME_CTRL_ADMIN_ONLY: - return PCI_ERS_RESULT_RECOVERED; - default: - return PCI_ERS_RESULT_DISCONNECT; - } + nvme_reset_ctrl(&dev->ctrl); + return PCI_ERS_RESULT_RECOVERED; } static void nvme_error_resume(struct pci_dev *pdev) { + struct nvme_dev *dev = pci_get_drvdata(pdev); + + flush_work(&dev->ctrl.reset_work); pci_cleanup_aer_uncorrect_error_status(pdev); } -- cgit v1.2.3-59-g8ed1b From 2e050f00a0f0e07467050cb4afae0234941e5bf3 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Thu, 24 May 2018 09:27:38 +0800 Subject: nvme-rdma: stop admin queue before freeing it For any failure after nvme_rdma_start_queue in nvme_rdma_configure_admin_queue, the admin queue will be freed with the NVME_RDMA_Q_LIVE flag still set. Once nvme_rdma_stop_queue is invoked, that will cause a use-after-free. BUG: KASAN: use-after-free in rdma_disconnect+0x1f/0xe0 [rdma_cm] To fix it, call nvme_rdma_stop_queue for all the failed cases after nvme_rdma_start_queue. Signed-off-by: Jianchao Wang Suggested-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1eb4438a8763..2181299ce8f5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -778,7 +778,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; + goto out_stop_queue; } ctrl->ctrl.sqsize = @@ -786,23 +786,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) - goto out_cleanup_queue; + goto out_stop_queue; ctrl->ctrl.max_hw_sectors = (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); error = nvme_init_identify(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_stop_queue; error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (error) - goto out_cleanup_queue; + goto out_stop_queue; return 0; +out_stop_queue: + nvme_rdma_stop_queue(&ctrl->queues[0]); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->ctrl.admin_q); -- cgit v1.2.3-59-g8ed1b From 90fcaf5d54c57037e9f879f17b58497db7156c3e Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 11 May 2018 17:50:24 -0700 Subject: nvme-fc: remove setting DNR on exception conditions Current code will set DNR if the controller is deleting or there is an error during controller init. None of this is necessary. 
Remove the code that sets DNR Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8b66879b4ebf..ac35a80f5532 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1686,16 +1686,6 @@ done: goto check_error; } - /* - * Force failures of commands if we're killing the controller - * or have an error on a command used to create an new association - */ - if (status && - (blk_queue_dying(rq->q) || - ctrl->ctrl.state == NVME_CTRL_NEW || - ctrl->ctrl.state == NVME_CTRL_CONNECTING)) - status |= cpu_to_le16(NVME_SC_DNR << 1); - __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); nvme_end_request(rq, status, result); -- cgit v1.2.3-59-g8ed1b From ffecb0b452d5cbdd2ac3614bdf6cd39ba47b0cb8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 11 May 2018 17:50:22 -0700 Subject: nvme-fabrics: remove unnecessary controller subnqn validation After creating the nvme controller, nvmf_create_ctrl() validates the newly created subsysnqn vs the one specified by the options. In general, this is an unnecessary check as the Connect message should implicitly ensure this value matches. With the change to the FC transport to do an asynchronous connect for the first association create, the transport will return to nvmf_create_ctrl() before that first association has been established, thus the subnqn will not yet be set. Remove the unnecessary validation. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ae732a77fe8..6fdf499d16b4 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -983,16 +983,6 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_module_put; } - if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { - dev_warn(ctrl->device, - "controller returned incorrect NQN: \"%s\".\n", - ctrl->subsys->subnqn); - module_put(ops->module); - up_read(&nvmf_transports_rwsem); - nvme_delete_ctrl_sync(ctrl); - return ERR_PTR(-EINVAL); - } - module_put(ops->module); up_read(&nvmf_transports_rwsem); return ctrl; -- cgit v1.2.3-59-g8ed1b From 461fbc8f0ed1ac18ce6b7bf7ab0b7444f7c7ff57 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 May 2018 16:18:15 +0200 Subject: nvme-fabrics: centralize discovery controller defaults When connecting to the discovery controller we have certain defaults to observe, so centralize them to avoid inconsistencies due to argument ordering. 
Signed-off-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 6fdf499d16b4..5ad6104577f0 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -689,10 +689,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); - if (opts->discovery_nqn) { - opts->kato = 0; - opts->nr_io_queues = 0; - } break; case NVMF_OPT_TRADDR: p = match_strdup(args); @@ -851,6 +847,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (opts->discovery_nqn) { + opts->kato = 0; + opts->nr_io_queues = 0; + } if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; else -- cgit v1.2.3-59-g8ed1b From 181303d03525ea52d2d002fb8ee04e769aaa4ce4 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 May 2018 16:18:17 +0200 Subject: nvme-fabrics: allow duplicate connections to the discovery controller The whole point of the discovery controller is that it can accept multiple connections. Additionally, the cmic field is not even defined for the discovery controller identify page. Signed-off-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- drivers/nvme/host/fabrics.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc8aa2c1c22a..fd206a6adad5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2184,7 +2184,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) * Verify that the subsystem actually supports multiple * controllers, else bail out. */ - if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { + if (!ctrl->opts->discovery_nqn && + nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", found->subnqn); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5ad6104577f0..8cb3d73e957d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -850,6 +850,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, if (opts->discovery_nqn) { opts->kato = 0; opts->nr_io_queues = 0; + opts->duplicate_connect = true; } if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; -- cgit v1.2.3-59-g8ed1b From b40b83e365d6ccc2d900e7f481bab91146a68407 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 8 May 2018 02:02:33 -0400 Subject: nvmet: make a few error messages more generic "nvmet_check_ctrl_status()" is called from admin-cmd.c as well as io-cmd.c, so make the error message more generic.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index e95424f172fd..4a9908bb7de0 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -710,13 +710,13 @@ out: u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) { if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n", + pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n", + pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); req->ns = NULL; return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; -- cgit v1.2.3-59-g8ed1b From 618cff4285dc0ef6ebb99f715116e7af62565293 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 10 May 2018 02:46:30 -0400 Subject: nvmet: remove duplicate NULL initialization for req->ns Remove the duplicate NULL initialization for req->ns. req->ns is always initialized to NULL in nvmet_req_init(), so there is no need to reset it later on failures unless we have previously assigned a value to it. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 -- drivers/nvme/target/core.c | 1 - drivers/nvme/target/discovery.c | 2 -- drivers/nvme/target/fabrics-cmd.c | 4 ---- drivers/nvme/target/io-cmd.c | 4 +--- 5 files changed, 1 insertion(+), 12 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 5e0e9fcc0d4d..d1afcfd89aa3 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -548,8 +548,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) struct nvme_command *cmd = req->cmd; u16 ret; - req->ns = NULL; - ret = nvmet_check_ctrl_status(req, cmd); if (unlikely(ret)) return ret; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4a9908bb7de0..6d8eaf3f89c5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -718,7 +718,6 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); - req->ns = NULL; return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } return 0; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 231e04e0a496..08656b849bd6 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -187,8 +187,6 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { pr_err("got cmd %d while not ready\n", cmd->common.opcode); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 19e9e42ae943..d84ae004cb85 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -77,8 +77,6 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - switch (cmd->fabrics.fctype) { case nvme_fabrics_type_property_set: req->data_len = 0; @@ -242,8 +240,6 @@ u16 
nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - if (cmd->common.opcode != nvme_fabrics_command) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index cd2344179673..e5eb2db4d20f 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -195,10 +195,8 @@ u16 nvmet_parse_io_cmd(struct nvmet_req *req) u16 ret; ret = nvmet_check_ctrl_status(req, cmd); - if (unlikely(ret)) { - req->ns = NULL; + if (unlikely(ret)) return ret; - } req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); if (unlikely(!req->ns)) -- cgit v1.2.3-59-g8ed1b From d5eff33ee6f80864d889d64fd3f6ea7b78dd1b24 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 23 May 2018 00:34:39 -0400 Subject: nvmet: add simple file backed ns support This patch adds simple file backed namespace support for NVMeOF target. The new file io-cmd-file.c is responsible for handling the code for I/O commands when ns is file backed. Also, we introduce mempools based slow path using sync I/Os for file backed ns to ensure forward progress under reclaim. The old block device based implementation is moved to io-cmd-bdev.c and use a "nvmet_bdev_" symbol prefix. The enable/disable calls are also move into the respective files. Signed-off-by: Chaitanya Kulkarni [hch: updated changelog, fixed double req->ns lookup in bdev case] Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Makefile | 4 +- drivers/nvme/target/admin-cmd.c | 8 + drivers/nvme/target/core.c | 50 ++++--- drivers/nvme/target/io-cmd-bdev.c | 241 ++++++++++++++++++++++++++++++ drivers/nvme/target/io-cmd-file.c | 300 ++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/io-cmd.c | 228 ----------------------------- drivers/nvme/target/nvmet.h | 28 +++- 7 files changed, 610 insertions(+), 249 deletions(-) create mode 100644 drivers/nvme/target/io-cmd-bdev.c create mode 100644 drivers/nvme/target/io-cmd-file.c delete mode 100644 drivers/nvme/target/io-cmd.c (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index 488250189c99..8118c93391c6 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o -nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \ - discovery.o +nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ + discovery.o io-cmd-file.o io-cmd-bdev.o nvme-loop-y += loop.o nvmet-rdma-y += rdma.o nvmet-fc-y += fc.o diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index d1afcfd89aa3..b2ba95b2eef7 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -45,6 +45,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, return NVME_SC_INVALID_NS; } + /* we don't have the right data for file backed ns */ + if (!ns->bdev) + goto out; + host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]); host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); @@ -54,6 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, put_unaligned_le64(data_units_read, &slog->data_units_read[0]); put_unaligned_le64(host_writes, &slog->host_writes[0]); put_unaligned_le64(data_units_written, &slog->data_units_written[0]); +out: 
nvmet_put_namespace(ns); return NVME_SC_SUCCESS; @@ -71,6 +76,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, rcu_read_lock(); list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) { + /* we don't have the right data for file backed ns */ + if (!ns->bdev) + continue; host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); data_units_read += part_stat_read(ns->bdev->bd_part, sectors[READ]); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6d8eaf3f89c5..800aaf96ddcd 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -271,6 +271,12 @@ void nvmet_put_namespace(struct nvmet_ns *ns) percpu_ref_put(&ns->ref); } +static void nvmet_ns_dev_disable(struct nvmet_ns *ns) +{ + nvmet_bdev_ns_disable(ns); + nvmet_file_ns_disable(ns); +} + int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; @@ -281,23 +287,16 @@ int nvmet_ns_enable(struct nvmet_ns *ns) if (ns->enabled) goto out_unlock; - ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, - NULL); - if (IS_ERR(ns->bdev)) { - pr_err("failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); - ret = PTR_ERR(ns->bdev); - ns->bdev = NULL; + ret = nvmet_bdev_ns_enable(ns); + if (ret) + ret = nvmet_file_ns_enable(ns); + if (ret) goto out_unlock; - } - - ns->size = i_size_read(ns->bdev->bd_inode); - ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL); if (ret) - goto out_blkdev_put; + goto out_dev_put; if (ns->nsid > subsys->max_nsid) subsys->max_nsid = ns->nsid; @@ -328,9 +327,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) out_unlock: mutex_unlock(&subsys->lock); return ret; -out_blkdev_put: - blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); - ns->bdev = NULL; +out_dev_put: + nvmet_ns_dev_disable(ns); goto out_unlock; } @@ -366,8 +364,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); - if (ns->bdev) - blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); + nvmet_ns_dev_disable(ns); out_unlock: mutex_unlock(&subsys->lock); } @@ -499,6 +496,25 @@ int nvmet_sq_init(struct nvmet_sq *sq) } EXPORT_SYMBOL_GPL(nvmet_sq_init); +static u16 nvmet_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + u16 ret; + + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) + return ret; + + req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); + if (unlikely(!req->ns)) + return NVME_SC_INVALID_NS | NVME_SC_DNR; + + if (req->ns->file) + return nvmet_file_parse_io_cmd(req); + else + return nvmet_bdev_parse_io_cmd(req); +} + bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops) { diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c new file mode 100644 index 000000000000..e0b0f7df70c2 --- /dev/null +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -0,0 +1,241 @@ +/* + * NVMe I/O command implementation. + * Copyright (c) 2015-2016 HGST, a Western Digital Company. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include "nvmet.h" + +int nvmet_bdev_ns_enable(struct nvmet_ns *ns) +{ + int ret; + + ns->bdev = blkdev_get_by_path(ns->device_path, + FMODE_READ | FMODE_WRITE, NULL); + if (IS_ERR(ns->bdev)) { + ret = PTR_ERR(ns->bdev); + if (ret != -ENOTBLK) { + pr_err("failed to open block device %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); + } + ns->bdev = NULL; + return ret; + } + ns->size = i_size_read(ns->bdev->bd_inode); + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + return 0; +} + +void nvmet_bdev_ns_disable(struct nvmet_ns *ns) +{ + if (ns->bdev) { + blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); + ns->bdev = NULL; + } +} + +static void nvmet_bio_done(struct bio *bio) +{ + struct nvmet_req *req = bio->bi_private; + + nvmet_req_complete(req, + bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); + + if (bio != &req->b.inline_bio) + bio_put(bio); +} + +static void nvmet_bdev_execute_rw(struct nvmet_req *req) +{ + int sg_cnt = req->sg_cnt; + struct bio *bio = &req->b.inline_bio; + struct scatterlist *sg; + sector_t sector; + blk_qc_t cookie; + int op, op_flags = 0, i; + + if (!req->sg_cnt) { + nvmet_req_complete(req, 0); + return; + } + + if (req->cmd->rw.opcode == nvme_cmd_write) { + op = REQ_OP_WRITE; + op_flags = REQ_SYNC | REQ_IDLE; + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) + op_flags |= REQ_FUA; + } else { + op = REQ_OP_READ; + } + + sector = le64_to_cpu(req->cmd->rw.slba); + sector <<= (req->ns->blksize_shift - 9); + + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + bio_set_dev(bio, req->ns->bdev); + bio->bi_iter.bi_sector = sector; + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + bio_set_op_attrs(bio, op, op_flags); + + for_each_sg(req->sg, sg, req->sg_cnt, i) { + while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) + != sg->length) { + struct bio *prev = bio; + + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio_set_dev(bio, req->ns->bdev); + bio->bi_iter.bi_sector = sector; + bio_set_op_attrs(bio, op, op_flags); + + bio_chain(bio, prev); + submit_bio(prev); + } + + sector += sg->length >> 9; + sg_cnt--; + } + + cookie = submit_bio(bio); + + blk_poll(bdev_get_queue(req->ns->bdev), cookie); +} + +static void nvmet_bdev_execute_flush(struct nvmet_req *req) +{ + struct bio *bio = &req->b.inline_bio; + + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + bio_set_dev(bio, req->ns->bdev); + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + + submit_bio(bio); +} + +static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns, + struct nvme_dsm_range *range, struct bio **bio) +{ + int ret; + + ret = __blkdev_issue_discard(ns->bdev, + le64_to_cpu(range->slba) << (ns->blksize_shift - 9), + le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), + GFP_KERNEL, 0, bio); + if (ret && ret != -EOPNOTSUPP) + return NVME_SC_INTERNAL | NVME_SC_DNR; + return 0; +} + +static void nvmet_bdev_execute_discard(struct nvmet_req *req) +{ + struct nvme_dsm_range range; + struct bio *bio = NULL; + int i; + u16 status; + + for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { + status = nvmet_copy_from_sgl(req, i * sizeof(range), &range, + 
sizeof(range)); + if (status) + break; + + status = nvmet_bdev_discard_range(req->ns, &range, &bio); + if (status) + break; + } + + if (bio) { + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + if (status) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + } else { + submit_bio(bio); + } + } else { + nvmet_req_complete(req, status); + } +} + +static void nvmet_bdev_execute_dsm(struct nvmet_req *req) +{ + switch (le32_to_cpu(req->cmd->dsm.attributes)) { + case NVME_DSMGMT_AD: + nvmet_bdev_execute_discard(req); + return; + case NVME_DSMGMT_IDR: + case NVME_DSMGMT_IDW: + default: + /* Not supported yet */ + nvmet_req_complete(req, 0); + return; + } +} + +static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) +{ + struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; + struct bio *bio = NULL; + u16 status = NVME_SC_SUCCESS; + sector_t sector; + sector_t nr_sector; + + sector = le64_to_cpu(write_zeroes->slba) << + (req->ns->blksize_shift - 9); + nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << + (req->ns->blksize_shift - 9)); + + if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, + GFP_KERNEL, &bio, 0)) + status = NVME_SC_INTERNAL | NVME_SC_DNR; + + if (bio) { + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + submit_bio(bio); + } else { + nvmet_req_complete(req, status); + } +} + +u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + + switch (cmd->common.opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + req->execute = nvmet_bdev_execute_rw; + req->data_len = nvmet_rw_len(req); + return 0; + case nvme_cmd_flush: + req->execute = nvmet_bdev_execute_flush; + req->data_len = 0; + return 0; + case nvme_cmd_dsm: + req->execute = nvmet_bdev_execute_dsm; + req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * + sizeof(struct nvme_dsm_range); + return 0; + case nvme_cmd_write_zeroes: + req->execute = nvmet_bdev_execute_write_zeroes; + return 0; + default: + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + } +} diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c new file mode 100644 index 000000000000..ca1ccf867856 --- /dev/null +++ b/drivers/nvme/target/io-cmd-file.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe Over Fabrics Target File I/O commands implementation. + * Copyright (c) 2017-2018 Western Digital Corporation or its + * affiliates. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include "nvmet.h" + +#define NVMET_MAX_MPOOL_BVEC 16 +#define NVMET_MIN_MPOOL_OBJ 16 + +void nvmet_file_ns_disable(struct nvmet_ns *ns) +{ + if (ns->file) { + mempool_destroy(ns->bvec_pool); + ns->bvec_pool = NULL; + kmem_cache_destroy(ns->bvec_cache); + ns->bvec_cache = NULL; + fput(ns->file); + ns->file = NULL; + } +} + +int nvmet_file_ns_enable(struct nvmet_ns *ns) +{ + int ret; + struct kstat stat; + + ns->file = filp_open(ns->device_path, + O_RDWR | O_LARGEFILE | O_DIRECT, 0); + if (IS_ERR(ns->file)) { + pr_err("failed to open file %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); + return PTR_ERR(ns->file); + } + + ret = vfs_getattr(&ns->file->f_path, + &stat, STATX_SIZE, AT_STATX_FORCE_SYNC); + if (ret) + goto err; + + ns->size = stat.size; + ns->blksize_shift = file_inode(ns->file)->i_blkbits; + + ns->bvec_cache = kmem_cache_create("nvmet-bvec", + NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ns->bvec_cache) + goto err; + + ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab, + mempool_free_slab, ns->bvec_cache); + + if (!ns->bvec_pool) + goto err; + + return ret; +err: + ns->size = 0; + ns->blksize_shift = 0; + nvmet_file_ns_disable(ns); + return ret; +} + +static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) +{ + bv->bv_page = sg_page_iter_page(iter); + bv->bv_offset = iter->sg->offset; + bv->bv_len = PAGE_SIZE - iter->sg->offset; +} + +static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, + unsigned long nr_segs, size_t count) +{ + struct kiocb *iocb = &req->f.iocb; + ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter); + struct iov_iter iter; + int ki_flags = 0, rw; + ssize_t ret; + + if (req->cmd->rw.opcode == nvme_cmd_write) { + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) + ki_flags = IOCB_DSYNC; + call_iter = req->ns->file->f_op->write_iter; + rw = WRITE; + } else { + call_iter = req->ns->file->f_op->read_iter; + rw = READ; + } + + iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count); + + iocb->ki_pos = pos; + iocb->ki_filp = req->ns->file; + iocb->ki_flags = IOCB_DIRECT | ki_flags; + + ret = call_iter(iocb, &iter); + + if (ret != -EIOCBQUEUED && iocb->ki_complete) + iocb->ki_complete(iocb, ret, 0); + + return ret; +} + +static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) +{ + struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb); + + if (req->f.bvec != req->inline_bvec) { + if (likely(req->f.mpool_alloc == false)) + kfree(req->f.bvec); + else + mempool_free(req->f.bvec, req->ns->bvec_pool); + } + + nvmet_req_complete(req, ret != req->data_len ? 
+ NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_rw(struct nvmet_req *req) +{ + ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); + struct sg_page_iter sg_pg_iter; + unsigned long bv_cnt = 0; + bool is_sync = false; + size_t len = 0, total_len = 0; + ssize_t ret = 0; + loff_t pos; + + if (!req->sg_cnt || !nr_bvec) { + nvmet_req_complete(req, 0); + return; + } + + if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) + req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_KERNEL); + else + req->f.bvec = req->inline_bvec; + + req->f.mpool_alloc = false; + if (unlikely(!req->f.bvec)) { + /* fallback under memory pressure */ + req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL); + req->f.mpool_alloc = true; + if (nr_bvec > NVMET_MAX_MPOOL_BVEC) + is_sync = true; + } + + pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; + + memset(&req->f.iocb, 0, sizeof(struct kiocb)); + for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { + nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); + len += req->f.bvec[bv_cnt].bv_len; + total_len += req->f.bvec[bv_cnt].bv_len; + bv_cnt++; + + WARN_ON_ONCE((nr_bvec - 1) < 0); + + if (unlikely(is_sync) && + (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) { + ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len); + if (ret < 0) + goto out; + pos += len; + bv_cnt = 0; + len = 0; + } + nr_bvec--; + } + + if (WARN_ON_ONCE(total_len != req->data_len)) + ret = -EIO; +out: + if (unlikely(is_sync || ret)) { + nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0); + return; + } + req->f.iocb.ki_complete = nvmet_file_io_done; + nvmet_file_submit_bvec(req, pos, bv_cnt, total_len); +} + +static void nvmet_file_flush_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + int ret; + + ret = vfs_fsync(req->ns->file, 1); + + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_flush(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_flush_work); + schedule_work(&req->f.work); +} + +static void nvmet_file_execute_discard(struct nvmet_req *req) +{ + int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + struct nvme_dsm_range range; + loff_t offset; + loff_t len; + int i, ret; + + for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { + if (nvmet_copy_from_sgl(req, i * sizeof(range), &range, + sizeof(range))) + break; + offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; + len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + ret = vfs_fallocate(req->ns->file, mode, offset, len); + if (ret) + break; + } + + nvmet_req_complete(req, ret < 0 ? 
NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_dsm_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + + switch (le32_to_cpu(req->cmd->dsm.attributes)) { + case NVME_DSMGMT_AD: + nvmet_file_execute_discard(req); + return; + case NVME_DSMGMT_IDR: + case NVME_DSMGMT_IDW: + default: + /* Not supported yet */ + nvmet_req_complete(req, 0); + return; + } +} + +static void nvmet_file_execute_dsm(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_dsm_work); + schedule_work(&req->f.work); +} + +static void nvmet_file_write_zeroes_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; + int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE; + loff_t offset; + loff_t len; + int ret; + + offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift; + len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << + req->ns->blksize_shift); + + ret = vfs_fallocate(req->ns->file, mode, offset, len); + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); + schedule_work(&req->f.work); +} + +u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + + switch (cmd->common.opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + req->execute = nvmet_file_execute_rw; + req->data_len = nvmet_rw_len(req); + return 0; + case nvme_cmd_flush: + req->execute = nvmet_file_execute_flush; + req->data_len = 0; + return 0; + case nvme_cmd_dsm: + req->execute = nvmet_file_execute_dsm; + req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * + sizeof(struct nvme_dsm_range); + return 0; + case nvme_cmd_write_zeroes: + req->execute = nvmet_file_execute_write_zeroes; + req->data_len = 0; + return 0; + default: + pr_err("unhandled cmd for file ns %d on qid %d\n", + cmd->common.opcode, req->sq->qid); + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + } +} diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c deleted file mode 100644 index e5eb2db4d20f..000000000000 --- a/drivers/nvme/target/io-cmd.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * NVMe I/O command implementation. - * Copyright (c) 2015-2016 HGST, a Western Digital Company. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include "nvmet.h" - -static void nvmet_bio_done(struct bio *bio) -{ - struct nvmet_req *req = bio->bi_private; - - nvmet_req_complete(req, - bio->bi_status ? 
NVME_SC_INTERNAL | NVME_SC_DNR : 0); - - if (bio != &req->inline_bio) - bio_put(bio); -} - -static inline u32 nvmet_rw_len(struct nvmet_req *req) -{ - return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << - req->ns->blksize_shift; -} - -static void nvmet_execute_rw(struct nvmet_req *req) -{ - int sg_cnt = req->sg_cnt; - struct bio *bio = &req->inline_bio; - struct scatterlist *sg; - sector_t sector; - blk_qc_t cookie; - int op, op_flags = 0, i; - - if (!req->sg_cnt) { - nvmet_req_complete(req, 0); - return; - } - - if (req->cmd->rw.opcode == nvme_cmd_write) { - op = REQ_OP_WRITE; - op_flags = REQ_SYNC | REQ_IDLE; - if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) - op_flags |= REQ_FUA; - } else { - op = REQ_OP_READ; - } - - sector = le64_to_cpu(req->cmd->rw.slba); - sector <<= (req->ns->blksize_shift - 9); - - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio_set_dev(bio, req->ns->bdev); - bio->bi_iter.bi_sector = sector; - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - bio_set_op_attrs(bio, op, op_flags); - - for_each_sg(req->sg, sg, req->sg_cnt, i) { - while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) - != sg->length) { - struct bio *prev = bio; - - bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); - bio_set_dev(bio, req->ns->bdev); - bio->bi_iter.bi_sector = sector; - bio_set_op_attrs(bio, op, op_flags); - - bio_chain(bio, prev); - submit_bio(prev); - } - - sector += sg->length >> 9; - sg_cnt--; - } - - cookie = submit_bio(bio); - - blk_poll(bdev_get_queue(req->ns->bdev), cookie); -} - -static void nvmet_execute_flush(struct nvmet_req *req) -{ - struct bio *bio = &req->inline_bio; - - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio_set_dev(bio, req->ns->bdev); - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - - submit_bio(bio); -} - -static u16 nvmet_discard_range(struct nvmet_ns *ns, - struct nvme_dsm_range *range, struct bio **bio) -{ - int ret; - - ret = __blkdev_issue_discard(ns->bdev, - le64_to_cpu(range->slba) << (ns->blksize_shift - 9), - le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), - GFP_KERNEL, 0, bio); - if (ret && ret != -EOPNOTSUPP) - return NVME_SC_INTERNAL | NVME_SC_DNR; - return 0; -} - -static void nvmet_execute_discard(struct nvmet_req *req) -{ - struct nvme_dsm_range range; - struct bio *bio = NULL; - int i; - u16 status; - - for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { - status = nvmet_copy_from_sgl(req, i * sizeof(range), &range, - sizeof(range)); - if (status) - break; - - status = nvmet_discard_range(req->ns, &range, &bio); - if (status) - break; - } - - if (bio) { - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - if (status) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); - } else { - submit_bio(bio); - } - } else { - nvmet_req_complete(req, status); - } -} - -static void nvmet_execute_dsm(struct nvmet_req *req) -{ - switch (le32_to_cpu(req->cmd->dsm.attributes)) { - case NVME_DSMGMT_AD: - nvmet_execute_discard(req); - return; - case NVME_DSMGMT_IDR: - case NVME_DSMGMT_IDW: - default: - /* Not supported yet */ - nvmet_req_complete(req, 0); - return; - } -} - -static void nvmet_execute_write_zeroes(struct nvmet_req *req) -{ - struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; - struct bio *bio = NULL; - u16 status = NVME_SC_SUCCESS; - sector_t sector; - sector_t nr_sector; - - sector = le64_to_cpu(write_zeroes->slba) << - (req->ns->blksize_shift - 9); - nr_sector = 
(((sector_t)le16_to_cpu(write_zeroes->length) + 1) << - (req->ns->blksize_shift - 9)); - - if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, - GFP_KERNEL, &bio, 0)) - status = NVME_SC_INTERNAL | NVME_SC_DNR; - - if (bio) { - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - submit_bio(bio); - } else { - nvmet_req_complete(req, status); - } -} - -u16 nvmet_parse_io_cmd(struct nvmet_req *req) -{ - struct nvme_command *cmd = req->cmd; - u16 ret; - - ret = nvmet_check_ctrl_status(req, cmd); - if (unlikely(ret)) - return ret; - - req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (unlikely(!req->ns)) - return NVME_SC_INVALID_NS | NVME_SC_DNR; - - switch (cmd->common.opcode) { - case nvme_cmd_read: - case nvme_cmd_write: - req->execute = nvmet_execute_rw; - req->data_len = nvmet_rw_len(req); - return 0; - case nvme_cmd_flush: - req->execute = nvmet_execute_flush; - req->data_len = 0; - return 0; - case nvme_cmd_dsm: - req->execute = nvmet_execute_dsm; - req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * - sizeof(struct nvme_dsm_range); - return 0; - case nvme_cmd_write_zeroes: - req->execute = nvmet_execute_write_zeroes; - return 0; - default: - pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, - req->sq->qid); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; - } -} diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 15fd84ab21f8..2d09afcfe505 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -43,6 +43,7 @@ struct nvmet_ns { struct list_head dev_link; struct percpu_ref ref; struct block_device *bdev; + struct file *file; u32 nsid; u32 blksize_shift; loff_t size; @@ -57,6 +58,8 @@ struct nvmet_ns { struct config_group group; struct completion disable_done; + mempool_t *bvec_pool; + struct kmem_cache *bvec_cache; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) @@ -222,8 +225,18 @@ struct nvmet_req { struct nvmet_cq *cq; struct nvmet_ns *ns; struct scatterlist *sg; - struct bio inline_bio; struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC]; + union { + struct { + struct bio inline_bio; + } b; + struct { + bool mpool_alloc; + struct kiocb iocb; + struct bio_vec *bvec; + struct work_struct work; + } f; + }; int sg_cnt; /* data length as parsed from the command: */ size_t data_len; @@ -263,7 +276,8 @@ struct nvmet_async_event { }; u16 nvmet_parse_connect_cmd(struct nvmet_req *req); -u16 nvmet_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); u16 nvmet_parse_admin_cmd(struct nvmet_req *req); u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); @@ -338,4 +352,14 @@ extern struct rw_semaphore nvmet_config_sem; bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, const char *hostnqn); +int nvmet_bdev_ns_enable(struct nvmet_ns *ns); +int nvmet_file_ns_enable(struct nvmet_ns *ns); +void nvmet_bdev_ns_disable(struct nvmet_ns *ns); +void nvmet_file_ns_disable(struct nvmet_ns *ns); + +static inline u32 nvmet_rw_len(struct nvmet_req *req) +{ + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << + req->ns->blksize_shift; +} #endif /* _NVMET_H */ -- cgit v1.2.3-59-g8ed1b From 17d78252ee568ecf94df6829c81578c0602991eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 21 May 2018 16:27:42 -0700 Subject: nvmet-fc: increase LS buffer count per fc port Today's limit on concurrent LS's is very small: 4 buffers.
With large subsystem counts or large numbers of initiators connecting, the limit may be exceeded. Raise the LS buffer count to 256. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 33ee8d3145f8..408279cb6f2c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -31,7 +31,7 @@ /* *************************** Data Structures/Defines ****************** */ -#define NVMET_LS_CTX_COUNT 4 +#define NVMET_LS_CTX_COUNT 256 /* for this implementation, assume small single frame rqst/rsp */ #define NVME_FC_MAX_LS_BUFFER_SIZE 2048 -- cgit v1.2.3-59-g8ed1b From eb464833a2e787996474ad33dafa2c5336d4c477 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Fri, 11 May 2018 02:38:15 -0400 Subject: nvmet-loop: use nr_phys_segments when map rq to sgl Use blk_rq_nr_phys_segments() instead of blk_rq_payload_bytes() to check if a command contains data to be mapped. This fixes the case where a struct request contains LBAs, but no data will actually be sent, e.g. the pending Write Zeroes support. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 27a8561c0cb9..4284cefe6d32 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -174,7 +174,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; - if (blk_rq_payload_bytes(req)) { + if (blk_rq_nr_phys_segments(req)) { iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), -- cgit v1.2.3-59-g8ed1b From 1e5f44616287b24a2c032274d6a5e3690dfd279b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 May 2018 11:04:03 +0200 Subject: nvme: fix KASAN warning when parsing host nqn The host nqn actually is smaller than the space reserved for it, so we should be using strlcpy to keep KASAN happy. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8cb3d73e957d..aa318136460e 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -57,7 +57,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) goto out_unlock; kref_init(&host->ref); - memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); list_add_tail(&host->list, &nvmf_hosts); out_unlock: -- cgit v1.2.3-59-g8ed1b From 75c8b19a23100455339af947e032f1a221344c68 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 May 2018 11:06:27 +0200 Subject: nvme: fixup memory leak in nvme_init_identify() If nvme_get_effects_log() failed, the 'id' buffer from the previous nvme_identify_ctrl() call will never be freed.
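The fix routes that failure through the function's existing unwind label instead of returning early. As a minimal userspace illustration of the pattern being restored (names and the buffer size are assumptions for the example, not the driver's code):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int get_effects_log(void)
    {
            return -EIO;        /* simulate the failing call */
    }

    static int init_identify(void)
    {
            char *id;
            int ret;

            id = malloc(4096);  /* stands in for the identify buffer */
            if (!id)
                    return -ENOMEM;

            ret = get_effects_log();
            if (ret < 0)
                    goto out_free;  /* a bare 'return ret' here leaks id */

            /* ... further initialization that uses id ... */
            ret = 0;
    out_free:
            free(id);
            return ret;
    }

    int main(void)
    {
            printf("init_identify() = %d\n", init_identify());
            return 0;
    }

Once any allocation precedes a possible failure, every later error path has to leave through the unwind label; that is the invariant the one-line change below restores.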
Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd206a6adad5..2c4cf65641a6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2316,7 +2316,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { ret = nvme_get_effects_log(ctrl); if (ret < 0) - return ret; + goto out_free; } if (!ctrl->identified) { -- cgit v1.2.3-59-g8ed1b From db8c48e4b2fa7c7ad1e429fe980e787698033ee9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 May 2018 15:52:30 +0200 Subject: nvme: return BLK_EH_DONE from ->timeout NVMe always completes the request before returning from ->timeout, either by polling for it, or by disabling the controller. Return BLK_EH_DONE so that the block layer doesn't even try to complete it again. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 14 +++++--------- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/loop.c | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 917e1714f7d9..31525324b79f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1205,7 +1205,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_warn_reset(dev, csts); nvme_dev_disable(dev, false); nvme_reset_ctrl(&dev->ctrl); - return BLK_EH_HANDLED; + return BLK_EH_DONE; } /* @@ -1215,14 +1215,14 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); - return BLK_EH_HANDLED; + return BLK_EH_DONE; } /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced * cancellation error. All outstanding requests are completed on - * shutdown, so we return BLK_EH_HANDLED. + * shutdown, so we return BLK_EH_DONE. */ switch (dev->ctrl.state) { case NVME_CTRL_CONNECTING: @@ -1232,7 +1232,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) req->tag, nvmeq->qid); nvme_dev_disable(dev, false); nvme_req(req)->flags |= NVME_REQ_CANCELLED; - return BLK_EH_HANDLED; + return BLK_EH_DONE; default: break; } @@ -1249,12 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_dev_disable(dev, false); nvme_reset_ctrl(&dev->ctrl); - /* - * Mark the request as handled, since the inline shutdown - * forces all outstanding requests to complete. 
- */ nvme_req(req)->flags |= NVME_REQ_CANCELLED; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1eb4438a8763..ac7462cd7f0f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1598,7 +1598,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) /* fail with DNR on cmd timeout */ nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 27a8561c0cb9..22e3627bf16b 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -146,7 +146,7 @@ nvme_loop_timeout(struct request *rq, bool reserved) /* fail with DNR on admin cmd timeout */ nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, -- cgit v1.2.3-59-g8ed1b From ab4f47a9f4a12603a1806230d44ead2e54158f85 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 25 May 2018 14:02:23 -0700 Subject: nvme: allow duplicate controller if prior controller being deleted The current checks for whether a new controller request "matches" an existing controller ignores controller state and checks identity strings. There are cases where an existing controller may be in its last steps of deletion when they are "matched" by a new connection. Change the behavior so that the new connection ignores controllers that are deleted. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index ef46c915b7b5..0cf0460a5c92 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -139,7 +139,9 @@ static inline bool nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || + if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DEAD || + strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t))) return false; -- cgit v1.2.3-59-g8ed1b From b9cac43c2c4aab221ee0bac72bd6b9f8bf00a223 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 May 2018 14:34:55 -0600 Subject: nvme-pci: Rate limit the nvme timeout warnings The block layer's timeout handling currently prevents drivers from completing commands outside the timeout callback once blk-mq decides they've expired. If a device breaks, this could potentially create many thousands of timed out commands. There's nothing of value to be gleaned from observing each of those messages, so this patch adds a rate limit on them. 
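dev_warn_ratelimited() suppresses repeats within a time window and periodically reports how many were dropped. A rough userspace analogue of that windowed limiter (the 5 s window and 10-message burst here are illustrative assumptions, not necessarily the kernel's defaults):

    #include <stdio.h>
    #include <time.h>

    struct ratelimit {
            time_t interval;        /* seconds per window */
            int burst;              /* messages allowed per window */
            time_t window_start;
            int printed;
            int missed;
    };

    static int ratelimit_ok(struct ratelimit *rl)
    {
            time_t now = time(NULL);

            if (now - rl->window_start >= rl->interval) {
                    if (rl->missed)
                            fprintf(stderr, "%d messages suppressed\n",
                                    rl->missed);
                    rl->window_start = now;
                    rl->printed = 0;
                    rl->missed = 0;
            }
            if (rl->printed < rl->burst) {
                    rl->printed++;
                    return 1;       /* caller may log */
            }
            rl->missed++;
            return 0;               /* drop this message */
    }

    int main(void)
    {
            struct ratelimit rl = { .interval = 5, .burst = 10 };
            int i;

            for (i = 0; i < 1000; i++)
                    if (ratelimit_ok(&rl))
                            fprintf(stderr, "I/O %d QID 0 timeout\n", i);
            return 0;
    }

A broken device that times out thousands of commands then produces one burst of messages per window plus a suppression count, instead of thousands of identical lines.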
Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bca8f521f887..9cd62a13ff1f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1227,7 +1227,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) switch (dev->ctrl.state) { case NVME_CTRL_CONNECTING: case NVME_CTRL_RESETTING: - dev_warn(dev->ctrl.device, + dev_warn_ratelimited(dev->ctrl.device, "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); -- cgit v1.2.3-59-g8ed1b From 90ea5ca45c1fa09bec83ea106b9947170a00edb8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 May 2018 13:45:55 +0200 Subject: nvme-pci: simplify __nvme_submit_cmd With recent CQ handling improvements we can now move the locking into __nvme_submit_cmd. Also remove the local tail variable to make the code more obvious, remove the __ prefix in the name, and fix the comments describing the function. Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Max Gurtovoy --- drivers/nvme/host/pci.c | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9cd62a13ff1f..4d0bb0e45401 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -421,28 +421,25 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) } /** - * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell + * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use * @cmd: The command to send - * - * Safe to use from interrupt context */ -static void __nvme_submit_cmd(struct nvme_queue *nvmeq, - struct nvme_command *cmd) +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) { - u16 tail = nvmeq->sq_tail; - + spin_lock(&nvmeq->sq_lock); if (nvmeq->sq_cmds_io) - memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd)); + memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd, + sizeof(*cmd)); else - memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); + memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); - if (++tail == nvmeq->q_depth) - tail = 0; - if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, - nvmeq->dbbuf_sq_ei)) - writel(tail, nvmeq->q_db); - nvmeq->sq_tail = tail; + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) + writel(nvmeq->sq_tail, nvmeq->q_db); + spin_unlock(&nvmeq->sq_lock); } static void **nvme_pci_iod_list(struct request *req) @@ -895,10 +892,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } blk_mq_start_request(req); - - spin_lock(&nvmeq->sq_lock); - __nvme_submit_cmd(nvmeq, &cmnd); - spin_unlock(&nvmeq->sq_lock); + nvme_submit_cmd(nvmeq, &cmnd); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -1058,10 +1052,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - - spin_lock(&nvmeq->sq_lock); - __nvme_submit_cmd(nvmeq, &c); - spin_unlock(&nvmeq->sq_lock); + nvme_submit_cmd(nvmeq, &c); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) -- cgit 
v1.2.3-59-g8ed1b From fe4a97918de02d5c656f29664770e335df12e090 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 May 2018 14:11:25 +0200 Subject: nvme-loop: add support for multiple ports This is useful at least for multipath testing. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/loop.c | 48 ++++++++++++++++++++++++++++----------------- drivers/nvme/target/nvmet.h | 2 +- 2 files changed, 31 insertions(+), 19 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 02cc316fad39..1304ec3a7ede 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -45,6 +45,7 @@ struct nvme_loop_ctrl { struct nvme_ctrl ctrl; struct nvmet_ctrl *target_ctrl; + struct nvmet_port *port; }; static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) @@ -63,7 +64,8 @@ struct nvme_loop_queue { unsigned long flags; }; -static struct nvmet_port *nvmet_loop_port; +static LIST_HEAD(nvme_loop_ports); +static DEFINE_MUTEX(nvme_loop_ports_mutex); static LIST_HEAD(nvme_loop_ctrl_list); static DEFINE_MUTEX(nvme_loop_ctrl_mutex); @@ -169,7 +171,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(req); iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; - iod->req.port = nvmet_loop_port; + iod->req.port = queue->ctrl->port; if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; @@ -517,6 +519,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { .free_ctrl = nvme_loop_free_ctrl, .submit_async_event = nvme_loop_submit_async_event, .delete_ctrl = nvme_loop_delete_ctrl_host, + .get_address = nvmf_get_address, }; static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) @@ -565,6 +568,23 @@ out_destroy_queues: return ret; } +static struct nvmet_port *nvme_loop_find_port(struct nvme_ctrl *ctrl) +{ + struct nvmet_port *p, *found = NULL; + + mutex_lock(&nvme_loop_ports_mutex); + list_for_each_entry(p, &nvme_loop_ports, entry) { + /* if no transport address is specified use the first port */ + if ((ctrl->opts->mask & NVMF_OPT_TRADDR) && + strcmp(ctrl->opts->traddr, p->disc_addr.traddr)) + continue; + found = p; + break; + } + mutex_unlock(&nvme_loop_ports_mutex); + return found; +} + static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -589,6 +609,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; + ctrl->port = nvme_loop_find_port(&ctrl->ctrl); ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues), GFP_KERNEL); @@ -646,27 +667,17 @@ out_put_ctrl: static int nvme_loop_add_port(struct nvmet_port *port) { - /* - * XXX: disalow adding more than one port so - * there is no connection rejections when a - * a subsystem is assigned to a port for which - * loop doesn't have a pointer. - * This scenario would be possible if we allowed - * more than one port to be added and a subsystem - * was assigned to a port other than nvmet_loop_port. 
- */ - - if (nvmet_loop_port) - return -EPERM; - - nvmet_loop_port = port; + mutex_lock(&nvme_loop_ports_mutex); + list_add_tail(&port->entry, &nvme_loop_ports); + mutex_unlock(&nvme_loop_ports_mutex); return 0; } static void nvme_loop_remove_port(struct nvmet_port *port) { - if (port == nvmet_loop_port) - nvmet_loop_port = NULL; + mutex_lock(&nvme_loop_ports_mutex); + list_del_init(&port->entry); + mutex_unlock(&nvme_loop_ports_mutex); } static const struct nvmet_fabrics_ops nvme_loop_ops = { @@ -682,6 +693,7 @@ static struct nvmf_transport_ops nvme_loop_transport = { .name = "loop", .module = THIS_MODULE, .create_ctrl = nvme_loop_create_ctrl, + .allowed_opts = NVMF_OPT_TRADDR, }; static int __init nvme_loop_init_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 2d09afcfe505..32ebffcf464c 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -85,7 +85,7 @@ struct nvmet_sq { /** * struct nvmet_port - Common structure to keep port * information for the target. - * @entry: List head for holding a list of these elements. + * @entry: Entry into referrals or transport list. * @disc_addr: Address information is stored in a format defined * for a discovery log page entry. * @group: ConfigFS group for this element's folder. -- cgit v1.2.3-59-g8ed1b From d250bf4e776ff09d51c97f83c7a19f65a9e1c5a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 May 2018 18:51:00 +0200 Subject: blk-mq: only iterate over inflight requests in blk_mq_tagset_busy_iter We already check for started commands in all callbacks, but we should also protect against already completed commands. Do this by taking the checks to common code. Acked-by: Josef Bacik Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 12 ++---------- drivers/block/nbd.c | 5 +---- drivers/nvme/host/core.c | 3 --- drivers/nvme/host/fc.c | 3 --- 5 files changed, 4 insertions(+), 21 deletions(-) (limited to 'drivers/nvme') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index a4e58fc28a06..70356a2a11ab 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -271,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * test and set the bit before assining ->rqs[]. 
*/ rq = tags->rqs[bitnr]; - if (rq) + if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) iter_data->fn(rq, iter_data->data, reserved); return true; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 95657b814543..c73626decb46 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2725,15 +2725,11 @@ static void mtip_softirq_done_fn(struct request *rq) blk_mq_end_request(rq, cmd->status); } -static void mtip_abort_cmd(struct request *req, void *data, - bool reserved) +static void mtip_abort_cmd(struct request *req, void *data, bool reserved) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; - if (!blk_mq_request_started(req)) - return; - dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); clear_bit(req->tag, dd->port->cmds_to_issue); @@ -2741,14 +2737,10 @@ static void mtip_abort_cmd(struct request *req, void *data, mtip_softirq_done_fn(req); } -static void mtip_queue_cmd(struct request *req, void *data, - bool reserved) +static void mtip_queue_cmd(struct request *req, void *data, bool reserved) { struct driver_data *dd = data; - if (!blk_mq_request_started(req)) - return; - set_bit(req->tag, dd->port->cmds_to_issue); blk_abort_request(req); } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a6e3a6f05791..3ed1ef8ee528 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -676,11 +676,8 @@ static void recv_work(struct work_struct *work) static void nbd_clear_req(struct request *req, void *data, bool reserved) { - struct nbd_cmd *cmd; + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); - if (!blk_mq_request_started(req)) - return; - cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(req); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2c4cf65641a6..70c3961676e7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -242,9 +242,6 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq); void nvme_cancel_request(struct request *req, void *data, bool reserved) { - if (!blk_mq_request_started(req)) - return; - dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ac35a80f5532..0bad65803271 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2393,9 +2393,6 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); - if (!blk_mq_request_started(req)) - return; - __nvme_fc_abort_op(ctrl, op); } -- cgit v1.2.3-59-g8ed1b From cc456b65b739e17e8b1e6b61cc2f014f225ba466 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 May 2018 15:41:54 +0200 Subject: nvme-fabrics: allow internal passthrough command on deleting controllers Without this we can't cleanly shut down. Based on analysis and an earlier patch from Hannes Reinecke.
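The reworked nvmf_check_if_ready() below reduces to a small triage. A condensed restatement in plain C (deliberately simplified: states other than the three shown are assumed to be handled earlier in the real function, and the failfast test is abstracted to a single flag):

    #include <stdbool.h>
    #include <stdio.h>

    enum state { ST_NEW, ST_CONNECTING, ST_DELETING };
    enum verdict { V_OK, V_QUEUE, V_REJECT };

    static enum verdict check_if_ready(enum state st, bool connected,
                    bool queue_live, bool user_cmd, bool connect_cmd,
                    bool failfast)
    {
            switch (st) {
            case ST_NEW:
            case ST_CONNECTING:
            case ST_DELETING:
                    if (!connected)
                            break;  /* error init cmds so reconnect retries */
                    if (queue_live && !user_cmd)
                            return V_OK;    /* internal setup/teardown */
                    if (!queue_live && connect_cmd)
                            return V_OK;    /* only Connect on a dead queue */
                    break;
            }
            return failfast ? V_REJECT : V_QUEUE;
    }

    int main(void)
    {
            /* the point of the patch: a DELETING controller still passes
             * its internally generated commands, so teardown progresses */
            printf("%s\n", check_if_ready(ST_DELETING, true, true,
                            false, false, false) == V_OK ? "ok" : "blocked");
            return 0;
    }

Folding DELETING into the same arm as NEW and CONNECTING is what lets the shutdown-path commands through instead of rejecting them outright.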
Fixes: bb06ec31452f ("nvme: expand nvmf_check_if_ready checks") Reported-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart --- drivers/nvme/host/fabrics.c | 79 ++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 48 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index aa318136460e..5f5f7067c41d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -545,71 +545,54 @@ blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, return BLK_STS_OK; switch (ctrl->state) { - case NVME_CTRL_DELETING: - goto reject_io; - case NVME_CTRL_NEW: case NVME_CTRL_CONNECTING: + case NVME_CTRL_DELETING: + /* + * This is the case of starting a new or deleting an association + * but connectivity was lost before it was fully created or torn + * down. We need to error the commands used to initialize the + * controller so the reconnect can go into a retry attempt. The + * commands should all be marked REQ_FAILFAST_DRIVER, which will + * hit the reject path below. Anything else will be queued while + * the state settles. + */ if (!is_connected) - /* - * This is the case of starting a new - * association but connectivity was lost - * before it was fully created. We need to - * error the commands used to initialize the - * controller so the reconnect can go into a - * retry attempt. The commands should all be - * marked REQ_FAILFAST_DRIVER, which will hit - * the reject path below. Anything else will - * be queued while the state settles. - */ - goto reject_or_queue_io; - - if ((queue_live && - !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) || - (!queue_live && blk_rq_is_passthrough(rq) && - cmd->common.opcode == nvme_fabrics_command && - cmd->fabrics.fctype == nvme_fabrics_type_connect)) - /* - * If queue is live, allow only commands that - * are internally generated pass through. These - * are commands on the admin queue to initialize - * the controller. This will reject any ioctl - * admin cmds received while initializing. - * - * If the queue is not live, allow only a - * connect command. This will reject any ioctl - * admin cmd as well as initialization commands - * if the controller reverted the queue to non-live. - */ + break; + + /* + * If queue is live, allow only commands that are internally + * generated pass through. These are commands on the admin + * queue to initialize the controller. This will reject any + * ioctl admin cmds received while initializing. + */ + if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) return BLK_STS_OK; /* - * fall-thru to the reject_or_queue_io clause + * If the queue is not live, allow only a connect command. This + * will reject any ioctl admin cmd as well as initialization + * commands if the controller reverted the queue to non-live. */ + if (!queue_live && blk_rq_is_passthrough(rq) && + cmd->common.opcode == nvme_fabrics_command && + cmd->fabrics.fctype == nvme_fabrics_type_connect) + return BLK_STS_OK; break; - - /* these cases fall-thru - * case NVME_CTRL_LIVE: - * case NVME_CTRL_RESETTING: - */ default: break; } -reject_or_queue_io: /* - * Any other new io is something we're not in a state to send - * to the device. Default action is to busy it and retry it - * after the controller state is recovered. However, anything - * marked for failfast or nvme multipath is immediately failed. 
- * Note: commands used to initialize the controller will be - * marked for failfast. + * Any other new io is something we're not in a state to send to the + * device. Default action is to busy it and retry it after the + * controller state is recovered. However, anything marked for failfast + * or nvme multipath is immediately failed. Note: commands used to + * initialize the controller will be marked for failfast. * Note: nvme cli/ioctl commands are marked for failfast. */ if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; - -reject_io: nvme_req(rq)->status = NVME_SC_ABORT_REQ; return BLK_STS_IOERR; } -- cgit v1.2.3-59-g8ed1b From 81cf54e01a3d2eb47bee78a7579fc4dd0912f347 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 31 May 2018 01:55:58 +0000 Subject: nvmet: fix a typo in nvmet_file_ns_enable() Fix a typo in nvmet_file_ns_enable(). Fixes: d5eff33ee6f8 ("nvmet: add simple file backed ns support") Signed-off-by: Wei Yongjun Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index ca1ccf867856..9cff553caa1f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -34,7 +34,7 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) O_RDWR | O_LARGEFILE | O_DIRECT, 0); if (IS_ERR(ns->file)) { pr_err("failed to open file %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); + ns->device_path, PTR_ERR(ns->file)); return PTR_ERR(ns->file); } -- cgit v1.2.3-59-g8ed1b From 1367bc82858018fd1a5a81b8fad4628e7163d1bf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 31 May 2018 11:41:30 +0000 Subject: nvmet: fix error return code in nvmet_file_ns_enable() Fix to return error code -ENOMEM from the memory alloc fail error handling case instead of 0, as done elsewhere in this function. Fixes: d5eff33ee6f8 ("nvmet: add simple file backed ns support") Signed-off-by: Wei Yongjun Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 9cff553caa1f..8c42b3a8c420 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -49,14 +49,18 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) ns->bvec_cache = kmem_cache_create("nvmet-bvec", NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ns->bvec_cache) + if (!ns->bvec_cache) { + ret = -ENOMEM; goto err; + } ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab, mempool_free_slab, ns->bvec_cache); - if (!ns->bvec_pool) + if (!ns->bvec_pool) { + ret = -ENOMEM; goto err; + } return ret; err: -- cgit v1.2.3-59-g8ed1b From 868c2392a700fc1e72fb1bc044b7cceecd4f095c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 May 2018 11:09:54 +0200 Subject: nvme.h: untangle AEN notice definitions Stop including the event type in the definitions for the notice type. 
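The old NVME_AER_NOTICE_* values (0x0002, 0x0102) fused the notice information in bits 15:8 of the completion result with the event type in bits 2:0. After the split, decoding follows the two fields separately, as in this self-contained sketch (the sample result value is made up):

    #include <stdint.h>
    #include <stdio.h>

    enum { AER_ERROR = 0, AER_SMART = 1, AER_NOTICE = 2,
           AER_CSS = 6, AER_VS = 7 };
    enum { NOTICE_NS_CHANGED = 0x00, NOTICE_FW_ACT_STARTING = 0x01 };

    int main(void)
    {
            uint32_t result = 0x0102;  /* notice type, fw-activation info */

            if ((result & 0x7) == AER_NOTICE) {     /* bits 2:0: type */
                    switch ((result & 0xff00) >> 8) { /* bits 15:8: info */
                    case NOTICE_NS_CHANGED:
                            printf("namespace changed, rescanning\n");
                            break;
                    case NOTICE_FW_ACT_STARTING:
                            printf("firmware activation starting\n");
                            break;
                    default:
                            printf("unknown notice\n");
                    }
            }
            return 0;
    }

The wire format is unchanged; the patch only stops encoding the type bits into the notice definitions themselves.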
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 30 ++++++++++++++++++------------ include/linux/nvme.h | 8 ++++++-- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2c4cf65641a6..86cb78653155 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3321,6 +3321,21 @@ static void nvme_fw_act_work(struct work_struct *work) nvme_get_fw_slot_info(ctrl); } +static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) +{ + switch ((result & 0xff00) >> 8) { + case NVME_AER_NOTICE_NS_CHANGED: + dev_info(ctrl->device, "rescanning\n"); + nvme_queue_scan(ctrl); + break; + case NVME_AER_NOTICE_FW_ACT_STARTING: + queue_work(nvme_wq, &ctrl->fw_act_work); + break; + default: + dev_warn(ctrl->device, "async event result %08x\n", result); + } +} + void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, volatile union nvme_result *res) { @@ -3330,6 +3345,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, return; switch (result & 0x7) { + case NVME_AER_NOTICE: + nvme_handle_aen_notice(ctrl, result); + break; case NVME_AER_ERROR: case NVME_AER_SMART: case NVME_AER_CSS: @@ -3339,18 +3357,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, default: break; } - - switch (result & 0xff07) { - case NVME_AER_NOTICE_NS_CHANGED: - dev_info(ctrl->device, "rescanning\n"); - nvme_queue_scan(ctrl); - break; - case NVME_AER_NOTICE_FW_ACT_STARTING: - queue_work(nvme_wq, &ctrl->fw_act_work); - break; - default: - dev_warn(ctrl->device, "async event result %08x\n", result); - } queue_work(nvme_wq, &ctrl->async_event_work); } EXPORT_SYMBOL_GPL(nvme_complete_async_event); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4112e2bd747f..c37103a4ad38 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -436,10 +436,14 @@ enum { enum { NVME_AER_ERROR = 0, NVME_AER_SMART = 1, + NVME_AER_NOTICE = 2, NVME_AER_CSS = 6, NVME_AER_VS = 7, - NVME_AER_NOTICE_NS_CHANGED = 0x0002, - NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102, +}; + +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x00, + NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, }; struct nvme_lba_range_type { -- cgit v1.2.3-59-g8ed1b From c7759fff22657d0e03b508f8f3ef97efbb10bc13 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 May 2018 11:10:02 +0200 Subject: nvmet: add a new nvmet_zero_sgl helper Zeroes the SGL in the payload. 
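A userspace analogue of the helper's contract, using an iovec array in place of a kernel scatterlist (an assumption made purely for illustration): clear len bytes starting off bytes into the scattered payload, and report a short count, which the real helper turns into an SGL error status.

    #include <stdio.h>
    #include <string.h>
    #include <sys/uio.h>

    static size_t zero_iov(struct iovec *iov, int cnt, size_t off, size_t len)
    {
            size_t done = 0;
            int i;

            for (i = 0; i < cnt && done < len; i++) {
                    size_t n;

                    if (off >= iov[i].iov_len) {
                            off -= iov[i].iov_len;  /* skip whole segment */
                            continue;
                    }
                    n = iov[i].iov_len - off;
                    if (n > len - done)
                            n = len - done;
                    memset((char *)iov[i].iov_base + off, 0, n);
                    done += n;
                    off = 0;
            }
            return done;    /* done != len maps to SGL_INVALID_DATA | DNR */
    }

    int main(void)
    {
            char a[8] = "AAAAAAA", b[8] = "BBBBBBB";
            struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

            printf("zeroed %zu of 8 bytes\n", zero_iov(iov, 2, 4, 8));
            return 0;
    }

The next patches use exactly this to pad out log pages without allocating and copying a zeroed bounce buffer.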
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/core.c | 7 +++++++ drivers/nvme/target/nvmet.h | 1 + 2 files changed, 8 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 800aaf96ddcd..55c4bc693aa2 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -57,6 +57,13 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len) return 0; } +u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len) +{ + if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) + return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; + return 0; +} + static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys) { struct nvmet_ns *ns; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 32ebffcf464c..768abe203298 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -330,6 +330,7 @@ u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf, size_t len); u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len); +u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len); u32 nvmet_get_log_page_len(struct nvme_command *cmd); -- cgit v1.2.3-59-g8ed1b From 8ab0805f115b660b81c84cab6dc76ff6117e9734 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 May 2018 11:10:03 +0200 Subject: nvmet: split log page implementation Remove the common code to allocate a buffer and copy it into the SGL. Instead the two no-op implementations just zero the SGL directly, and the smart log allocates a buffer on its own. This prepares for the more elaborate ANA log page. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/admin-cmd.c | 99 +++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 63 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index b2ba95b2eef7..e96bb02c4f2c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -32,6 +32,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd) return len; } +static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) +{ + nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len)); +} + static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, struct nvme_smart_log *slog) { @@ -97,74 +102,26 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, return NVME_SC_SUCCESS; } -static u16 nvmet_get_smart_log(struct nvmet_req *req, - struct nvme_smart_log *slog) -{ - u16 status; - - WARN_ON(req == NULL || slog == NULL); - if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL)) - status = nvmet_get_smart_log_all(req, slog); - else - status = nvmet_get_smart_log_nsid(req, slog); - return status; -} - -static void nvmet_execute_get_log_page(struct nvmet_req *req) +static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) { - struct nvme_smart_log *smart_log; - size_t data_len = nvmet_get_log_page_len(req->cmd); - void *buf; - u16 status = 0; + struct nvme_smart_log *log; + u16 status = NVME_SC_INTERNAL; - buf = kzalloc(data_len, GFP_KERNEL); - if (!buf) { - status = NVME_SC_INTERNAL; + if (req->data_len != sizeof(*log)) goto out; - } - switch (req->cmd->get_log_page.lid) { - case NVME_LOG_ERROR: - /* - * We currently never set the More bit in the status field, - * so all error log entries are invalid 
and can be zeroed out. - * This is called a minum viable implementation (TM) of this - * mandatory log page. - */ - break; - case NVME_LOG_SMART: - /* - * XXX: fill out actual smart log - * - * We might have a hard time coming up with useful values for - * many of the fields, and even when we have useful data - * available (e.g. units or commands read/written) those aren't - * persistent over power loss. - */ - if (data_len != sizeof(*smart_log)) { - status = NVME_SC_INTERNAL; - goto err; - } - smart_log = buf; - status = nvmet_get_smart_log(req, smart_log); - if (status) - goto err; - break; - case NVME_LOG_FW_SLOT: - /* - * We only support a single firmware slot which always is - * active, so we can zero out the whole firmware slot log and - * still claim to fully implement this mandatory log page. - */ - break; - default: - BUG(); - } + log = kzalloc(sizeof(*log), GFP_KERNEL); + if (!log) + goto out; - status = nvmet_copy_to_sgl(req, 0, buf, data_len); + if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL)) + status = nvmet_get_smart_log_all(req, log); + else + status = nvmet_get_smart_log_nsid(req, log); + if (status) + goto out; -err: - kfree(buf); + status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); out: nvmet_req_complete(req, status); } @@ -566,9 +523,25 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) switch (cmd->get_log_page.lid) { case NVME_LOG_ERROR: + /* + * We currently never set the More bit in the status + * field, so all error log entries are invalid and can + * be zeroed out. This is called a minum viable + * implementation (TM) of this mandatory log page. + */ + req->execute = nvmet_execute_get_log_page_noop; + return 0; case NVME_LOG_SMART: + req->execute = nvmet_execute_get_log_page_smart; + return 0; case NVME_LOG_FW_SLOT: - req->execute = nvmet_execute_get_log_page; + /* + * We only support a single firmware slot which always + * is active, so we can zero out the whole firmware slot + * log and still claim to fully implement this mandatory + * log page. + */ + req->execute = nvmet_execute_get_log_page_noop; return 0; } break; -- cgit v1.2.3-59-g8ed1b From c16734ea98f5549fed9a1af47e943a45855274e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 May 2018 17:16:09 +0200 Subject: nvmet: implement the changed namespaces log Just keep a per-controller buffer of changed namespaces and copy it out in the get log page implementation. 
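For reference, the bookkeeping this introduces reduces to a small fixed-size list with de-duplication and an overflow sentinel, matching the Changed Namespace List log page format (up to 1024 entries; a single 0xffffffff entry once more namespaces changed than fit). A standalone sketch of those semantics, simplified from the patch below (plain unsigned ints instead of __le32 entries, and no ctrl->lock):

#include <limits.h>

#define MAX_CHANGED_NS	1024	/* stands in for NVME_MAX_CHANGED_NAMESPACES */

struct changed_ns_log {
	unsigned int nr;			/* UINT_MAX once overflowed */
	unsigned int nsid[MAX_CHANGED_NS];
};

static void note_ns_changed(struct changed_ns_log *log, unsigned int nsid)
{
	unsigned int i;

	if (log->nr > MAX_CHANGED_NS)		/* already overflowed */
		return;
	for (i = 0; i < log->nr; i++)
		if (log->nsid[i] == nsid)	/* already recorded */
			return;
	if (log->nr == MAX_CHANGED_NS) {
		/* too many entries: report a single 0xffffffff instead */
		log->nsid[0] = 0xffffffffu;
		log->nr = UINT_MAX;
		return;
	}
	log->nsid[log->nr++] = nsid;
}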
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Daniel Verkamp --- drivers/nvme/target/admin-cmd.c | 26 +++++++++++++++++++ drivers/nvme/target/core.c | 56 ++++++++++++++++++++++++++++++++++------- drivers/nvme/target/nvmet.h | 3 +++ 3 files changed, 76 insertions(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e96bb02c4f2c..52129d13770e 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -126,6 +126,29 @@ out: nvmet_req_complete(req, status); } +static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + u16 status = NVME_SC_INTERNAL; + size_t len; + + if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32)) + goto out; + + mutex_lock(&ctrl->lock); + if (ctrl->nr_changed_ns == U32_MAX) + len = sizeof(__le32); + else + len = ctrl->nr_changed_ns * sizeof(__le32); + status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len); + if (!status) + status = nvmet_zero_sgl(req, len, req->data_len - len); + ctrl->nr_changed_ns = 0; + mutex_unlock(&ctrl->lock); +out: + nvmet_req_complete(req, status); +} + static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -543,6 +566,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) */ req->execute = nvmet_execute_get_log_page_noop; return 0; + case NVME_LOG_CHANGED_NS: + req->execute = nvmet_execute_get_log_changed_ns; + return 0; } break; case nvme_admin_identify: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 55c4bc693aa2..4fe88e15c641 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -144,6 +144,42 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, schedule_work(&ctrl->async_event_work); } +static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) +{ + u32 i; + + mutex_lock(&ctrl->lock); + if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES) + goto out_unlock; + + for (i = 0; i < ctrl->nr_changed_ns; i++) { + if (ctrl->changed_ns_list[i] == nsid) + goto out_unlock; + } + + if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) { + ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff); + ctrl->nr_changed_ns = U32_MAX; + goto out_unlock; + } + + ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid; +out_unlock: + mutex_unlock(&ctrl->lock); +} + +static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) +{ + struct nvmet_ctrl *ctrl; + + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); + nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_NOTICE_NS_CHANGED, + NVME_LOG_CHANGED_NS); + } +} + int nvmet_register_transport(const struct nvmet_fabrics_ops *ops) { int ret = 0; @@ -287,7 +323,6 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns) int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; - struct nvmet_ctrl *ctrl; int ret = 0; mutex_lock(&subsys->lock); @@ -326,9 +361,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) list_add_tail_rcu(&ns->dev_link, &old->dev_link); } - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); - + nvmet_ns_changed(subsys, ns->nsid); ns->enabled = true; ret = 0; out_unlock: @@ -342,7 +375,6 @@ out_dev_put: void nvmet_ns_disable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; - 
struct nvmet_ctrl *ctrl; mutex_lock(&subsys->lock); if (!ns->enabled) @@ -368,9 +400,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns) percpu_ref_exit(&ns->ref); mutex_lock(&subsys->lock); - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); - + nvmet_ns_changed(subsys, ns->nsid); nvmet_ns_dev_disable(ns); out_unlock: mutex_unlock(&subsys->lock); @@ -832,11 +862,16 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, kref_init(&ctrl->ref); ctrl->subsys = subsys; + ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, + sizeof(__le32), GFP_KERNEL); + if (!ctrl->changed_ns_list) + goto out_free_ctrl; + ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *), GFP_KERNEL); if (!ctrl->cqs) - goto out_free_ctrl; + goto out_free_changed_ns_list; ctrl->sqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_sq *), @@ -894,6 +929,8 @@ out_free_sqs: kfree(ctrl->sqs); out_free_cqs: kfree(ctrl->cqs); +out_free_changed_ns_list: + kfree(ctrl->changed_ns_list); out_free_ctrl: kfree(ctrl); out_put_subsystem: @@ -920,6 +957,7 @@ static void nvmet_ctrl_free(struct kref *ref) kfree(ctrl->sqs); kfree(ctrl->cqs); + kfree(ctrl->changed_ns_list); kfree(ctrl); nvmet_subsys_put(subsys); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 768abe203298..8cdc1e550396 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -135,6 +135,9 @@ struct nvmet_ctrl { const struct nvmet_fabrics_ops *ops; + __le32 *changed_ns_list; + u32 nr_changed_ns; + char subsysnqn[NVMF_NQN_FIELD_LEN]; char hostnqn[NVMF_NQN_FIELD_LEN]; }; -- cgit v1.2.3-59-g8ed1b From c86b8f7b4144affc4daa8b4047cd4da936b087ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 May 2018 15:04:47 +0200 Subject: nvmet: add AEN configuration support AEN configuration via the 'Get Features' and 'Set Features' admin commands is mandatory, so we should be implementing handling for it.
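In spec terms this feature is a 32-bit event-enable mask carried in CDW11 of Set Features (Feature Identifier 0Bh, Asynchronous Event Configuration), and the controller has to reject bits it does not implement. A minimal sketch of that check, assuming a precomputed mask of supported bits (the patch below does the equivalent under NVME_FEAT_ASYNC_EVENT):

#include <errno.h>

/* Sketch only; 'supported' would be NVMET_AEN_CFG_ALL in the patch below. */
static int set_aen_config(unsigned int *aen_enabled, unsigned int val,
			  unsigned int supported)
{
	if (val & ~supported)	/* unknown bit: Invalid Field | DNR */
		return -EINVAL;
	*aen_enabled = val;	/* the in-tree code stores with WRITE_ONCE() */
	return 0;		/* completion result echoes val */
}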
Signed-off-by: Hannes Reinecke [hch: use WRITE_ONCE, check for invalid values] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Daniel Verkamp --- drivers/nvme/target/admin-cmd.c | 15 +++++++++++++-- drivers/nvme/target/core.c | 3 +++ drivers/nvme/target/nvmet.h | 16 ++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 52129d13770e..72ef17616682 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -189,7 +189,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->ver = cpu_to_le32(ctrl->subsys->ver); /* XXX: figure out what to do about RTD3R/RTD3 */ - id->oaes = cpu_to_le32(1 << 8); + id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL); id->ctratt = cpu_to_le32(1 << 0); id->oacs = 0; @@ -435,6 +435,16 @@ static void nvmet_execute_set_features(struct nvmet_req *req) req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); nvmet_set_result(req, req->sq->ctrl->kato); break; + case NVME_FEAT_ASYNC_EVENT: + val32 = le32_to_cpu(req->cmd->common.cdw10[1]); + if (val32 & ~NVMET_AEN_CFG_ALL) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + WRITE_ONCE(req->sq->ctrl->aen_enabled, val32); + nvmet_set_result(req, val32); + break; case NVME_FEAT_HOST_ID: status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; break; @@ -473,9 +483,10 @@ static void nvmet_execute_get_features(struct nvmet_req *req) break; case NVME_FEAT_WRITE_ATOMIC: break; +#endif case NVME_FEAT_ASYNC_EVENT: + nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled)); break; -#endif case NVME_FEAT_VOLATILE_WC: nvmet_set_result(req, 1); break; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4fe88e15c641..5a086b670f42 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -174,6 +174,8 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); + if (!(READ_ONCE(ctrl->aen_enabled) & NVME_AEN_CFG_NS_ATTR)) + continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_NS_CHANGED, NVME_LOG_CHANGED_NS); @@ -861,6 +863,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, kref_init(&ctrl->ref); ctrl->subsys = subsys; + WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, sizeof(__le32), GFP_KERNEL); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 8cdc1e550396..f4d16d9b3582 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -30,6 +30,21 @@ #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 + +/* + * Supported optional AENs: + */ +#define NVMET_AEN_CFG_OPTIONAL \ + NVME_AEN_CFG_NS_ATTR + +/* + * Plus mandatory SMART AENs (we'll never send them, but allow enabling them): + */ +#define NVMET_AEN_CFG_ALL \ + (NVME_SMART_CRIT_SPARE | NVME_SMART_CRIT_TEMPERATURE | \ + NVME_SMART_CRIT_RELIABILITY | NVME_SMART_CRIT_MEDIA | \ + NVME_SMART_CRIT_VOLATILE_MEMORY | NVMET_AEN_CFG_OPTIONAL) + /* Helper Macros when NVMe error is NVME_SC_CONNECT_INVALID_PARAM * The 16 bit shift is to set IATTR bit to 1, which means offending * offset starts in the data section of connect() @@ -123,6 +138,7 @@ struct nvmet_ctrl { u16 cntlid; u32 kato; + u32 aen_enabled; struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS]; unsigned int 
nr_async_event_cmds; struct list_head async_events; -- cgit v1.2.3-59-g8ed1b From 55fdd6b6134fc91d4722149fa8e4f2df24c428fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 May 2018 15:05:09 +0200 Subject: nvmet: mask pending AENs Per section 5.2 of the NVMe 1.3 spec: "When the controller posts a completion queue entry for an outstanding Asynchronous Event Request command and thus reports an asynchronous event, subsequent events of that event type are automatically masked by the controller until the host clears that event. An event is cleared by reading the log page associated with that event using the Get Log Page command (see section 5.14)." Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/target/admin-cmd.c | 1 + drivers/nvme/target/core.c | 9 ++++++++- drivers/nvme/target/nvmet.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 72ef17616682..ead8fbe6922e 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -144,6 +144,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) if (!status) status = nvmet_zero_sgl(req, len, req->data_len - len); ctrl->nr_changed_ns = 0; + clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked); mutex_unlock(&ctrl->lock); out: nvmet_req_complete(req, status); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5a086b670f42..a03da764ecae 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -144,6 +144,13 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, schedule_work(&ctrl->async_event_work); } +static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen) +{ + if (!(READ_ONCE(ctrl->aen_enabled) & aen)) + return true; + return test_and_set_bit(aen, &ctrl->aen_masked); +} + static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) { u32 i; @@ -174,7 +181,7 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); - if (!(READ_ONCE(ctrl->aen_enabled) & NVME_AEN_CFG_NS_ATTR)) + if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR)) continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_NS_CHANGED, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f4d16d9b3582..480dfe10fad9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -139,6 +139,7 @@ struct nvmet_ctrl { u32 kato; u32 aen_enabled; + unsigned long aen_masked; struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS]; unsigned int nr_async_event_cmds; struct list_head async_events; -- cgit v1.2.3-59-g8ed1b From c0561f82a7b687050fa158930328b1b72559ffa8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 22 May 2018 11:09:55 +0200 Subject: nvme: submit AEN event configuration on startup We should register for AEN events; some law-abiding targets might not be sending us AENs otherwise. 
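The required computation is small: mask the optional events the controller advertises in the Identify Controller OAES field with the events the driver actually handles, and submit the result via Set Features (Feature Identifier 0Bh). A sketch, assuming the two notice bits sit at OAES bits 8 and 9 as in NVMe 1.3 (the diff below uses the NVME_AEN_CFG_* constants and the driver's nvme_set_features() helper instead):

#define AEN_NS_ATTR	(1u << 8)	/* namespace attribute notices */
#define AEN_FW_ACT	(1u << 9)	/* firmware activation notices */
#define AEN_SUPPORTED	(AEN_NS_ATTR | AEN_FW_ACT)

/* Enable only events that both controller and driver understand. */
static unsigned int aen_config_for(unsigned int oaes)
{
	return oaes & AEN_SUPPORTED;
}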
Signed-off-by: Hannes Reinecke [hch: slight cleanups] Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 17 +++++++++++++++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 18 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 86cb78653155..c0bc76d98194 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1030,6 +1030,21 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) } EXPORT_SYMBOL_GPL(nvme_set_queue_count); +#define NVME_AEN_SUPPORTED \ + (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT) + +static void nvme_enable_aen(struct nvme_ctrl *ctrl) +{ + u32 result; + int status; + + status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, + ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result); + if (status) + dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", + ctrl->oaes & NVME_AEN_SUPPORTED); +} + static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_user_io io; @@ -2347,6 +2362,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oacs = le16_to_cpu(id->oacs); ctrl->oncs = le16_to_cpup(&id->oncs); + ctrl->oaes = le32_to_cpu(id->oaes); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; ctrl->cntlid = le16_to_cpup(&id->cntlid); @@ -3379,6 +3395,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); + nvme_enable_aen(ctrl); queue_work(nvme_wq, &ctrl->async_event_work); nvme_start_queues(ctrl); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ec6e4acc4d48..dcf9e9592c1a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -176,6 +176,7 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + u32 oaes; u32 aen_result; unsigned int shutdown_timeout; unsigned int kato; -- cgit v1.2.3-59-g8ed1b From 50e8d8eeede8d1ef48699a82acfb7d4be07a05d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 May 2018 18:15:47 +0200 Subject: nvme: mark nvme_queue_scan static And move it toward the top of the file to avoid a forward declaration. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 19 +++++++++---------- drivers/nvme/host/nvme.h | 1 - 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c0bc76d98194..06cd04dcffbc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -100,6 +100,15 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); +static void nvme_queue_scan(struct nvme_ctrl *ctrl) +{ + /* + * Only new queue scan work when admin and IO queues are both alive + */ + if (ctrl->state == NVME_CTRL_LIVE) + queue_work(nvme_wq, &ctrl->scan_work); +} + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -3214,16 +3223,6 @@ static void nvme_scan_work(struct work_struct *work) kfree(id); } -void nvme_queue_scan(struct nvme_ctrl *ctrl) -{ - /* - * Only new queue scan work when admin and IO queues are both alive - */ - if (ctrl->state == NVME_CTRL_LIVE) - queue_work(nvme_wq, &ctrl->scan_work); -} -EXPORT_SYMBOL_GPL(nvme_queue_scan); - /* * This function iterates the namespace list unlocked to allow recovery from * controller failure. 
It is up to the caller to ensure the namespace list is diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index dcf9e9592c1a..11681278fdf6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -395,7 +395,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl); void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); -void nvme_queue_scan(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, -- cgit v1.2.3-59-g8ed1b From 30d90964e7b80a0723921e8b464c5ceda7297a7b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 May 2018 18:17:41 +0200 Subject: nvme: use the changed namespaces list log to clear ns data changed AENs Per section 5.2 we need to issue the corresponding log page to clear an AEN, so for a namespace data changed AEN we need to read the changed namespace list log. And once we read that log anyway we might as well use it to optimize the rescan. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 51 ++++++++++++++++++++++++++++++++++++++++++++---- drivers/nvme/host/nvme.h | 2 ++ 2 files changed, 49 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 06cd04dcffbc..1ae77428a1a5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3194,6 +3194,42 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) nvme_remove_invalid_namespaces(ctrl, nn); } +static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl) +{ + size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32); + __le32 *log; + int error, i; + bool ret = false; + + log = kzalloc(log_size, GFP_KERNEL); + if (!log) + return false; + + error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size); + if (error) { + dev_warn(ctrl->device, + "reading changed ns log failed: %d\n", error); + goto out_free_log; + } + + if (log[0] == cpu_to_le32(0xffffffff)) + goto out_free_log; + + for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) { + u32 nsid = le32_to_cpu(log[i]); + + if (nsid == 0) + break; + dev_info(ctrl->device, "rescanning namespace %d.\n", nsid); + nvme_validate_ns(ctrl, nsid); + } + ret = true; + +out_free_log: + kfree(log); + return ret; +} + static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = @@ -3206,6 +3242,12 @@ static void nvme_scan_work(struct work_struct *work) WARN_ON_ONCE(!ctrl->tagset); + if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) { + if (nvme_scan_changed_ns_log(ctrl)) + goto out_sort_namespaces; + dev_info(ctrl->device, "rescanning namespaces.\n"); + } + if (nvme_identify_ctrl(ctrl, &id)) return; @@ -3213,14 +3255,15 @@ static void nvme_scan_work(struct work_struct *work) if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { if (!nvme_scan_ns_list(ctrl, nn)) - goto done; + goto out_free_id; } nvme_scan_ns_sequential(ctrl, nn); - done: +out_free_id: + kfree(id); +out_sort_namespaces: down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); up_write(&ctrl->namespaces_rwsem); - kfree(id); } /* @@ -3340,7 +3383,7 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) { switch ((result & 0xff00) >> 8) { case NVME_AER_NOTICE_NS_CHANGED: - dev_info(ctrl->device, "rescanning\n"); + set_bit(EVENT_NS_CHANGED, &ctrl->events); nvme_queue_scan(ctrl); break; case 
NVME_AER_NOTICE_FW_ACT_STARTING: diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 11681278fdf6..07e8bfe705c6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -189,6 +189,8 @@ struct nvme_ctrl { struct delayed_work ka_work; struct nvme_command ka_cmd; struct work_struct fw_act_work; +#define EVENT_NS_CHANGED (1 << 0) + unsigned long events; /* Power saving configuration */ u64 ps_max_latency_us; -- cgit v1.2.3-59-g8ed1b
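Taken together, the host-side flow after this series is: the AEN handler records a pending namespace-change event and kicks the scanner; the scan work reads the changed-namespace log (which also clears the event mask on the target side), revalidates just the listed namespaces, and falls back to a full scan when the log overflowed or could not be read. A condensed sketch with hypothetical helper names (the real code is nvme_handle_aen_notice() and nvme_scan_work() in the diffs above):

struct ctrl {
	int ns_changed_pending;	/* EVENT_NS_CHANGED bit in the real driver */
};

/* Hypothetical helpers standing in for driver internals: */
int  scan_changed_ns_log(struct ctrl *c);	/* read log, rescan listed nsids */
void full_scan(struct ctrl *c);			/* Identify + full namespace walk */
void queue_scan(struct ctrl *c);		/* schedules scan_work() */

static void on_ns_changed_notice(struct ctrl *c)
{
	c->ns_changed_pending = 1;	/* remember we owe a log page read */
	queue_scan(c);
}

static void scan_work(struct ctrl *c)
{
	int pending = c->ns_changed_pending;

	c->ns_changed_pending = 0;	/* like test_and_clear_bit() */
	if (pending && scan_changed_ns_log(c))
		return;			/* targeted rescan succeeded */
	full_scan(c);			/* overflow or error: rescan everything */
}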