aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/core.c27
-rw-r--r--drivers/nvme/host/fabrics.c2
-rw-r--r--drivers/nvme/host/multipath.c5
-rw-r--r--drivers/nvme/host/nvme.h6
-rw-r--r--drivers/nvme/host/pci.c217
-rw-r--r--drivers/nvme/host/rdma.c64
-rw-r--r--drivers/nvme/host/tcp.c35
7 files changed, 195 insertions, 161 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 08f2c92602f4..6a9dd68c0f4f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1253,6 +1253,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
* effects say only one namespace is affected.
*/
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+ mutex_lock(&ctrl->scan_lock);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
}
@@ -1281,8 +1282,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
*/
if (effects & NVME_CMD_EFFECTS_LBCC)
nvme_update_formats(ctrl);
- if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK))
+ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
nvme_unfreeze(ctrl);
+ mutex_unlock(&ctrl->scan_lock);
+ }
if (effects & NVME_CMD_EFFECTS_CCC)
nvme_init_identify(ctrl);
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
@@ -2173,18 +2176,20 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
size_t nqnlen;
int off;
- nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
- if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
- strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
- return;
- }
+ if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) {
+ nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+ if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+ strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
+ return;
+ }
- if (ctrl->vs >= NVME_VS(1, 2, 1))
- dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+ if (ctrl->vs >= NVME_VS(1, 2, 1))
+ dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+ }
/* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
off = snprintf(subsys->subnqn, NVMF_NQN_SIZE,
- "nqn.2014.08.org.nvmexpress:%4x%4x",
+ "nqn.2014.08.org.nvmexpress:%04x%04x",
le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn));
off += sizeof(id->sn);
@@ -2500,7 +2505,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->oaes = le32_to_cpu(id->oaes);
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
- ctrl->cntlid = le16_to_cpup(&id->cntlid);
if (id->mdts)
max_hw_sectors = 1 << (id->mdts + page_shift - 9);
else
@@ -3400,6 +3404,7 @@ static void nvme_scan_work(struct work_struct *work)
if (nvme_identify_ctrl(ctrl, &id))
return;
+ mutex_lock(&ctrl->scan_lock);
nn = le32_to_cpu(id->nn);
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
@@ -3408,6 +3413,7 @@ static void nvme_scan_work(struct work_struct *work)
}
nvme_scan_ns_sequential(ctrl, nn);
out_free_id:
+ mutex_unlock(&ctrl->scan_lock);
kfree(id);
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
@@ -3651,6 +3657,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->state = NVME_CTRL_NEW;
spin_lock_init(&ctrl->lock);
+ mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces);
init_rwsem(&ctrl->namespaces_rwsem);
ctrl->dev = dev;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index b2ab213f43de..3eb908c50e1a 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -874,6 +874,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
if (opts->discovery_nqn) {
opts->kato = 0;
opts->nr_io_queues = 0;
+ opts->nr_write_queues = 0;
+ opts->nr_poll_queues = 0;
opts->duplicate_connect = true;
}
if (ctrl_loss_tmo < 0)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 183ec17ba067..b9fff3b8ed1b 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -545,8 +545,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
- if (!(ctrl->anacap & (1 << 6)))
- ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
+ ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
dev_err(ctrl->device,
@@ -570,6 +569,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
return 0;
out_free_ana_log_buf:
kfree(ctrl->ana_log_buf);
+ ctrl->ana_log_buf = NULL;
out:
return error;
}
@@ -577,5 +577,6 @@ out:
void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
kfree(ctrl->ana_log_buf);
+ ctrl->ana_log_buf = NULL;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2b36ac922596..c4a1bb41abf0 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -90,6 +90,11 @@ enum nvme_quirks {
* Set MEDIUM priority on SQ creation
*/
NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7),
+
+ /*
+ * Ignore device provided subnqn.
+ */
+ NVME_QUIRK_IGNORE_DEV_SUBNQN = (1 << 8),
};
/*
@@ -149,6 +154,7 @@ struct nvme_ctrl {
enum nvme_ctrl_state state;
bool identified;
spinlock_t lock;
+ struct mutex scan_lock;
const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q;
struct request_queue *connect_q;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5a0bf6a24d50..e905861186e3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -95,6 +95,7 @@ struct nvme_dev;
struct nvme_queue;
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -1019,9 +1020,11 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
- if (++nvmeq->cq_head == nvmeq->q_depth) {
+ if (nvmeq->cq_head == nvmeq->q_depth - 1) {
nvmeq->cq_head = 0;
nvmeq->cq_phase = !nvmeq->cq_phase;
+ } else {
+ nvmeq->cq_head++;
}
}
@@ -1420,6 +1423,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
return 0;
}
+static void nvme_suspend_io_queues(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
+ nvme_suspend_queue(&dev->queues[i]);
+}
+
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
struct nvme_queue *nvmeq = &dev->queues[0];
@@ -1485,8 +1496,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
if (dev->ctrl.queue_count > qid)
return 0;
- nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
- &nvmeq->cq_dma_addr, GFP_KERNEL);
+ nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth),
+ &nvmeq->cq_dma_addr, GFP_KERNEL);
if (!nvmeq->cqes)
goto free_nvmeq;
@@ -1885,8 +1896,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
- dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
- le64_to_cpu(desc->addr));
+ dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i],
+ le64_to_cpu(desc->addr),
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
}
kfree(dev->host_mem_desc_bufs);
@@ -1915,8 +1927,8 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
max_entries = dev->ctrl.hmmaxd;
- descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
- &descs_dma, GFP_KERNEL);
+ descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
+ &descs_dma, GFP_KERNEL);
if (!descs)
goto out;
@@ -1952,8 +1964,9 @@ out_free_bufs:
while (--i >= 0) {
size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
- dma_free_coherent(dev->dev, size, bufs[i],
- le64_to_cpu(descs[i].addr));
+ dma_free_attrs(dev->dev, size, bufs[i],
+ le64_to_cpu(descs[i].addr),
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
}
kfree(bufs);
@@ -2028,49 +2041,52 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
return ret;
}
-static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
+/*
+ * nirqs is the number of interrupts available for write and read
+ * queues. The core already reserved an interrupt for the admin queue.
+ */
+static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
{
- unsigned int this_w_queues = write_queues;
-
- /*
- * Setup read/write queue split
- */
- if (irq_queues == 1) {
- dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
- dev->io_queues[HCTX_TYPE_READ] = 0;
- return;
- }
-
- /*
- * If 'write_queues' is set, ensure it leaves room for at least
- * one read queue
- */
- if (this_w_queues >= irq_queues)
- this_w_queues = irq_queues - 1;
+ struct nvme_dev *dev = affd->priv;
+ unsigned int nr_read_queues;
/*
- * If 'write_queues' is set to zero, reads and writes will share
- * a queue set.
+ * If there is no interupt available for queues, ensure that
+ * the default queue is set to 1. The affinity set size is
+ * also set to one, but the irq core ignores it for this case.
+ *
+ * If only one interrupt is available or 'write_queue' == 0, combine
+ * write and read queues.
+ *
+ * If 'write_queues' > 0, ensure it leaves room for at least one read
+ * queue.
*/
- if (!this_w_queues) {
- dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues;
- dev->io_queues[HCTX_TYPE_READ] = 0;
+ if (!nrirqs) {
+ nrirqs = 1;
+ nr_read_queues = 0;
+ } else if (nrirqs == 1 || !write_queues) {
+ nr_read_queues = 0;
+ } else if (write_queues >= nrirqs) {
+ nr_read_queues = 1;
} else {
- dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
- dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues;
+ nr_read_queues = nrirqs - write_queues;
}
+
+ dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
+ affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
+ dev->io_queues[HCTX_TYPE_READ] = nr_read_queues;
+ affd->set_size[HCTX_TYPE_READ] = nr_read_queues;
+ affd->nr_sets = nr_read_queues ? 2 : 1;
}
static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int irq_sets[2];
struct irq_affinity affd = {
- .pre_vectors = 1,
- .nr_sets = ARRAY_SIZE(irq_sets),
- .sets = irq_sets,
+ .pre_vectors = 1,
+ .calc_sets = nvme_calc_irq_sets,
+ .priv = dev,
};
- int result = 0;
unsigned int irq_queues, this_p_queues;
/*
@@ -2082,54 +2098,22 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
- irq_queues = nr_io_queues - this_p_queues;
+ irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
- /*
- * For irq sets, we have to ask for minvec == maxvec. This passes
- * any reduction back to us, so we can adjust our queue counts and
- * IRQ vector needs.
- */
- do {
- nvme_calc_io_queues(dev, irq_queues);
- irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT];
- irq_sets[1] = dev->io_queues[HCTX_TYPE_READ];
- if (!irq_sets[1])
- affd.nr_sets = 1;
-
- /*
- * If we got a failure and we're down to asking for just
- * 1 + 1 queues, just ask for a single vector. We'll share
- * that between the single IO queue and the admin queue.
- */
- if (result >= 0 && irq_queues > 1)
- irq_queues = irq_sets[0] + irq_sets[1] + 1;
-
- result = pci_alloc_irq_vectors_affinity(pdev, irq_queues,
- irq_queues,
- PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
+ /* Initialize for the single interrupt case */
+ dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
+ dev->io_queues[HCTX_TYPE_READ] = 0;
- /*
- * Need to reduce our vec counts. If we get ENOSPC, the
- * platform should support mulitple vecs, we just need
- * to decrease our ask. If we get EINVAL, the platform
- * likely does not. Back down to ask for just one vector.
- */
- if (result == -ENOSPC) {
- irq_queues--;
- if (!irq_queues)
- return result;
- continue;
- } else if (result == -EINVAL) {
- irq_queues = 1;
- continue;
- } else if (result <= 0)
- return -EIO;
- break;
- } while (1);
+ return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
+ PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
+}
- return result;
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+ if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq))
+ __nvme_disable_io_queues(dev, nvme_admin_delete_cq);
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
@@ -2168,6 +2152,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
} while (1);
adminq->q_db = dev->dbs;
+ retry:
/* Deregister the admin queue's interrupt */
pci_free_irq(pdev, 0, adminq);
@@ -2185,25 +2170,34 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
result = max(result - 1, 1);
dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
- dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
- dev->io_queues[HCTX_TYPE_DEFAULT],
- dev->io_queues[HCTX_TYPE_READ],
- dev->io_queues[HCTX_TYPE_POLL]);
-
/*
* Should investigate if there's a performance win from allocating
* more queues than interrupt vectors; it might allow the submission
* path to scale better, even if the receive path is limited by the
* number of interrupts.
*/
-
result = queue_request_irq(adminq);
if (result) {
adminq->cq_vector = -1;
return result;
}
set_bit(NVMEQ_ENABLED, &adminq->flags);
- return nvme_create_io_queues(dev);
+
+ result = nvme_create_io_queues(dev);
+ if (result || dev->online_queues < 2)
+ return result;
+
+ if (dev->online_queues - 1 < dev->max_qid) {
+ nr_io_queues = dev->online_queues - 1;
+ nvme_disable_io_queues(dev);
+ nvme_suspend_io_queues(dev);
+ goto retry;
+ }
+ dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
+ dev->io_queues[HCTX_TYPE_DEFAULT],
+ dev->io_queues[HCTX_TYPE_READ],
+ dev->io_queues[HCTX_TYPE_POLL]);
+ return 0;
}
static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2248,7 +2242,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
return 0;
}
-static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
{
int nr_queues = dev->online_queues - 1, sent = 0;
unsigned long timeout;
@@ -2294,7 +2288,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->tagset.nr_maps = 2; /* default + read */
if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++;
- dev->tagset.nr_maps = HCTX_MAX_TYPES;
dev->tagset.timeout = NVME_IO_TIMEOUT;
dev->tagset.numa_node = dev_to_node(dev->dev);
dev->tagset.queue_depth =
@@ -2410,7 +2403,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
- int i;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -2437,13 +2429,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_stop_queues(&dev->ctrl);
if (!dead && dev->ctrl.queue_count > 0) {
- if (nvme_disable_io_queues(dev, nvme_admin_delete_sq))
- nvme_disable_io_queues(dev, nvme_admin_delete_cq);
+ nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
}
- for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
- nvme_suspend_queue(&dev->queues[i]);
-
+ nvme_suspend_io_queues(dev);
+ nvme_suspend_queue(&dev->queues[0]);
nvme_pci_disable(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
@@ -2527,27 +2517,18 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
- /*
- * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
- * initializing procedure here.
- */
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
- dev_warn(dev->ctrl.device,
- "failed to mark controller CONNECTING\n");
- goto out;
- }
-
+ mutex_lock(&dev->shutdown_lock);
result = nvme_pci_enable(dev);
if (result)
- goto out;
+ goto out_unlock;
result = nvme_pci_configure_admin_queue(dev);
if (result)
- goto out;
+ goto out_unlock;
result = nvme_alloc_admin_tags(dev);
if (result)
- goto out;
+ goto out_unlock;
/*
* Limit the max command size to prevent iod->sg allocations going
@@ -2555,6 +2536,17 @@ static void nvme_reset_work(struct work_struct *work)
*/
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
dev->ctrl.max_segments = NVME_MAX_SEGS;
+ mutex_unlock(&dev->shutdown_lock);
+
+ /*
+ * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
+ * initializing procedure here.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller CONNECTING\n");
+ goto out;
+ }
result = nvme_init_identify(&dev->ctrl);
if (result)
@@ -2619,6 +2611,8 @@ static void nvme_reset_work(struct work_struct *work)
nvme_start_ctrl(&dev->ctrl);
return;
+ out_unlock:
+ mutex_unlock(&dev->shutdown_lock);
out:
nvme_remove_dead_ctrl(dev, result);
}
@@ -2946,6 +2940,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_MEDIUM_PRIO_SQ },
+ { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */
+ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
@@ -2988,6 +2984,7 @@ static struct pci_driver nvme_driver = {
static int __init nvme_init(void)
{
+ BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
return pci_register_driver(&nvme_driver);
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0a2fd2949ad7..52abc3a6de12 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -119,6 +119,7 @@ struct nvme_rdma_ctrl {
struct nvme_ctrl ctrl;
bool use_inline_data;
+ u32 io_queues[HCTX_MAX_TYPES];
};
static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -165,8 +166,8 @@ static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue)
static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue)
{
return nvme_rdma_queue_idx(queue) >
- queue->ctrl->ctrl.opts->nr_io_queues +
- queue->ctrl->ctrl.opts->nr_write_queues;
+ queue->ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+ queue->ctrl->io_queues[HCTX_TYPE_READ];
}
static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
@@ -661,8 +662,21 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
nr_io_queues = min_t(unsigned int, nr_io_queues,
ibdev->num_comp_vectors);
- nr_io_queues += min(opts->nr_write_queues, num_online_cpus());
- nr_io_queues += min(opts->nr_poll_queues, num_online_cpus());
+ if (opts->nr_write_queues) {
+ ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+ min(opts->nr_write_queues, nr_io_queues);
+ nr_io_queues += ctrl->io_queues[HCTX_TYPE_DEFAULT];
+ } else {
+ ctrl->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
+ }
+
+ ctrl->io_queues[HCTX_TYPE_READ] = nr_io_queues;
+
+ if (opts->nr_poll_queues) {
+ ctrl->io_queues[HCTX_TYPE_POLL] =
+ min(opts->nr_poll_queues, num_online_cpus());
+ nr_io_queues += ctrl->io_queues[HCTX_TYPE_POLL];
+ }
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret)
@@ -1689,18 +1703,28 @@ static enum blk_eh_timer_return
nvme_rdma_timeout(struct request *rq, bool reserved)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_rdma_queue *queue = req->queue;
+ struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- dev_warn(req->queue->ctrl->ctrl.device,
- "I/O %d QID %d timeout, reset controller\n",
- rq->tag, nvme_rdma_queue_idx(req->queue));
+ dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
+ rq->tag, nvme_rdma_queue_idx(queue));
- /* queue error recovery */
- nvme_rdma_error_recovery(req->queue->ctrl);
+ if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
+ /*
+ * Teardown immediately if controller times out while starting
+ * or we are already started error recovery. all outstanding
+ * requests are completed on shutdown, so we return BLK_EH_DONE.
+ */
+ flush_work(&ctrl->err_work);
+ nvme_rdma_teardown_io_queues(ctrl, false);
+ nvme_rdma_teardown_admin_queue(ctrl, false);
+ return BLK_EH_DONE;
+ }
- /* fail with DNR on cmd timeout */
- nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
+ dev_warn(ctrl->ctrl.device, "starting error recovery\n");
+ nvme_rdma_error_recovery(ctrl);
- return BLK_EH_DONE;
+ return BLK_EH_RESET_TIMER;
}
static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -1779,17 +1803,15 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
struct nvme_rdma_ctrl *ctrl = set->driver_data;
set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
- set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
+ set->map[HCTX_TYPE_DEFAULT].nr_queues =
+ ctrl->io_queues[HCTX_TYPE_DEFAULT];
+ set->map[HCTX_TYPE_READ].nr_queues = ctrl->io_queues[HCTX_TYPE_READ];
if (ctrl->ctrl.opts->nr_write_queues) {
/* separate read/write queues */
- set->map[HCTX_TYPE_DEFAULT].nr_queues =
- ctrl->ctrl.opts->nr_write_queues;
set->map[HCTX_TYPE_READ].queue_offset =
- ctrl->ctrl.opts->nr_write_queues;
+ ctrl->io_queues[HCTX_TYPE_DEFAULT];
} else {
/* mixed read/write queues */
- set->map[HCTX_TYPE_DEFAULT].nr_queues =
- ctrl->ctrl.opts->nr_io_queues;
set->map[HCTX_TYPE_READ].queue_offset = 0;
}
blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
@@ -1799,12 +1821,12 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
if (ctrl->ctrl.opts->nr_poll_queues) {
set->map[HCTX_TYPE_POLL].nr_queues =
- ctrl->ctrl.opts->nr_poll_queues;
+ ctrl->io_queues[HCTX_TYPE_POLL];
set->map[HCTX_TYPE_POLL].queue_offset =
- ctrl->ctrl.opts->nr_io_queues;
+ ctrl->io_queues[HCTX_TYPE_DEFAULT];
if (ctrl->ctrl.opts->nr_write_queues)
set->map[HCTX_TYPE_POLL].queue_offset +=
- ctrl->ctrl.opts->nr_write_queues;
+ ctrl->io_queues[HCTX_TYPE_READ];
blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
}
return 0;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index de174912445e..5f0a00425242 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1565,8 +1565,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
{
nvme_tcp_stop_io_queues(ctrl);
if (remove) {
- if (ctrl->ops->flags & NVME_F_FABRICS)
- blk_cleanup_queue(ctrl->connect_q);
+ blk_cleanup_queue(ctrl->connect_q);
blk_mq_free_tag_set(ctrl->tagset);
}
nvme_tcp_free_io_queues(ctrl);
@@ -1587,12 +1586,10 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
goto out_free_io_queues;
}
- if (ctrl->ops->flags & NVME_F_FABRICS) {
- ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
- if (IS_ERR(ctrl->connect_q)) {
- ret = PTR_ERR(ctrl->connect_q);
- goto out_free_tag_set;
- }
+ ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
+ if (IS_ERR(ctrl->connect_q)) {
+ ret = PTR_ERR(ctrl->connect_q);
+ goto out_free_tag_set;
}
} else {
blk_mq_update_nr_hw_queues(ctrl->tagset,
@@ -1606,7 +1603,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
return 0;
out_cleanup_connect_q:
- if (new && (ctrl->ops->flags & NVME_F_FABRICS))
+ if (new)
blk_cleanup_queue(ctrl->connect_q);
out_free_tag_set:
if (new)
@@ -1620,7 +1617,6 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
{
nvme_tcp_stop_queue(ctrl, 0);
if (remove) {
- free_opal_dev(ctrl->opal_dev);
blk_cleanup_queue(ctrl->admin_q);
blk_mq_free_tag_set(ctrl->admin_tagset);
}
@@ -1952,20 +1948,23 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
- dev_dbg(ctrl->ctrl.device,
+ dev_warn(ctrl->ctrl.device,
"queue %d: timeout request %#x type %d\n",
- nvme_tcp_queue_id(req->queue), rq->tag,
- pdu->hdr.type);
+ nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
- union nvme_result res = {};
-
- nvme_req(rq)->flags |= NVME_REQ_CANCELLED;
- nvme_end_request(rq, cpu_to_le16(NVME_SC_ABORT_REQ), res);
+ /*
+ * Teardown immediately if controller times out while starting
+ * or we are already started error recovery. all outstanding
+ * requests are completed on shutdown, so we return BLK_EH_DONE.
+ */
+ flush_work(&ctrl->err_work);
+ nvme_tcp_teardown_io_queues(&ctrl->ctrl, false);
+ nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false);
return BLK_EH_DONE;
}
- /* queue error recovery */
+ dev_warn(ctrl->ctrl.device, "starting error recovery\n");
nvme_tcp_error_recovery(&ctrl->ctrl);
return BLK_EH_RESET_TIMER;