From d554b5e1ca64d23e4f839e6531490fee8479fbaf Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 27 Jun 2017 22:27:57 -0400
Subject: nvme: Quirks for PM1725 controllers

PM1725 controllers have a couple of quirks that need to be handled in
the driver:

 - I/O queue depth must be limited to 64 entries on controllers that do
   not report MQES.

 - The host interface registers go offline briefly while resetting the
   chip. Thus a delay is needed before checking whether the controller
   is ready.

Note that the admin queue depth is also limited to 64 on older versions
of this board. Since our NVME_AQ_DEPTH is now 32 that is no longer an
issue.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'drivers')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 32a98e2740ad..343263bcb49a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1908,6 +1908,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
 			"set queue depth=%u to work around controller resets\n",
 			dev->q_depth);
+	} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+		   (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+		   NVME_CAP_MQES(cap) == 0) {
+		dev->q_depth = 64;
+		dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+                        "set queue depth=%u\n", dev->q_depth);
 	}
 
 	/*
@@ -2454,6 +2460,10 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x144d, 0xa821),   /* Samsung PM1725 */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
-- 
cgit v1.2.3-59-g8ed1b


From d858e5f04e58a42a6e0c8ec74ea15e3ea4bb45d0 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 24 Apr 2017 10:58:29 +0300
Subject: nvme: move queue_count to the nvme_ctrl

All all transports use the queue_count in exactly the same, so move it to
the generic struct nvme_ctrl. In the future it will also be maintained by
the core.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-By: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c     | 35 +++++++++++++++++------------------
 drivers/nvme/host/nvme.h   |  1 +
 drivers/nvme/host/pci.c    | 15 +++++++--------
 drivers/nvme/host/rdma.c   | 39 +++++++++++++++++++--------------------
 drivers/nvme/target/loop.c | 15 +++++++--------
 5 files changed, 51 insertions(+), 54 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index ed87214fdc0e..7eb006427caf 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -148,7 +148,6 @@ struct nvme_fc_ctrl {
 	struct device		*dev;
 	struct nvme_fc_lport	*lport;
 	struct nvme_fc_rport	*rport;
-	u32			queue_count;
 	u32			cnum;
 
 	u64			association_id;
@@ -1614,7 +1613,7 @@ nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_fc_free_queue(&ctrl->queues[i]);
 }
 
@@ -1635,10 +1634,10 @@ __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
 static void
 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1];
+	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
 	int i;
 
-	for (i = ctrl->queue_count - 1; i >= 1; i--, queue--)
+	for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
 		__nvme_fc_delete_hw_queue(ctrl, queue, i);
 }
 
@@ -1648,7 +1647,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 	struct nvme_fc_queue *queue = &ctrl->queues[1];
 	int i, ret;
 
-	for (i = 1; i < ctrl->queue_count; i++, queue++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
 		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
 		if (ret)
 			goto delete_queues;
@@ -1667,7 +1666,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 {
 	int i, ret = 0;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
 					(qsize / 5));
 		if (ret)
@@ -1685,7 +1684,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
 }
 
@@ -2187,7 +2186,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 		return ret;
 	}
 
-	ctrl->queue_count = opts->nr_io_queues + 1;
+	ctrl->ctrl.queue_count = opts->nr_io_queues + 1;
 	if (!opts->nr_io_queues)
 		return 0;
 
@@ -2204,7 +2203,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 						sizeof(struct scatterlist)) +
 					ctrl->lport->ops->fcprqst_priv_sz;
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -2258,7 +2257,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	}
 
 	/* check for io queues existing */
-	if (ctrl->queue_count == 1)
+	if (ctrl->ctrl.queue_count == 1)
 		return 0;
 
 	nvme_fc_init_io_queues(ctrl);
@@ -2381,7 +2380,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * Create the io queues
 	 */
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		if (ctrl->ctrl.state == NVME_CTRL_NEW)
 			ret = nvme_fc_create_io_queues(ctrl);
 		else
@@ -2395,7 +2394,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
@@ -2447,7 +2446,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * io requests back to the block layer as part of normal completions
 	 * (but with error status).
 	 */
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -2702,18 +2701,18 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
-	ctrl->queue_count = min_t(unsigned int,
+	ctrl->ctrl.queue_count = min_t(unsigned int,
 				opts->nr_io_queues,
 				lport->ops->max_hw_queues);
-	opts->nr_io_queues = ctrl->queue_count;	/* so opts has valid value */
-	ctrl->queue_count++;	/* +1 for admin queue */
+	opts->nr_io_queues = ctrl->ctrl.queue_count;	/* so opts has valid value */
+	ctrl->ctrl.queue_count++;	/* +1 for admin queue */
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ret = -ENOMEM;
-	ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue),
-				GFP_KERNEL);
+	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
+				sizeof(struct nvme_fc_queue), GFP_KERNEL);
 	if (!ctrl->queues)
 		goto out_free_ida;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d70ff0fdd36b..6c51d92b7fab 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -142,6 +142,7 @@ struct nvme_ctrl {
 	u16 cntlid;
 
 	u32 ctrl_config;
+	u32 queue_count;
 
 	u32 page_size;
 	u32 max_hw_sectors;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 343263bcb49a..6b50c9096fe4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -74,7 +74,6 @@ struct nvme_dev {
 	struct device *dev;
 	struct dma_pool *prp_page_pool;
 	struct dma_pool *prp_small_pool;
-	unsigned queue_count;
 	unsigned online_queues;
 	unsigned max_qid;
 	int q_depth;
@@ -1099,9 +1098,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
 {
 	int i;
 
-	for (i = dev->queue_count - 1; i >= lowest; i--) {
+	for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
 		struct nvme_queue *nvmeq = dev->queues[i];
-		dev->queue_count--;
+		dev->ctrl.queue_count--;
 		dev->queues[i] = NULL;
 		nvme_free_queue(nvmeq);
 	}
@@ -1221,7 +1220,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->qid = qid;
 	nvmeq->cq_vector = -1;
 	dev->queues[qid] = nvmeq;
-	dev->queue_count++;
+	dev->ctrl.queue_count++;
 
 	return nvmeq;
 
@@ -1441,7 +1440,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 	unsigned i, max;
 	int ret = 0;
 
-	for (i = dev->queue_count; i <= dev->max_qid; i++) {
+	for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
 		/* vector == qid - 1, match nvme_create_queue */
 		if (!nvme_alloc_queue(dev, i, dev->q_depth,
 		     pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
@@ -1450,7 +1449,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 		}
 	}
 
-	max = min(dev->max_qid, dev->queue_count - 1);
+	max = min(dev->max_qid, dev->ctrl.queue_count - 1);
 	for (i = dev->online_queues; i <= max; i++) {
 		ret = nvme_create_queue(dev->queues[i], i);
 		if (ret)
@@ -2001,7 +2000,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_stop_queues(&dev->ctrl);
 
 	queues = dev->online_queues - 1;
-	for (i = dev->queue_count - 1; i > 0; i--)
+	for (i = dev->ctrl.queue_count - 1; i > 0; i--)
 		nvme_suspend_queue(dev->queues[i]);
 
 	if (dead) {
@@ -2009,7 +2008,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		 * probe, before the admin queue is configured. Thus,
 		 * queue_count can be 0 here.
 		 */
-		if (dev->queue_count)
+		if (dev->ctrl.queue_count)
 			nvme_suspend_queue(dev->queues[0]);
 	} else {
 		nvme_disable_io_queues(dev, queues);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6d4119dfbdaa..c4bacad3fe23 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -103,7 +103,6 @@ struct nvme_rdma_queue {
 struct nvme_rdma_ctrl {
 	/* read only in the hot path */
 	struct nvme_rdma_queue	*queues;
-	u32			queue_count;
 
 	/* other member variables */
 	struct blk_mq_tag_set	tag_set;
@@ -349,7 +348,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_rdma_ctrl *ctrl = data;
 	struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-	BUG_ON(hctx_idx >= ctrl->queue_count);
+	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
 	hctx->driver_data = queue;
 	return 0;
@@ -587,7 +586,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
 }
 
@@ -595,7 +594,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	int i, ret = 0;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret) {
 			dev_info(ctrl->ctrl.device,
@@ -623,14 +622,14 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
 	if (ret)
 		return ret;
 
-	ctrl->queue_count = nr_io_queues + 1;
-	if (ctrl->queue_count < 2)
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
+	if (ctrl->ctrl.queue_count < 2)
 		return 0;
 
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_rdma_init_queue(ctrl, i,
 					   ctrl->ctrl.opts->queue_size);
 		if (ret) {
@@ -705,7 +704,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	++ctrl->ctrl.nr_reconnects;
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_rdma_free_io_queues(ctrl);
 
 		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -735,7 +734,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	nvme_start_keep_alive(&ctrl->ctrl);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
 			goto requeue;
@@ -749,7 +748,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	WARN_ON_ONCE(!changed);
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
@@ -772,15 +771,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	nvme_stop_keep_alive(&ctrl->ctrl);
 
-	for (i = 0; i < ctrl->queue_count; i++)
+	for (i = 0; i < ctrl->ctrl.queue_count; i++)
 		clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
 
-	if (ctrl->queue_count > 1)
+	if (ctrl->ctrl.queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 
 	/* We must take care of fastfail/requeue all our inflight requests */
-	if (ctrl->queue_count > 1)
+	if (ctrl->ctrl.queue_count > 1)
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
@@ -1624,7 +1623,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->reconnect_work);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
@@ -1716,7 +1715,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		goto del_dead_ctrl;
 	}
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
 		if (ret)
 			goto del_dead_ctrl;
@@ -1733,7 +1732,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
@@ -1785,7 +1784,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
 		SG_CHUNK_SIZE * sizeof(struct scatterlist);
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -1863,12 +1862,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
-	ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+	ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ret = -ENOMEM;
-	ctrl->queues = kcalloc(ctrl->queue_count, sizeof(*ctrl->queues),
+	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
 				GFP_KERNEL);
 	if (!ctrl->queues)
 		goto out_uninit_ctrl;
@@ -1925,7 +1924,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	if (opts->nr_io_queues) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 5f55c683b338..edf0e2ab19e3 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -44,7 +44,6 @@ struct nvme_loop_iod {
 
 struct nvme_loop_ctrl {
 	struct nvme_loop_queue	*queues;
-	u32			queue_count;
 
 	struct blk_mq_tag_set	admin_tag_set;
 
@@ -241,7 +240,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_loop_ctrl *ctrl = data;
 	struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-	BUG_ON(hctx_idx >= ctrl->queue_count);
+	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
 	hctx->driver_data = queue;
 	return 0;
@@ -307,7 +306,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
 }
 
@@ -330,7 +329,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
 		if (ret)
 			goto out_destroy_queues;
 
-		ctrl->queue_count++;
+		ctrl->ctrl.queue_count++;
 	}
 
 	return 0;
@@ -344,7 +343,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
 {
 	int i, ret;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
 			return ret;
@@ -372,7 +371,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
 	if (error)
 		return error;
-	ctrl->queue_count = 1;
+	ctrl->ctrl.queue_count = 1;
 
 	error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
 	if (error)
@@ -426,7 +425,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
 	nvme_stop_keep_alive(&ctrl->ctrl);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
@@ -559,7 +558,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
 		SG_CHUNK_SIZE * sizeof(struct scatterlist);
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 	ctrl->ctrl.tagset = &ctrl->tag_set;
 
-- 
cgit v1.2.3-59-g8ed1b


From 20d0dfe65afd3fb59d14720570a6921eb6bf5c1f Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 27 Jun 2017 22:16:38 +0300
Subject: nvme: move ctrl cap to struct nvme_ctrl

All transports use either a private cache of controller cap or an on-stack
copy, move it to the generic struct nvme_ctrl. In the future it will also
be maintained by the core.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c     |  8 +++-----
 drivers/nvme/host/nvme.h   |  1 +
 drivers/nvme/host/pci.c    | 20 +++++++++-----------
 drivers/nvme/host/rdma.c   | 10 +++++-----
 drivers/nvme/target/loop.c |  7 +++----
 5 files changed, 21 insertions(+), 25 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7eb006427caf..2f990d979037 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -152,8 +152,6 @@ struct nvme_fc_ctrl {
 
 	u64			association_id;
 
-	u64			cap;
-
 	struct list_head	ctrl_list;	/* rport->ctrl_list */
 
 	struct blk_mq_tag_set	admin_tag_set;
@@ -2328,7 +2326,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * prior connection values
 	 */
 
-	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
 	if (ret) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -2336,9 +2334,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
 
-	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
 		goto out_disconnect_admin_queue;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 6c51d92b7fab..e0b83311d5de 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -144,6 +144,7 @@ struct nvme_ctrl {
 	u32 ctrl_config;
 	u32 queue_count;
 
+	u64 cap;
 	u32 page_size;
 	u32 max_hw_sectors;
 	u16 oncs;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6b50c9096fe4..fe3907a082ba 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1144,8 +1144,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 	if (shutdown)
 		nvme_shutdown_ctrl(&dev->ctrl);
 	else
-		nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
-						dev->bar + NVME_REG_CAP));
+		nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
 	spin_lock_irq(&nvmeq->q_lock);
 	nvme_process_cq(nvmeq);
@@ -1388,7 +1387,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
-	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 	struct nvme_queue *nvmeq;
 
 	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
@@ -1396,13 +1394,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		return result;
 
 	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
-						NVME_CAP_NSSRC(cap) : 0;
+				NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
 
 	if (dev->subsystem &&
 	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
 		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
 
-	result = nvme_disable_ctrl(&dev->ctrl, cap);
+	result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 	if (result < 0)
 		return result;
 
@@ -1421,7 +1419,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
 	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
 
-	result = nvme_enable_ctrl(&dev->ctrl, cap);
+	result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
 	if (result)
 		return result;
 
@@ -1865,7 +1863,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 
 static int nvme_pci_enable(struct nvme_dev *dev)
 {
-	u64 cap;
 	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -1892,10 +1889,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	if (result < 0)
 		return result;
 
-	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
-	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+	dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+				NVME_Q_DEPTH);
+	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
 
 	/*
@@ -1909,7 +1907,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 			dev->q_depth);
 	} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
 		   (pdev->device == 0xa821 || pdev->device == 0xa822) &&
-		   NVME_CAP_MQES(cap) == 0) {
+		   NVME_CAP_MQES(dev->ctrl.cap) == 0) {
 		dev->q_depth = 64;
 		dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
                         "set queue depth=%u\n", dev->q_depth);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c4bacad3fe23..2ab0cdb4d881 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -118,7 +118,6 @@ struct nvme_rdma_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct nvme_rdma_device	*device;
 
-	u64			cap;
 	u32			max_fr_pages;
 
 	struct sockaddr_storage addr;
@@ -728,7 +727,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
 		goto requeue;
 
@@ -1573,7 +1572,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP,
+			&ctrl->ctrl.cap);
 	if (error) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -1581,9 +1581,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (error)
 		goto out_cleanup_queue;
 
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index edf0e2ab19e3..568ed8625696 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -48,7 +48,6 @@ struct nvme_loop_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 
 	struct list_head	list;
-	u64			cap;
 	struct blk_mq_tag_set	tag_set;
 	struct nvme_loop_iod	async_event_iod;
 	struct nvme_ctrl	ctrl;
@@ -387,7 +386,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
 	if (error) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -395,9 +394,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (error)
 		goto out_cleanup_queue;
 
-- 
cgit v1.2.3-59-g8ed1b


From 01ad0990467eaa17ae17db7376a4f02739f558c0 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 1 May 2017 00:27:17 +0300
Subject: nvme-pci: rename to nvme_pci_configure_admin_queue

we are going to need the name for the core routine...

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fe3907a082ba..eb729ff70e7d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1383,7 +1383,7 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
 	return 0;
 }
 
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
@@ -2096,7 +2096,7 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	result = nvme_configure_admin_queue(dev);
+	result = nvme_pci_configure_admin_queue(dev);
 	if (result)
 		goto out;
 
-- 
cgit v1.2.3-59-g8ed1b


From fbd08e7673f950854679e5d79a30bb25e77a9d08 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:10 -0700
Subject: bio-integrity: fix interface for bio_integrity_trim

bio_integrity_trim inherent it's interface from bio_trim and accept
offset and size, but this API is error prone because data offset
must always be insync with bio's data offset. That is why we have
integrity update hook in bio_advance()

So only meaningful values are: offset == 0, sectors == bio_sectors(bio)
Let's just remove them completely.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 11 ++---------
 block/bio.c           |  4 ++--
 drivers/md/dm.c       |  2 +-
 include/linux/bio.h   |  5 ++---
 4 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'drivers')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8c2253c59edb..3a0d71199fb0 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -433,22 +433,15 @@ EXPORT_SYMBOL(bio_integrity_advance);
 /**
  * bio_integrity_trim - Trim integrity vector
  * @bio:	bio whose integrity vector to update
- * @offset:	offset to first data sector
- * @sectors:	number of data sectors
  *
  * Description: Used to trim the integrity vector in a cloned bio.
- * The ivec will be advanced corresponding to 'offset' data sectors
- * and the length will be truncated corresponding to 'len' data
- * sectors.
  */
-void bio_integrity_trim(struct bio *bio, unsigned int offset,
-			unsigned int sectors)
+void bio_integrity_trim(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	bio_integrity_advance(bio, offset << 9);
-	bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
 }
 EXPORT_SYMBOL(bio_integrity_trim);
 
diff --git a/block/bio.c b/block/bio.c
index 5b4b32a2f8d0..a6b225324a61 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1868,7 +1868,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	split->bi_iter.bi_size = sectors << 9;
 
 	if (bio_integrity(split))
-		bio_integrity_trim(split, 0, sectors);
+		bio_integrity_trim(split);
 
 	bio_advance(bio, split->bi_iter.bi_size);
 
@@ -1902,7 +1902,7 @@ void bio_trim(struct bio *bio, int offset, int size)
 	bio->bi_iter.bi_size = size;
 
 	if (bio_integrity(bio))
-		bio_integrity_trim(bio, 0, size);
+		bio_integrity_trim(bio);
 
 }
 EXPORT_SYMBOL_GPL(bio_trim);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 402946035308..13e714ea7a42 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1153,7 +1153,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
 	clone->bi_iter.bi_size = to_bytes(len);
 
 	if (unlikely(bio_integrity(bio) != NULL))
-		bio_integrity_trim(clone, 0, len);
+		bio_integrity_trim(clone);
 
 	return 0;
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 664a27da276d..1d74f5120369 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -728,7 +728,7 @@ extern bool bio_integrity_enabled(struct bio *bio);
 extern int bio_integrity_prep(struct bio *);
 extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
+extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 extern int bioset_integrity_create(struct bio_set *, int);
 extern void bioset_integrity_free(struct bio_set *);
@@ -778,8 +778,7 @@ static inline void bio_integrity_advance(struct bio *bio,
 	return;
 }
 
-static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
-				      unsigned int sectors)
+static inline void bio_integrity_trim(struct bio *bio)
 {
 	return;
 }
-- 
cgit v1.2.3-59-g8ed1b


From e23947bd76f00701f9407af23e671f4da96f5f25 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:11 -0700
Subject: bio-integrity: fold bio_integrity_enabled to bio_integrity_prep

Currently all integrity prep hooks are open-coded, and if prepare fails
we ignore it's code and fail bio with EIO. Let's return real error to
upper layer, so later caller may react accordingly.

In fact no one want to use bio_integrity_prep() w/o bio_integrity_enabled,
so it is reasonable to fold it in to one function.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
[hch: merged with the latest block tree,
	return bool from bio_integrity_prep]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/data-integrity.txt |  6 +--
 block/bio-integrity.c                  | 88 +++++++++++++++-------------------
 block/blk-core.c                       |  5 +-
 block/blk-mq.c                         |  4 +-
 drivers/nvdimm/blk.c                   | 13 +----
 drivers/nvdimm/btt.c                   | 13 +----
 include/linux/bio.h                    | 12 ++---
 7 files changed, 50 insertions(+), 91 deletions(-)

(limited to 'drivers')

diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index f56ec97f0d14..934c44ea0c57 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -192,7 +192,7 @@ will require extra work due to the application tag.
     supported by the block device.
 
 
-    int bio_integrity_prep(bio);
+    bool bio_integrity_prep(bio);
 
       To generate IMD for WRITE and to set up buffers for READ, the
       filesystem must call bio_integrity_prep(bio).
@@ -201,9 +201,7 @@ will require extra work due to the application tag.
       sector must be set, and the bio should have all data pages
       added.  It is up to the caller to ensure that the bio does not
       change while I/O is in progress.
-
-      bio_integrity_prep() should only be called if
-      bio_integrity_enabled() returned 1.
+      Complete bio with error if prepare failed for some reson.
 
 
 5.3 PASSING EXISTING INTEGRITY METADATA
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 3a0d71199fb0..44c4c52681c2 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -159,44 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
-/**
- * bio_integrity_enabled - Check whether integrity can be passed
- * @bio:	bio to check
- *
- * Description: Determines whether bio_integrity_prep() can be called
- * on this bio or not.	bio data direction and target device must be
- * set prior to calling.  The functions honors the write_generate and
- * read_verify flags in sysfs.
- */
-bool bio_integrity_enabled(struct bio *bio)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
-	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
-		return false;
-
-	if (!bio_sectors(bio))
-		return false;
-
-	/* Already protected? */
-	if (bio_integrity(bio))
-		return false;
-
-	if (bi == NULL)
-		return false;
-
-	if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_VERIFY))
-		return true;
-
-	if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_GENERATE))
-		return true;
-
-	return false;
-}
-EXPORT_SYMBOL(bio_integrity_enabled);
-
 /**
  * bio_integrity_intervals - Return number of integrity intervals for a bio
  * @bi:		blk_integrity profile for device
@@ -262,14 +224,15 @@ static blk_status_t bio_integrity_process(struct bio *bio,
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
  *
- * Description: Allocates a buffer for integrity metadata, maps the
- * pages and attaches them to a bio.  The bio must have data
- * direction, target device and start sector set priot to calling.  In
- * the WRITE case, integrity metadata will be generated using the
- * block device's integrity function.  In the READ case, the buffer
+ * Description:  Checks if the bio already has an integrity payload attached.
+ * If it does, the payload has been generated by another kernel subsystem,
+ * and we just pass it through. Otherwise allocates integrity payload.
+ * The bio must have data direction, target device and start sector set priot
+ * to calling.  In the WRITE case, integrity metadata will be generated using
+ * the block device's integrity function.  In the READ case, the buffer
  * will be prepared for DMA and a suitable end_io handler set up.
  */
-int bio_integrity_prep(struct bio *bio)
+bool bio_integrity_prep(struct bio *bio)
 {
 	struct bio_integrity_payload *bip;
 	struct blk_integrity *bi;
@@ -279,20 +242,41 @@ int bio_integrity_prep(struct bio *bio)
 	unsigned int len, nr_pages;
 	unsigned int bytes, offset, i;
 	unsigned int intervals;
+	blk_status_t status;
 
 	bi = bdev_get_integrity(bio->bi_bdev);
 	q = bdev_get_queue(bio->bi_bdev);
-	BUG_ON(bi == NULL);
-	BUG_ON(bio_integrity(bio));
+	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
+		return true;
+
+	if (!bio_sectors(bio))
+		return true;
 
+	/* Already protected? */
+	if (bio_integrity(bio))
+		return true;
+
+	if (bi == NULL)
+		return true;
+
+	if (bio_data_dir(bio) == READ) {
+		if (!bi->profile->verify_fn ||
+		    !(bi->flags & BLK_INTEGRITY_VERIFY))
+			return true;
+	} else {
+		if (!bi->profile->generate_fn ||
+		    !(bi->flags & BLK_INTEGRITY_GENERATE))
+			return true;
+	}
 	intervals = bio_integrity_intervals(bi, bio_sectors(bio));
 
 	/* Allocate kernel buffer for protection data */
 	len = intervals * bi->tuple_size;
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
+	status = BLK_STS_RESOURCE;
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
-		return -ENOMEM;
+		goto err_end_io;
 	}
 
 	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -304,7 +288,8 @@ int bio_integrity_prep(struct bio *bio)
 	if (IS_ERR(bip)) {
 		printk(KERN_ERR "could not allocate data integrity bioset\n");
 		kfree(buf);
-		return PTR_ERR(bip);
+		status = BLK_STS_RESOURCE;
+		goto err_end_io;
 	}
 
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
@@ -349,8 +334,13 @@ int bio_integrity_prep(struct bio *bio)
 	/* Auto-generate integrity metadata if this is a write */
 	if (bio_data_dir(bio) == WRITE)
 		bio_integrity_process(bio, bi->profile->generate_fn);
+	return true;
+
+err_end_io:
+	bio->bi_status = status;
+	bio_endio(bio);
+	return false;
 
-	return 0;
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index af393d5a9680..970b9c9638c5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1787,11 +1787,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		bio_endio(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (op_is_flush(bio->bi_opf)) {
 		spin_lock_irq(q->queue_lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ced2b000ca02..77617fb12661 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1550,10 +1550,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio_io_error(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index f12d23c49771..1a578b2a437b 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -179,16 +179,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0, rw;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	bip = bio_integrity(bio);
 	nsblk = q->queuedata;
@@ -212,7 +204,6 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
- out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b6ba0618ea46..b5caaee78bbf 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1203,16 +1203,8 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
@@ -1239,7 +1231,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
-out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1d74f5120369..b3b5f5a89a9c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -724,8 +724,7 @@ struct biovec_slab {
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
 extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_prep(struct bio *);
+extern bool bio_integrity_prep(struct bio *);
 extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *);
@@ -741,11 +740,6 @@ static inline void *bio_integrity(struct bio *bio)
 	return NULL;
 }
 
-static inline bool bio_integrity_enabled(struct bio *bio)
-{
-	return false;
-}
-
 static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
 	return 0;
@@ -756,9 +750,9 @@ static inline void bioset_integrity_free (struct bio_set *bs)
 	return;
 }
 
-static inline int bio_integrity_prep(struct bio *bio)
+static inline bool bio_integrity_prep(struct bio *bio)
 {
-	return 0;
+	return true;
 }
 
 static inline void bio_integrity_free(struct bio *bio)
-- 
cgit v1.2.3-59-g8ed1b


From 128b6f9fdd9ace9e56cb3a263b4bc269658f9c40 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:12 -0700
Subject: t10-pi: Move opencoded contants to common header

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/t10-pi.c                   | 9 +++------
 drivers/scsi/lpfc/lpfc_scsi.c    | 5 +++--
 drivers/scsi/qla2xxx/qla_isr.c   | 8 ++++----
 drivers/target/target_core_sbc.c | 2 +-
 include/linux/t10-pi.h           | 2 ++
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/block/t10-pi.c b/block/t10-pi.c
index 3416dadf7b15..a98db384048f 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -28,9 +28,6 @@
 
 typedef __be16 (csum_fn) (void *, unsigned int);
 
-static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
-static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
-
 static __be16 t10_pi_crc_fn(void *data, unsigned int len)
 {
 	return cpu_to_be16(crc_t10dif(data, len));
@@ -82,7 +79,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 		switch (type) {
 		case 1:
 		case 2:
-			if (pi->app_tag == APP_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE)
 				goto next;
 
 			if (be32_to_cpu(pi->ref_tag) !=
@@ -95,8 +92,8 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 			}
 			break;
 		case 3:
-			if (pi->app_tag == APP_ESCAPE &&
-			    pi->ref_tag == REF_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE &&
+			    pi->ref_tag == T10_PI_REF_ESCAPE)
 				goto next;
 			break;
 		}
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 54fd0c81ceaf..99d2e990b231 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <asm/unaligned.h>
+#include <linux/t10-pi.h>
 #include <linux/crc-t10dif.h>
 #include <net/checksum.h>
 
@@ -2934,8 +2935,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 				 * First check to see if a protection data
 				 * check is valid
 				 */
-				if ((src->ref_tag == 0xffffffff) ||
-				    (src->app_tag == 0xffff)) {
+				if ((src->ref_tag == T10_PI_REF_ESCAPE) ||
+				    (src->app_tag == T10_PI_APP_ESCAPE)) {
 					start_ref_tag++;
 					goto skipit;
 				}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 2572121b765b..de031aed94f6 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1950,9 +1950,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 	 * For type     3: ref & app tag is all 'f's
 	 * For type 0,1,2: app tag is all 'f's
 	 */
-	if ((a_app_tag == 0xffff) &&
+	if ((a_app_tag == T10_PI_APP_ESCAPE) &&
 	    ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
-	     (a_ref_tag == 0xffffffff))) {
+	     (a_ref_tag == T10_PI_REF_ESCAPE))) {
 		uint32_t blocks_done, resid;
 		sector_t lba_s = scsi_get_lba(cmd);
 
@@ -1994,9 +1994,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 			spt = page_address(sg_page(sg)) + sg->offset;
 			spt += j;
 
-			spt->app_tag = 0xffff;
+			spt->app_tag = T10_PI_APP_ESCAPE;
 			if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
-				spt->ref_tag = 0xffffffff;
+				spt->ref_tag = T10_PI_REF_ESCAPE;
 		}
 
 		return 0;
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4316f7b65fb7..dc9456e7dac9 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1450,7 +1450,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
 				 (unsigned long long)sector, sdt->guard_tag,
 				 sdt->app_tag, be32_to_cpu(sdt->ref_tag));
 
-			if (sdt->app_tag == cpu_to_be16(0xffff)) {
+			if (sdt->app_tag == T10_PI_APP_ESCAPE) {
 				dsg_off += block_size;
 				goto next;
 			}
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 9375d23a24e7..635a3c5706bd 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -33,6 +33,8 @@ struct t10_pi_tuple {
 	__be32 ref_tag;		/* Target LBA or indirect LBA */
 };
 
+#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
+#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
-- 
cgit v1.2.3-59-g8ed1b


From b1fb2c52b2d85f51f36f1661409f9aeef94265ff Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:13 -0700
Subject: block: guard bvec iteration logic

Currently if some one try to advance bvec beyond it's size we simply
dump WARN_ONCE and continue to iterate beyond bvec array boundaries.
This simply means that we endup dereferencing/corrupting random memory
region.

Sane reaction would be to propagate error back to calling context
But bvec_iter_advance's calling context is not always good for error
handling. For safity reason let truncate iterator size to zero which
will break external iteration loop which prevent us from unpredictable
memory range corruption. And even it caller ignores an error, it will
corrupt it's own bvecs, not others.

This patch does:
- Return error back to caller with hope that it will react on this
- Truncate iterator size

Code was added long time ago here 4550dd6c, luckily no one hit it
in real life :)

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
[hch: switch to true/false returns instead of errno values]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/blk.c |  3 ++-
 drivers/nvdimm/btt.c |  3 ++-
 include/linux/bio.h  |  4 +++-
 include/linux/bvec.h | 14 +++++++++-----
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 1a578b2a437b..345acca576b3 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -106,7 +106,8 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
 
 		len -= cur_len;
 		dev_offset += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return err;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b5caaee78bbf..d00c10f382f0 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -985,7 +985,8 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 
 		len -= cur_len;
 		meta_nsoff += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return ret;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b3b5f5a89a9c..d5e8689f86b8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -167,8 +167,10 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
-	else
+	else {
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		/* TODO: It is reasonable to complete bio with error here. */
+	}
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 89b65b82d98f..de317b4c13c1 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -22,6 +22,7 @@
 
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/errno.h>
 
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -66,12 +67,14 @@ struct bvec_iter {
 	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
 })
 
-static inline void bvec_iter_advance(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned bytes)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
 {
-	WARN_ONCE(bytes > iter->bi_size,
-		  "Attempted to advance past end of bvec iter\n");
+	if (WARN_ONCE(bytes > iter->bi_size,
+		     "Attempted to advance past end of bvec iter\n")) {
+		iter->bi_size = 0;
+		return false;
+	}
 
 	while (bytes) {
 		unsigned iter_len = bvec_iter_len(bv, *iter);
@@ -86,6 +89,7 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
 			iter->bi_idx++;
 		}
 	}
+	return true;
 }
 
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
-- 
cgit v1.2.3-59-g8ed1b


From 7314183d1d0c200def4d0f5a6d978d3b29d28813 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:16:49 +0300
Subject: nvme-fc: don't override opts->nr_io_queues

Its what the user passed, so its probably a better
idea to keep it intact. Also, limit the number of
I/O queues to max online cpus and the lport maximum
hw queues.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 2f990d979037..996720043b3a 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2175,17 +2175,20 @@ static int
 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+				ctrl->lport->ops->max_hw_queues);
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
 			"set_queue_count failed: %d\n", ret);
 		return ret;
 	}
 
-	ctrl->ctrl.queue_count = opts->nr_io_queues + 1;
-	if (!opts->nr_io_queues)
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
+	if (!nr_io_queues)
 		return 0;
 
 	nvme_fc_init_io_queues(ctrl);
@@ -2245,15 +2248,19 @@ static int
 nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+				ctrl->lport->ops->max_hw_queues);
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
 			"set_queue_count failed: %d\n", ret);
 		return ret;
 	}
 
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
 	/* check for io queues existing */
 	if (ctrl->ctrl.queue_count == 1)
 		return 0;
@@ -2702,7 +2709,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->ctrl.queue_count = min_t(unsigned int,
 				opts->nr_io_queues,
 				lport->ops->max_hw_queues);
-	opts->nr_io_queues = ctrl->ctrl.queue_count;	/* so opts has valid value */
 	ctrl->ctrl.queue_count++;	/* +1 for admin queue */
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
-- 
cgit v1.2.3-59-g8ed1b


From 4c8b99f6b1ff086c1745fcb0511800fba5c4fb34 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:10:44 +0300
Subject: nvme-rdma: update tagset nr_hw_queues after reconnecting/resetting

We might have more/less queues once we reconnect/reset. For
example due to cpu going online/offline or controller constraints.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2ab0cdb4d881..cfb22531fc16 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -741,6 +741,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
 			goto requeue;
+
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+				ctrl->ctrl.queue_count - 1);
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -1727,6 +1730,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
 			goto del_dead_ctrl;
+
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+				ctrl->ctrl.queue_count - 1);
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-- 
cgit v1.2.3-59-g8ed1b


From 4368c39bf6e6a2bfffb7a72d78912c68f156e8da Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:13:43 +0300
Subject: nvme-loop: update tagset nr_hw_queues after reconnecting/resetting

We might have more/less queues once we reconnect/reset. For
example due to cpu going online/offline

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/loop.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 568ed8625696..3d51341e62ee 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -508,6 +508,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto out_destroy_io;
 
+	blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+			ctrl->ctrl.queue_count - 1);
+
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-- 
cgit v1.2.3-59-g8ed1b


From cda5fd1ac5c2c42d1b2e1847aa0438b229c5c068 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:20:10 +0300
Subject: nvme-fc: update tagset nr_hw_queues after queues reinit

We might have more/less queues once we reconnect/reset. For
example due to cpu going online/offline or controller constraints.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 996720043b3a..1e859ee2b565 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2279,6 +2279,8 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_delete_hw_queues;
 
+	blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+
 	return 0;
 
 out_delete_hw_queues:
-- 
cgit v1.2.3-59-g8ed1b


From e5859d3a0ea07bf948f4538d47fe6547f6e115be Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 10:18:50 +0300
Subject: nvme-fc: use blk_mq_delay_run_hw_queue instead of open-coding it

Cc: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 1e859ee2b565..50cc3b2b0e11 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1966,10 +1966,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		if (ret != -EBUSY)
 			return BLK_STS_IOERR;
 
-		if (op->rq) {
-			blk_mq_stop_hw_queues(op->rq->q);
-			blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
-		}
+		if (op->rq)
+			blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
+
 		return BLK_STS_RESOURCE;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 8a05aa4ce3bf767b7bd72851da89c23d084d1ecb Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 5 Jul 2017 12:14:27 +0800
Subject: mtip32xx: avoid to read HOST_CAP from HW in .queue_rq()

It is observed reading the register from HW takes a bit long,
for example in my box, the following difference of 'perf report
--no-children fio ...' can be seen when running I/O:

1) V4.12 without patch
+    9.28%       fio  [mtip32xx]           [k] mtip_irq_handler
+    8.48%       fio  [mtip32xx]           [k] mtip_init_cmd_header

2) V4.12 with the following patch
+    9.14%       fio  [mtip32xx]           [k] mtip_irq_handler
......
+    1.14%       fio  [mtip32xx]           [k] mtip_init_cmd_header

IOPS can be increased by ~5% with this patch too.

Fixes: a4e84aae8139(mtip32xx: use runtime tag to initialize command header)
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/mtip32xx/mtip32xx.c | 4 ++--
 drivers/block/mtip32xx/mtip32xx.h | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 61b046f256ca..ae910347b7cf 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -174,7 +174,6 @@ static void mtip_init_cmd_header(struct request *rq)
 {
 	struct driver_data *dd = rq->q->queuedata;
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
 
 	/* Point the command headers at the command tables. */
 	cmd->command_header = dd->port->command_list +
@@ -182,7 +181,7 @@ static void mtip_init_cmd_header(struct request *rq)
 	cmd->command_header_dma = dd->port->command_list_dma +
 				(sizeof(struct mtip_cmd_hdr) * rq->tag);
 
-	if (host_cap_64)
+	if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
 		cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
 
 	cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
@@ -386,6 +385,7 @@ static void mtip_init_port(struct mtip_port *port)
 			 port->mmio + PORT_LST_ADDR_HI);
 		writel((port->rxfis_dma >> 16) >> 16,
 			 port->mmio + PORT_FIS_ADDR_HI);
+		set_bit(MTIP_PF_HOST_CAP_64, &port->flags);
 	}
 
 	writel(port->command_list_dma & 0xFFFFFFFF,
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e8286af50e16..e20e55dab443 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -140,6 +140,7 @@ enum {
 				(1 << MTIP_PF_SE_ACTIVE_BIT) |
 				(1 << MTIP_PF_DM_ACTIVE_BIT) |
 				(1 << MTIP_PF_TO_ACTIVE_BIT)),
+	MTIP_PF_HOST_CAP_64         = 10, /* cache HOST_CAP_64 */
 
 	MTIP_PF_SVC_THD_ACTIVE_BIT  = 4,
 	MTIP_PF_ISSUE_CMDS_BIT      = 5,
-- 
cgit v1.2.3-59-g8ed1b


From 5e599d73c1c1816af07f94ddba879499aa39b43c Mon Sep 17 00:00:00 2001
From: Marta Rybczynska <mrybczyn@kalray.eu>
Date: Tue, 6 Jun 2017 13:27:21 +0200
Subject: nvme-rdma: remove race conditions from IB signalling

This patch improves the way the RDMA IB signalling is done by using atomic
operations for the signalling variable. This avoids race conditions on
sig_count.

The signalling interval changes slightly and is now the largest power of
two not larger than queue depth / 2.

ilog() usage idea by Bart Van Assche.

Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
---
 drivers/nvme/host/rdma.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cfb22531fc16..0ab163e6e640 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -86,7 +86,7 @@ enum nvme_rdma_queue_flags {
 
 struct nvme_rdma_queue {
 	struct nvme_rdma_qe	*rsp_ring;
-	u8			sig_count;
+	atomic_t		sig_count;
 	int			queue_size;
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
@@ -523,6 +523,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
 	queue->queue_size = queue_size;
+	atomic_set(&queue->sig_count, 0);
 
 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -1009,17 +1010,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		nvme_rdma_wr_error(cq, wc, "SEND");
 }
 
-static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+/*
+ * We want to signal completion at least every queue depth/2.  This returns the
+ * largest power of two that is not above half of (queue size + 1) to optimize
+ * (avoid divisions).
+ */
+static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
 {
-	int sig_limit;
+	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
 
-	/*
-	 * We signal completion every queue depth/2 and also handle the
-	 * degenerated case of a  device with queue_depth=1, where we
-	 * would need to signal every message.
-	 */
-	sig_limit = max(queue->queue_size / 2, 1);
-	return (++queue->sig_count % sig_limit) == 0;
+	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
-- 
cgit v1.2.3-59-g8ed1b


From fb051339727cd8134dd6ef0305a120e6e265dded Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:33:32 +0300
Subject: nvme-rdma: quiesce/unquiesce admin_q instead of start/stop its hw
 queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.
Also make sure to kick the requeue list when appropriate.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0ab163e6e640..6f8f9ffcbc00 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -779,7 +779,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	if (ctrl->ctrl.queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 
 	/* We must take care of fastfail/requeue all our inflight requests */
 	if (ctrl->ctrl.queue_count > 1)
@@ -792,7 +792,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	 * queues are not a live anymore, so restart the queues to fail fast
 	 * new IO
 	 */
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_start_queues(&ctrl->ctrl);
 
 	nvme_rdma_reconnect_or_remove(ctrl);
@@ -1636,9 +1636,10 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 	if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f9c5af5f8ff14a31468546b9b1a876d537019e9a Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:39:34 +0300
Subject: nvme-fc: quiesce/unquiesce admin_q instead of start/stop its hw
 queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.

Also, make sure to unquiesce before cleanup the admin queue.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-By: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 50cc3b2b0e11..8d55e7827932 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1703,6 +1703,7 @@ nvme_fc_ctrl_free(struct kref *ref)
 	list_del(&ctrl->ctrl_list);
 	spin_unlock_irqrestore(&ctrl->rport->lock, flags);
 
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 
@@ -2321,7 +2322,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		goto out_delete_hw_queue;
 
 	if (ctrl->ctrl.state != NVME_CTRL_NEW)
-		blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
@@ -2475,7 +2476,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * use blk_mq_tagset_busy_itr() and the transport routine to
 	 * terminate the exchanges.
 	 */
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 
-- 
cgit v1.2.3-59-g8ed1b


From c1c0ffff3a99caf40c3bd21447135db459194123 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:40:17 +0300
Subject: nvme-loop: quiesce/unquiesce admin_q instead of start/stop its hw
 queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/loop.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 3d51341e62ee..6a0b70685e77 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -434,9 +434,10 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_loop_destroy_admin_queue(ctrl);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c81545f991a6612d3bdab18a71b3487023ec6b69 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:53:27 +0300
Subject: nvme-pci: quiesce/unquiesce admin_q instead of start/stop its hw
 queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index eb729ff70e7d..d9c0010a9bbc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1125,7 +1125,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
-		blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
 
 	pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
 
@@ -1315,7 +1315,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
 		 * user requests may be waiting on a stopped queue. Start the
 		 * queue to flush these to completion.
 		 */
-		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -1352,7 +1352,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 			return -ENODEV;
 		}
 	} else
-		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 8d7b8fafad87c3404f72ce2d36c79c48be1129a6 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 18:16:58 +0300
Subject: nvme: kick requeue list when requeueing a request instead of when
 starting the queues

When we requeue a request, we can always insert the request
back to the scheduler instead of doing it when restarting
the queues and kicking the requeue work, so get rid of
the requeue kick in nvme (core and drivers).

Also, now there is no need start hw queues in nvme_kill_queues
We don't stop the hw queues anymore, so no need to
start them.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d70df1d0072d..48cafaa6fbc5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -131,7 +131,7 @@ void nvme_complete_rq(struct request *req)
 {
 	if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
 		nvme_req(req)->retries++;
-		blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+		blk_mq_requeue_request(req, true);
 		return;
 	}
 
@@ -2694,9 +2694,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	blk_mq_unquiesce_queue(ctrl->admin_q);
 
-	/* Forcibly start all queues to avoid having stuck requests */
-	blk_mq_start_hw_queues(ctrl->admin_q);
-
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
@@ -2709,16 +2706,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 
 		/* Forcibly unquiesce queues to avoid blocking dispatch */
 		blk_mq_unquiesce_queue(ns->queue);
-
-		/*
-		 * Forcibly start all queues to avoid having stuck requests.
-		 * Note that we must ensure the queues are not stopped
-		 * when the final removal happens.
-		 */
-		blk_mq_start_hw_queues(ns->queue);
-
-		/* draining requests in requeue list */
-		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
@@ -2787,10 +2774,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_unquiesce_queue(ns->queue);
-		blk_mq_kick_requeue_list(ns->queue);
-	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
-- 
cgit v1.2.3-59-g8ed1b


From b52c2e92546ee794a5bbab4d8ea435c1de85a8cb Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 09:57:09 +0300
Subject: nbd: quiesce request queues to make sure no submissions are inflight

Unlike blk_mq_stop_hw_queues, blk_mq_quiesce_queue respects the
submission path rcu grace. quiesce the queue before iterating
on live tags.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/block/nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 977ec960dd2f..dea7d85134ee 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -661,9 +661,9 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 
 static void nbd_clear_que(struct nbd_device *nbd)
 {
-	blk_mq_stop_hw_queues(nbd->disk->queue);
+	blk_mq_quiesce_queue(nbd->disk->queue);
 	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
-	blk_mq_start_hw_queues(nbd->disk->queue);
+	blk_mq_unquiesce_queue(nbd->disk->queue);
 	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 436c15ab6596b12cfee7618ecaa69a4341ac3c51 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 10:00:41 +0300
Subject: mtip32xx: quiesce request queues to make sure no submissions are
 inflight

Unlike blk_mq_stop_hw_queues, blk_mq_quiesce_queue respects the
submission path rcu grace. quiesce the queue before iterating
on live tags, or performing device io quiescing.

While were at it, verify that the request started in mtip_abort_cmd
amd mtip_queue_cmd tag iteration calls.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/block/mtip32xx/mtip32xx.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 61b046f256ca..87717a1a5c89 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -950,7 +950,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 	unsigned long to;
 	bool active = true;
 
-	blk_mq_stop_hw_queues(port->dd->queue);
+	blk_mq_quiesce_queue(port->dd->queue);
 
 	to = jiffies + msecs_to_jiffies(timeout);
 	do {
@@ -970,10 +970,10 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 			break;
 	} while (time_before(jiffies, to));
 
-	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+	blk_mq_unquiesce_queue(port->dd->queue);
 	return active ? -EBUSY : 0;
 err_fault:
-	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+	blk_mq_unquiesce_queue(port->dd->queue);
 	return -EFAULT;
 }
 
@@ -2737,6 +2737,9 @@ static void mtip_abort_cmd(struct request *req, void *data,
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
 
+	if (!blk_mq_request_started(req))
+		return;
+
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2749,6 +2752,9 @@ static void mtip_queue_cmd(struct request *req, void *data,
 {
 	struct driver_data *dd = data;
 
+	if (!blk_mq_request_started(req))
+		return;
+
 	set_bit(req->tag, dd->port->cmds_to_issue);
 	blk_abort_request(req);
 }
@@ -2814,6 +2820,8 @@ restart_eh:
 				dev_warn(&dd->pdev->dev,
 					"Completion workers still active!");
 
+			blk_mq_quiesce_queue(dd->queue);
+
 			spin_lock(dd->queue->queue_lock);
 			blk_mq_tagset_busy_iter(&dd->tags,
 							mtip_queue_cmd, dd);
@@ -2826,6 +2834,8 @@ restart_eh:
 							mtip_abort_cmd, dd);
 
 			clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
+
+			blk_mq_unquiesce_queue(dd->queue);
 		}
 
 		if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
@@ -3995,8 +4005,9 @@ static int mtip_block_remove(struct driver_data *dd)
 						dd->disk->disk_name);
 
 	blk_freeze_queue_start(dd->queue);
-	blk_mq_stop_hw_queues(dd->queue);
+	blk_mq_quiesce_queue(dd->queue);
 	blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
+	blk_mq_unquiesce_queue(dd->queue);
 
 	/*
 	 * Delete our gendisk structure. This also removes the device
-- 
cgit v1.2.3-59-g8ed1b


From 9b3e99058453399b506087b3ac99d67a80343333 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 10:03:03 +0300
Subject: virtio_blk: quiesce/unquiesce live IO when entering PM states

Without it its not guaranteed that no .queue_rq is inflight.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: virtio-dev@lists.oasis-open.org
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/block/virtio_blk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0297ad7c1452..4e02aa5fdac0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -840,7 +840,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vblk->config_work);
 
-	blk_mq_stop_hw_queues(vblk->disk->queue);
+	blk_mq_quiesce_queue(vblk->disk->queue);
 
 	vdev->config->del_vqs(vdev);
 	return 0;
@@ -857,7 +857,7 @@ static int virtblk_restore(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
-	blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	blk_mq_unquiesce_queue(vblk->disk->queue);
 	return 0;
 }
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From d09f2b45f346f0a9e5e1b5fcea531b1b393671dc Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 10:56:43 +0300
Subject: nvme: split nvme_uninit_ctrl into stop and uninit

Usually before we teardown the controller we want to:
1. complete/cancel any ctrl inflight works
2. remove ctrl namespaces (only for removal though, resets
   shouldn't remove any namespaces).

but we do not want to destroy the controller device as
we might use it for logging during the teardown stage.

This patch adds nvme_start_ctrl() which queues inflight
controller works (aen, ns scan, queue start and keep-alive
if kato is set) and nvme_stop_ctrl() which cancels the works
namespace removal is left to the callers to handle.

Move nvme_uninit_ctrl after we are done with the
controller device.

Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c   | 21 +++++++++++++++++++--
 drivers/nvme/host/fc.c     | 16 ++++------------
 drivers/nvme/host/nvme.h   |  2 ++
 drivers/nvme/host/pci.c    | 16 ++++------------
 drivers/nvme/host/rdma.c   | 29 ++++++++---------------------
 drivers/nvme/target/loop.c | 19 ++++++-------------
 6 files changed, 43 insertions(+), 60 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 48cafaa6fbc5..cb96f4a7ae3a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2591,12 +2591,29 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
 	spin_unlock(&dev_list_lock);
 }
 
-void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
+	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	flush_work(&ctrl->scan_work);
-	nvme_remove_namespaces(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
+
+void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+{
+	if (ctrl->kato)
+		nvme_start_keep_alive(ctrl);
+
+	if (ctrl->queue_count > 1) {
+		nvme_queue_scan(ctrl);
+		nvme_queue_async_events(ctrl);
+		nvme_start_queues(ctrl);
+	}
+}
+EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
 	device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
 
 	spin_lock(&dev_list_lock);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 8d55e7827932..d666ada39a9b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2232,7 +2232,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 out_delete_hw_queues:
 	nvme_fc_delete_hw_io_queues(ctrl);
 out_cleanup_blk_queue:
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	blk_cleanup_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
 	blk_mq_free_tag_set(&ctrl->tag_set);
@@ -2366,8 +2365,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		goto out_disconnect_admin_queue;
 	}
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	/* FC-NVME supports normal SGL Data Block Descriptors */
 
 	if (opts->queue_size > ctrl->ctrl.maxcmd) {
@@ -2401,17 +2398,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return 0;	/* Success */
 
 out_term_aen_ops:
 	nvme_fc_term_aen_ops(ctrl);
-	nvme_stop_keep_alive(&ctrl->ctrl);
 out_disconnect_admin_queue:
 	/* send a Disconnect(association) LS to fc-nvme target */
 	nvme_fc_xmt_disconnect_assoc(ctrl);
@@ -2434,8 +2426,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 {
 	unsigned long flags;
 
-	nvme_stop_keep_alive(&ctrl->ctrl);
-
 	spin_lock_irqsave(&ctrl->lock, flags);
 	ctrl->flags |= FCCTRL_TERMIO;
 	ctrl->iocnt = 0;
@@ -2517,7 +2507,8 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
 	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
-
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	/*
 	 * kill the association on the link side.  this will block
 	 * waiting for io to terminate
@@ -2612,6 +2603,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
 	int ret;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e0b83311d5de..8f2a168ddc01 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -280,6 +280,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		const struct nvme_ctrl_ops *ops, unsigned long quirks);
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_start_ctrl(struct nvme_ctrl *ctrl);
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
 void nvme_put_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_identify(struct nvme_ctrl *ctrl);
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d9c0010a9bbc..882ed3677117 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2134,15 +2134,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	/*
-	 * A controller that can not execute IO typically requires user
-	 * intervention to correct. For such degraded controllers, the driver
-	 * should not submit commands the user did not request, so skip
-	 * registering for asynchronous event notification on this condition.
-	 */
-	if (dev->online_queues > 1)
-		nvme_queue_async_events(&dev->ctrl);
-
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
 	 * any working I/O queue.
@@ -2163,8 +2154,7 @@ static void nvme_reset_work(struct work_struct *work)
 		goto out;
 	}
 
-	if (dev->online_queues > 1)
-		nvme_queue_scan(&dev->ctrl);
+	nvme_start_ctrl(&dev->ctrl);
 	return;
 
  out:
@@ -2341,11 +2331,13 @@ static void nvme_remove(struct pci_dev *pdev)
 	}
 
 	flush_work(&dev->ctrl.reset_work);
-	nvme_uninit_ctrl(&dev->ctrl);
+	nvme_stop_ctrl(&dev->ctrl);
+	nvme_remove_namespaces(&dev->ctrl);
 	nvme_dev_disable(dev, true);
 	nvme_free_host_mem(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
+	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_release_prp_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6f8f9ffcbc00..ccdbd99d5a1c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -732,8 +732,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	if (ctrl->ctrl.queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
@@ -751,10 +749,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	WARN_ON_ONCE(!changed);
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
 
@@ -772,7 +767,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 			struct nvme_rdma_ctrl, err_work);
 	int i;
 
-	nvme_stop_keep_alive(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
 
 	for (i = 0; i < ctrl->ctrl.queue_count; i++)
 		clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
@@ -1603,8 +1598,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	return 0;
 
 out_cleanup_queue:
@@ -1622,7 +1615,6 @@ out_free_queue:
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 {
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->reconnect_work);
 
@@ -1645,10 +1637,12 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 
 static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	if (shutdown)
 		nvme_rdma_shutdown_ctrl(ctrl);
 
+	nvme_uninit_ctrl(&ctrl->ctrl);
 	if (ctrl->ctrl.tagset) {
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
 		blk_mq_free_tag_set(&ctrl->tag_set);
@@ -1710,6 +1704,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	int ret;
 	bool changed;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl);
 
 	ret = nvme_rdma_configure_admin_queue(ctrl);
@@ -1739,11 +1734,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return;
 
@@ -1931,15 +1922,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return &ctrl->ctrl;
 
 out_remove_admin_queue:
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	nvme_rdma_destroy_admin_queue(ctrl);
 out_kfree_queues:
 	kfree(ctrl->queues);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 6a0b70685e77..717ed7ddb2f6 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -407,8 +407,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	return 0;
 
 out_cleanup_queue:
@@ -422,8 +420,6 @@ out_free_sq:
 
 static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
-	nvme_stop_keep_alive(&ctrl->ctrl);
-
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
@@ -446,8 +442,10 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
 	struct nvme_loop_ctrl *ctrl = container_of(work,
 				struct nvme_loop_ctrl, delete_work);
 
-	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	nvme_loop_shutdown_ctrl(ctrl);
+	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
@@ -495,6 +493,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	bool changed;
 	int ret;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_loop_shutdown_ctrl(ctrl);
 
 	ret = nvme_loop_configure_admin_queue(ctrl);
@@ -515,10 +514,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	nvme_queue_scan(&ctrl->ctrl);
-	nvme_queue_async_events(&ctrl->ctrl);
-
-	nvme_start_queues(&ctrl->ctrl);
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return;
 
@@ -653,10 +649,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
 	mutex_unlock(&nvme_loop_ctrl_mutex);
 
-	if (opts->nr_io_queues) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return &ctrl->ctrl;
 
-- 
cgit v1.2.3-59-g8ed1b


From 842594c8775b585c58459e044708c0335b6aa6b7 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 5 Jul 2017 09:56:13 +0300
Subject: nvme-rdma: unconditionally recycle the request mr

When our RDMA queue-pair is torn down with high load
of I/O traffic, we have no way of knowing if the
memory region was actually registered by the reg_mr
work request as it completion flushes with error (hw
might have done it or not).

So in order to not deal with all this uncertanty, we
simply recycle the MR in reinit_request.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index ccdbd99d5a1c..da04df1af231 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -272,9 +272,6 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	int ret = 0;
 
-	if (!req->mr->need_inval)
-		goto out;
-
 	ib_dereg_mr(req->mr);
 
 	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-- 
cgit v1.2.3-59-g8ed1b


From db2d153d7a3d64618659f19e645a3351d3a732f3 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Thu, 6 Jul 2017 18:00:07 +0300
Subject: null_blk: fix error flow for shared tags during module_init

In case we use shared tags feature, blk_mq_alloc_tag_set might fail
during module initialization. In that case, fail the load with a
suitable error code. Also move the tagset initialization process after
defining the amount of submission queues.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 71f4422eba81..85c24cace973 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -844,9 +844,6 @@ static int __init null_init(void)
 		queue_mode = NULL_Q_MQ;
 	}
 
-	if (queue_mode == NULL_Q_MQ && shared_tags)
-		null_init_tag_set(&tag_set);
-
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.",
@@ -858,11 +855,19 @@ static int __init null_init(void)
 	else if (!submit_queues)
 		submit_queues = 1;
 
+	if (queue_mode == NULL_Q_MQ && shared_tags) {
+		ret = null_init_tag_set(&tag_set);
+		if (ret)
+			return ret;
+	}
+
 	mutex_init(&lock);
 
 	null_major = register_blkdev(0, "nullb");
-	if (null_major < 0)
-		return null_major;
+	if (null_major < 0) {
+		ret = null_major;
+		goto err_tagset;
+	}
 
 	if (use_lightnvm) {
 		ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
@@ -891,6 +896,9 @@ err_dev:
 	kmem_cache_destroy(ppa_cache);
 err_ppa:
 	unregister_blkdev(null_major, "nullb");
+err_tagset:
+	if (queue_mode == NULL_Q_MQ && shared_tags)
+		blk_mq_free_tag_set(&tag_set);
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 0e2ff11311d1d4c85dd9ad9ba4347775f628e94a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 6 Jul 2017 10:32:20 -0700
Subject: cciss: initialize struct scsi_req
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The changes in "block: Make most scsi_req_init() calls implicit" mean
that every driver that supports the generic scsi ioctls needs to
call scsi_req_init on newly allocated requests, but that commit didn't
add the call to the ccіss driver.  Fix that to avoid crashes when
udev issues SG_IO commands.

Fixes: ca18d6f7 ("block: Make most scsi_req_init() calls implicit")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/cciss.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 02a611993bb4..678af946be30 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1944,6 +1944,13 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
 	return;
 }
 
+static void cciss_initialize_rq(struct request *rq)
+{
+	struct scsi_request *sreq = blk_mq_rq_to_pdu(rq);
+
+	scsi_req_init(sreq);
+}
+
 /*
  * cciss_add_disk sets up the block device queue for a logical drive
  */
@@ -1956,6 +1963,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 
 	disk->queue->cmd_size = sizeof(struct scsi_request);
 	disk->queue->request_fn = do_cciss_request;
+	disk->queue->initialize_rq_fn = cciss_initialize_rq;
 	disk->queue->queue_lock = &h->lock;
 	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
 	if (blk_init_allocated_queue(disk->queue) < 0)
-- 
cgit v1.2.3-59-g8ed1b


From 3eaa11e2780dc38350c133bd998cac1df488d040 Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Fri, 7 Jul 2017 21:08:52 +0200
Subject: lightnvm: pblk: control I/O flow also on tear down
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When removing a pblk instance, control the write I/O flow to the
controller as we do in the fast path.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/pblk-core.c     | 61 +++++++++++++++++++++++++++++++---------
 drivers/lightnvm/pblk-recovery.c | 31 +++++++++++++-------
 drivers/lightnvm/pblk-write.c    | 19 +++++--------
 drivers/lightnvm/pblk.h          |  2 ++
 4 files changed, 78 insertions(+), 35 deletions(-)

(limited to 'drivers')

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 11fe0c5b2a9c..81501644fb15 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -1670,13 +1670,10 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 	queue_work(wq, &line_ws->ws);
 }
 
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
-		  unsigned long *lun_bitmap)
+static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
+			     int nr_ppas, int pos)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_lun *rlun;
-	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+	struct pblk_lun *rlun = &pblk->luns[pos];
 	int ret;
 
 	/*
@@ -1690,14 +1687,8 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
 				ppa_list[0].g.ch != ppa_list[i].g.ch);
 #endif
-	/* If the LUN has been locked for this same request, do no attempt to
-	 * lock it again
-	 */
-	if (test_and_set_bit(pos, lun_bitmap))
-		return;
 
-	rlun = &pblk->luns[pos];
-	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
 	if (ret) {
 		switch (ret) {
 		case -ETIME:
@@ -1710,6 +1701,50 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 	}
 }
 
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+	__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		  unsigned long *lun_bitmap)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+	/* If the LUN has been locked for this same request, do no attempt to
+	 * lock it again
+	 */
+	if (test_and_set_bit(pos, lun_bitmap))
+		return;
+
+	__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+#ifdef CONFIG_NVM_DEBUG
+	int i;
+
+	for (i = 1; i < nr_ppas; i++)
+		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+				ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+
+	rlun = &pblk->luns[pos];
+	up(&rlun->wr_sem);
+}
+
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap)
 {
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 0e48d3e4e143..cb556e06673e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -340,9 +340,14 @@ static void pblk_end_io_recov(struct nvm_rq *rqd)
 	struct pblk *pblk = pad_rq->pblk;
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-	kref_put(&pad_rq->ref, pblk_recov_complete);
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+	bio_put(rqd->bio);
 	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 	pblk_free_rqd(pblk, rqd, WRITE);
+
+	atomic_dec(&pblk->inflight_io);
+	kref_put(&pad_rq->ref, pblk_recov_complete);
 }
 
 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
@@ -385,7 +390,7 @@ next_pad_rq:
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (rq_ppas < pblk->min_write_pgs) {
 		pr_err("pblk: corrupted pad line %d\n", line->id);
-		goto free_rq;
+		goto fail_free_pad;
 	}
 
 	rq_len = rq_ppas * geo->sec_size;
@@ -393,7 +398,7 @@ next_pad_rq:
 	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
 	if (!meta_list) {
 		ret = -ENOMEM;
-		goto free_data;
+		goto fail_free_pad;
 	}
 
 	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
@@ -404,9 +409,9 @@ next_pad_rq:
 		ret = PTR_ERR(rqd);
 		goto fail_free_meta;
 	}
-	memset(rqd, 0, pblk_w_rq_size);
 
-	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+						PBLK_VMALLOC_META, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
 		goto fail_free_rqd;
@@ -453,15 +458,15 @@ next_pad_rq:
 	}
 
 	kref_get(&pad_rq->ref);
+	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: I/O submission failed: %d\n", ret);
-		goto free_data;
+		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+		goto fail_free_bio;
 	}
 
-	atomic_dec(&pblk->inflight_io);
-
 	left_line_ppas -= rq_ppas;
 	left_ppas -= rq_ppas;
 	if (left_ppas && left_line_ppas)
@@ -475,17 +480,23 @@ next_pad_rq:
 		ret = -ETIME;
 	}
 
+	if (!pblk_line_is_full(line))
+		pr_err("pblk: corrupted padded line: %d\n", line->id);
+
+	vfree(data);
 free_rq:
 	kfree(pad_rq);
-free_data:
-	vfree(data);
 	return ret;
 
+fail_free_bio:
+	bio_put(bio);
 fail_free_rqd:
 	pblk_free_rqd(pblk, rqd, WRITE);
 fail_free_meta:
 	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+fail_free_pad:
 	kfree(pad_rq);
+	vfree(data);
 	return ret;
 }
 
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index d62a8f4faaf4..cc2b9414d17c 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -178,15 +178,12 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_line *line = m_ctx->private;
 	struct pblk_emeta *emeta = line->emeta;
-	int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
-	struct pblk_lun *rlun = &pblk->luns[pos];
 	int sync;
 
-	up(&rlun->wr_sem);
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
 	if (rqd->error) {
 		pblk_log_write_err(pblk, rqd);
@@ -203,6 +200,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 								pblk->close_wq);
 
 	bio_put(rqd->bio);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 	pblk_free_rqd(pblk, rqd, READ);
 
 	atomic_dec(&pblk->inflight_io);
@@ -367,7 +365,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_emeta *emeta = meta_line->emeta;
 	struct pblk_g_ctx *m_ctx;
-	struct pblk_lun *rlun;
 	struct bio *bio;
 	struct nvm_rq *rqd;
 	void *data;
@@ -411,13 +408,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 			rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
 	}
 
-	rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
-	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
-	if (ret) {
-		pr_err("pblk: lun semaphore timed out (%d)\n", ret);
-		goto fail_free_bio;
-	}
-
 	emeta->mem += rq_len;
 	if (emeta->mem >= lm->emeta_len[0]) {
 		spin_lock(&l_mg->close_lock);
@@ -427,6 +417,8 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 		spin_unlock(&l_mg->close_lock);
 	}
 
+	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
@@ -436,10 +428,13 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	return NVM_IO_OK;
 
 fail_rollback:
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 	spin_lock(&l_mg->close_lock);
 	pblk_dealloc_page(pblk, meta_line, rq_ppas);
 	list_add(&meta_line->list, &meta_line->list);
 	spin_unlock(&l_mg->close_lock);
+
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 fail_free_bio:
 	if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
 		bio_put(bio);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 15931381348c..0c5692cc2f60 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -739,8 +739,10 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush);
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		  unsigned long *lun_bitmap);
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap);
 void pblk_end_bio_sync(struct bio *bio);
-- 
cgit v1.2.3-59-g8ed1b


From 56c76417ad310a060252a13f88001c35b73d241d Mon Sep 17 00:00:00 2001
From: Javier González <jg@lightnvm.io>
Date: Fri, 7 Jul 2017 21:08:53 +0200
Subject: lightnvm: pblk: remove unnecessary checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove unnecessary checks when freeing dma memory in the completion
path.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/pblk-write.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index cc2b9414d17c..3ad9e56d2473 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -39,9 +39,7 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
 	ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
 
-	if (rqd->meta_list)
-		nvm_dev_dma_free(dev->parent, rqd->meta_list,
-							rqd->dma_meta_list);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 
 	bio_put(rqd->bio);
 	pblk_free_rqd(pblk, rqd, WRITE);
@@ -224,9 +222,6 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	if (!rqd->meta_list)
 		return -ENOMEM;
 
-	if (unlikely(nr_secs == 1))
-		return 0;
-
 	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
 	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
-- 
cgit v1.2.3-59-g8ed1b


From 4cb7ca8073e1f226487168155c9a887079d605e4 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 7 Jul 2017 18:08:26 -0700
Subject: nvmet_fc: Accept variable pad lengths on Create Association LS

Target validation of the Create Association LS revised to accept any
LS as long as all non-pad data has been received. This allows a (newer)
target to accept the LS from older initiators with varying pad lengths.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/fc.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 7692a96c9065..1e6dcc241b3c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1164,18 +1164,24 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 
 	memset(acc, 0, sizeof(*acc));
 
-	if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst))
+	/*
+	 * FC-NVME spec changes. There are initiators sending different
+	 * lengths as padding sizes for Create Association Cmd descriptor
+	 * was incorrect.
+	 * Accept anything of "minimum" length. Assume format per 1.15
+	 * spec (with HOSTID reduced to 16 bytes), ignore how long the
+	 * trailing pad length is.
+	 */
+	if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN)
 		ret = VERR_CR_ASSOC_LEN;
-	else if (rqst->desc_list_len !=
-			fcnvme_lsdesc_len(
-				sizeof(struct fcnvme_ls_cr_assoc_rqst)))
+	else if (rqst->desc_list_len <
+			cpu_to_be32(FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN))
 		ret = VERR_CR_ASSOC_RQST_LEN;
 	else if (rqst->assoc_cmd.desc_tag !=
 			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD))
 		ret = VERR_CR_ASSOC_CMD;
-	else if (rqst->assoc_cmd.desc_len !=
-			fcnvme_lsdesc_len(
-				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)))
+	else if (rqst->assoc_cmd.desc_len <
+			cpu_to_be32(FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN))
 		ret = VERR_CR_ASSOC_CMD_LEN;
 	else if (!rqst->assoc_cmd.ersp_ratio ||
 		 (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >=
-- 
cgit v1.2.3-59-g8ed1b


From 2ee0e4ed5ca24c0642a7b72d75b4fe6dfc0a8db8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 6 Jul 2017 12:26:52 +0300
Subject: nvme-pci: compile warnings in nvme_alloc_host_mem()

"i" should be signed or it could cause a forever loop on the cleanup
path. "size" can be used uninitialized.

Fixes: 87ad72a59a38 ("nvme-pci: implement host memory buffer support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 882ed3677117..73fddf2c99f9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1582,9 +1582,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
 	struct nvme_host_mem_buf_desc *descs;
-	u32 chunk_size, max_entries, i = 0;
+	u32 chunk_size, max_entries;
+	int i = 0;
 	void **bufs;
-	u64 size, tmp;
+	u64 size = 0, tmp;
 
 	/* start big and work our way down */
 	chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
-- 
cgit v1.2.3-59-g8ed1b


From b27c1e683d2c8cd666a042b02096d18237911a37 Mon Sep 17 00:00:00 2001
From: weiping zhang <zhangweiping@didichuxing.com>
Date: Mon, 10 Jul 2017 16:46:59 +0800
Subject: nvme-pci: add module parameter for io queue depth

Adjust io queue depth more easily, and make sure io queue depth >= 2.

Signed-off-by: weiping zhang <zhangweiping@didichuxing.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 73fddf2c99f9..48d3ed3d48d1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -35,7 +35,6 @@
 
 #include "nvme.h"
 
-#define NVME_Q_DEPTH		1024
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 
@@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444);
 MODULE_PARM_DESC(max_host_mem_size_mb,
 	"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+	.set = io_queue_depth_set,
+	.get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+
 struct nvme_dev;
 struct nvme_queue;
 
@@ -104,6 +113,17 @@ struct nvme_dev {
 	void **host_mem_desc_bufs;
 };
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+	int n = 0, ret;
+
+	ret = kstrtoint(val, 10, &n);
+	if (ret != 0 || n < 2)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
 {
 	return qid * 2 * stride;
@@ -1893,7 +1913,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
 	dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
-				NVME_Q_DEPTH);
+				io_queue_depth);
 	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
 
-- 
cgit v1.2.3-59-g8ed1b


From c2f30f08c115a6bb9061afb4ba314f7e41f20686 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 10 Jul 2017 17:24:02 +0300
Subject: nvmet: avoid unneeded assignment of submit_bio return value

We actually using the cookie returned from the last submit_bio
call.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/io-cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 40128793e613..3b4d47a6abdb 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -85,7 +85,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 			bio_set_op_attrs(bio, op, op_flags);
 
 			bio_chain(bio, prev);
-			cookie = submit_bio(prev);
+			submit_bio(prev);
 		}
 
 		sector += sg->length >> 9;
-- 
cgit v1.2.3-59-g8ed1b