Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r--  drivers/nvme/host/rdma.c | 127
1 file changed, 97 insertions(+), 30 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 5a8388177959..f587af345889 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -28,7 +28,6 @@
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
-#include <rdma/ib_cm.h>
#include <linux/nvme-rdma.h>
#include "nvme.h"
@@ -43,6 +42,28 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
+static const char *const nvme_rdma_cm_status_strs[] = {
+ [NVME_RDMA_CM_INVALID_LEN] = "invalid length",
+ [NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format",
+ [NVME_RDMA_CM_INVALID_QID] = "invalid queue ID",
+ [NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size",
+ [NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size",
+ [NVME_RDMA_CM_NO_RSC] = "resource not found",
+ [NVME_RDMA_CM_INVALID_IRD] = "invalid IRD",
+ [NVME_RDMA_CM_INVALID_ORD] = "invalid ORD",
+};
+
+static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
+{
+ size_t index = status;
+
+ if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
+ nvme_rdma_cm_status_strs[index])
+ return nvme_rdma_cm_status_strs[index];
+ else
+ return "unrecognized reason";
+}
+
/*
* We handle AEN commands ourselves and don't even let the
* block layer know about them.
*/
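
The nvme_rdma_cm_msg() helper added above is a bounded table lookup: the designated initializers leave any unused slot NULL, so both the array bound and the entry are checked before a string is returned. Below is a minimal userspace sketch of the same pattern; the enum values and names are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum demo_cm_status {
	DEMO_CM_INVALID_LEN	= 0x01,
	DEMO_CM_INVALID_RECFMT	= 0x02,
	DEMO_CM_NO_RSC		= 0x06,
};

static const char * const demo_cm_status_strs[] = {
	[DEMO_CM_INVALID_LEN]		= "invalid length",
	[DEMO_CM_INVALID_RECFMT]	= "invalid record format",
	[DEMO_CM_NO_RSC]		= "resource not found",
};

static const char *demo_cm_msg(int status)
{
	size_t index = status;

	/* unused slots are NULL, so check both the bound and the entry */
	if (index < ARRAY_SIZE(demo_cm_status_strs) &&
	    demo_cm_status_strs[index])
		return demo_cm_status_strs[index];
	return "unrecognized reason";
}

int main(void)
{
	printf("%s\n", demo_cm_msg(DEMO_CM_NO_RSC));	/* resource not found */
	printf("%s\n", demo_cm_msg(0x42));		/* unrecognized reason */
	return 0;
}
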
@@ -66,6 +87,7 @@ struct nvme_rdma_qe {
struct nvme_rdma_queue;
struct nvme_rdma_request {
+ struct nvme_request req;
struct ib_mr *mr;
struct nvme_rdma_qe sqe;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
@@ -83,6 +105,7 @@ enum nvme_rdma_queue_flags {
NVME_RDMA_Q_CONNECTED = (1 << 0),
NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
NVME_RDMA_Q_DELETING = (1 << 2),
+ NVME_RDMA_Q_LIVE = (1 << 3),
};
struct nvme_rdma_queue {
@@ -240,7 +263,9 @@ out_free_ring:
static void nvme_rdma_qp_event(struct ib_event *event, void *context)
{
- pr_debug("QP event %d\n", event->event);
+ pr_debug("QP event %s (%d)\n",
+ ib_event_msg(event->event), event->event);
+
}
static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
@@ -624,10 +649,18 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
for (i = 1; i < ctrl->queue_count; i++) {
ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
- if (ret)
- break;
+ if (ret) {
+ dev_info(ctrl->ctrl.device,
+ "failed to connect i/o queue: %d\n", ret);
+ goto out_free_queues;
+ }
+ set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
}
+ return 0;
+
+out_free_queues:
+ nvme_rdma_free_io_queues(ctrl);
return ret;
}
@@ -712,6 +745,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (ret)
goto stop_admin_q;
+ set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
+
ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
if (ret)
goto stop_admin_q;
@@ -761,8 +796,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
nvme_stop_keep_alive(&ctrl->ctrl);
- for (i = 0; i < ctrl->queue_count; i++)
+ for (i = 0; i < ctrl->queue_count; i++) {
clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+ clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
+ }
if (ctrl->queue_count > 1)
nvme_stop_queues(&ctrl->ctrl);
@@ -950,8 +987,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
- int nents, count;
- int ret;
+ int count, ret;
req->num_sge = 1;
req->inline_data = false;
@@ -963,16 +999,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
return nvme_rdma_set_sg_null(c);
req->sg_table.sgl = req->first_sgl;
- ret = sg_alloc_table_chained(&req->sg_table, rq->nr_phys_segments,
- req->sg_table.sgl);
+ ret = sg_alloc_table_chained(&req->sg_table,
+ blk_rq_nr_phys_segments(rq), req->sg_table.sgl);
if (ret)
return -ENOMEM;
- nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
- BUG_ON(nents > rq->nr_phys_segments);
- req->nents = nents;
+ req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
- count = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents,
+ count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (unlikely(count <= 0)) {
sg_free_table_chained(&req->sg_table, true);
@@ -1117,13 +1151,10 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
struct nvme_completion *cqe, struct ib_wc *wc, int tag)
{
- u16 status = le16_to_cpu(cqe->status);
struct request *rq;
struct nvme_rdma_request *req;
int ret = 0;
- status >>= 1;
-
rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
@@ -1134,9 +1165,6 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
}
req = blk_mq_rq_to_pdu(rq);
- if (rq->cmd_type == REQ_TYPE_DRV_PRIV && rq->special)
- memcpy(rq->special, cqe, sizeof(*cqe));
-
if (rq->tag == tag)
ret = 1;
@@ -1144,8 +1172,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
wc->ex.invalidate_rkey == req->mr->rkey)
req->mr->need_inval = false;
- blk_mq_complete_request(rq, status);
-
+ req->req.result = cqe->result;
+ blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
return ret;
}
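
In struct nvme_completion the 16-bit status word packs the phase tag into bit 0 and the status code into bits 15:1, which is why the completion path above shifts the byte-swapped value right by one before handing it to blk_mq_complete_request(), while the raw completion result is stashed in the new nvme_request member for callers that want it. A standalone sketch of that split, using a made-up sample value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t cqe_status = 0x0003;		/* sample value, already byte-swapped */
	uint16_t phase  = cqe_status & 0x1;	/* bit 0: phase tag */
	uint16_t status = cqe_status >> 1;	/* bits 15:1: status code */

	printf("phase=%u status=0x%04x\n", phase, status);
	return 0;
}
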
@@ -1173,7 +1201,8 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
*/
if (unlikely(nvme_rdma_queue_idx(queue) == 0 &&
cqe->command_id >= NVME_RDMA_AQ_BLKMQ_DEPTH))
- nvme_complete_async_event(&queue->ctrl->ctrl, cqe);
+ nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
+ &cqe->result);
else
ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag);
ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
@@ -1207,16 +1236,24 @@ out_destroy_queue_ib:
static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
struct rdma_cm_event *ev)
{
- if (ev->param.conn.private_data_len) {
- struct nvme_rdma_cm_rej *rej =
- (struct nvme_rdma_cm_rej *)ev->param.conn.private_data;
+ struct rdma_cm_id *cm_id = queue->cm_id;
+ int status = ev->status;
+ const char *rej_msg;
+ const struct nvme_rdma_cm_rej *rej_data;
+ u8 rej_data_len;
+
+ rej_msg = rdma_reject_msg(cm_id, status);
+ rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len);
+
+ if (rej_data && rej_data_len >= sizeof(u16)) {
+ u16 sts = le16_to_cpu(rej_data->sts);
dev_err(queue->ctrl->ctrl.device,
- "Connect rejected, status %d.", le16_to_cpu(rej->sts));
- /* XXX: Think of something clever to do here... */
+ "Connect rejected: status %d (%s) nvme status %d (%s).\n",
+ status, rej_msg, sts, nvme_rdma_cm_msg(sts));
} else {
dev_err(queue->ctrl->ctrl.device,
- "Connect rejected, no private data.\n");
+ "Connect rejected: status %d (%s).\n", status, rej_msg);
}
return -ECONNRESET;
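
Two RDMA CM helpers do the decoding above: rdma_reject_msg() translates the transport-level reject status into text, and rdma_consumer_reject_data() returns the consumer private data (and its length) only when the reject actually came from the remote consumer. For NVMe over Fabrics that payload carries a record format and a status word, both little-endian. Below is a userspace sketch of decoding such a payload; the struct and function names are illustrative, and the length check here is stricter than the driver's sizeof(u16) guard.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_cm_rej {
	uint16_t recfmt;	/* record format, little-endian on the wire */
	uint16_t sts;		/* reject status code, little-endian on the wire */
};

static void decode_reject(const void *priv, uint8_t priv_len)
{
	struct demo_cm_rej rej;

	/* only trust the payload when it can hold both fields */
	if (!priv || priv_len < sizeof(rej)) {
		printf("Connect rejected: no usable private data\n");
		return;
	}

	memcpy(&rej, priv, sizeof(rej));
	/* assumes a little-endian host; the driver uses le16_to_cpu() */
	printf("Connect rejected: recfmt %u, nvme status %u\n",
	       rej.recfmt, rej.sts);
}

int main(void)
{
	const uint8_t wire[] = { 0x00, 0x00, 0x06, 0x00 };	/* recfmt 0, sts 6 */

	decode_reject(wire, sizeof(wire));
	decode_reject(NULL, 0);
	return 0;
}
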
@@ -1378,6 +1415,24 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
return BLK_EH_HANDLED;
}
+/*
+ * We cannot accept any other command until the Connect command has completed.
+ */
+static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
+ struct request *rq)
+{
+ if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
+ struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
+
+ if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
+ cmd->common.opcode != nvme_fabrics_command ||
+ cmd->fabrics.fctype != nvme_fabrics_type_connect)
+ return false;
+ }
+
+ return true;
+}
+
static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
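
The nvme_rdma_queue_is_ready() check added above means that until the fabrics Connect command has completed on a queue, only that Connect itself may be issued; everything else is returned as BLK_MQ_RQ_QUEUE_BUSY so blk-mq requeues it and retries once the queue goes live. A userspace sketch of the predicate; the opcode and fctype constants are stand-ins for the fabrics definitions.

#include <stdbool.h>
#include <stdio.h>

enum {
	DEMO_FABRICS_COMMAND		= 0x7f,	/* stand-in for nvme_fabrics_command */
	DEMO_FABRICS_TYPE_CONNECT	= 0x01,	/* stand-in for nvme_fabrics_type_connect */
};

struct demo_cmd {
	unsigned char opcode;
	unsigned char fctype;
};

static bool demo_queue_is_ready(bool queue_live, bool is_driver_private,
				const struct demo_cmd *cmd)
{
	if (!queue_live) {
		/* only a fabrics Connect may pass before the queue is live */
		if (!is_driver_private ||
		    cmd->opcode != DEMO_FABRICS_COMMAND ||
		    cmd->fctype != DEMO_FABRICS_TYPE_CONNECT)
			return false;
	}
	return true;
}

int main(void)
{
	struct demo_cmd connect = { DEMO_FABRICS_COMMAND, DEMO_FABRICS_TYPE_CONNECT };
	struct demo_cmd read	= { 0x02, 0 };

	printf("connect, queue not live: %d\n", demo_queue_is_ready(false, true, &connect));	/* 1 */
	printf("read, queue not live:    %d\n", demo_queue_is_ready(false, true, &read));	/* 0 */
	printf("read, queue live:        %d\n", demo_queue_is_ready(true, true, &read));		/* 1 */
	return 0;
}

Bouncing the request as busy rather than failing it keeps I/O that raced with queue setup on the queue instead of erroring it out.
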
@@ -1394,15 +1449,17 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
WARN_ON_ONCE(rq->tag < 0);
+ if (!nvme_rdma_queue_is_ready(queue, rq))
+ return BLK_MQ_RQ_QUEUE_BUSY;
+
dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
ret = nvme_setup_cmd(ns, rq, c);
- if (ret)
+ if (ret != BLK_MQ_RQ_QUEUE_OK)
return ret;
- c->common.command_id = rq->tag;
blk_mq_start_request(rq);
map_len = nvme_map_len(rq);
@@ -1544,6 +1601,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
if (error)
goto out_cleanup_queue;
+ set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
+
error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
if (error) {
dev_err(ctrl->ctrl.device,
@@ -1908,6 +1967,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
opts->queue_size = ctrl->ctrl.maxcmd;
}
+ if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
+ /* warn if sqsize is lower than queue_size */
+ dev_warn(ctrl->ctrl.device,
+ "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ opts->queue_size, ctrl->ctrl.sqsize + 1);
+ opts->queue_size = ctrl->ctrl.sqsize + 1;
+ }
+
if (opts->nr_io_queues) {
ret = nvme_rdma_create_io_queues(ctrl);
if (ret)
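
ctrl->ctrl.sqsize is a 0's-based value, so the clamp above compares the requested queue_size against sqsize + 1 and trims it when the controller advertises a smaller submission queue than the user asked for. A toy calculation with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned int ctrl_sqsize = 127;		/* 0's-based: room for 128 entries */
	unsigned long queue_size = 256;		/* what the user requested */

	if (queue_size > ctrl_sqsize + 1) {
		printf("queue_size %lu > ctrl sqsize %u, clamping down\n",
		       queue_size, ctrl_sqsize + 1);
		queue_size = ctrl_sqsize + 1;	/* ends up as 128 */
	}
	printf("effective queue_size: %lu\n", queue_size);
	return 0;
}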