Diffstat (limited to 'drivers/nvme/host/tcp.c')
 -rw-r--r--  drivers/nvme/host/tcp.c  144
 1 file changed, 101 insertions(+), 43 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 606b13d35d16..4ffd5957637a 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -13,6 +13,7 @@
#include <net/tcp.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
+#include <net/busy_poll.h>
#include "nvme.h"
#include "fabrics.h"
@@ -72,6 +73,7 @@ struct nvme_tcp_queue {
int pdu_offset;
size_t data_remaining;
size_t ddgst_remaining;
+ unsigned int nr_cqe;
/* send state */
struct nvme_tcp_request *request;
@@ -438,6 +440,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
}
nvme_end_request(rq, cqe->status, cqe->result);
+ queue->nr_cqe++;
return 0;
}
@@ -608,23 +611,18 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
switch (hdr->type) {
case nvme_tcp_c2h_data:
- ret = nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
- break;
+ return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
case nvme_tcp_rsp:
nvme_tcp_init_recv_ctx(queue);
- ret = nvme_tcp_handle_comp(queue, (void *)queue->pdu);
- break;
+ return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
case nvme_tcp_r2t:
nvme_tcp_init_recv_ctx(queue);
- ret = nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
- break;
+ return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
default:
dev_err(queue->ctrl->ctrl.device,
"unsupported pdu type (%d)\n", hdr->type);
return -EINVAL;
}
-
- return ret;
}
static inline void nvme_tcp_end_request(struct request *rq, u16 status)
@@ -701,8 +699,10 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
- if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS)
+ if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ queue->nr_cqe++;
+ }
nvme_tcp_init_recv_ctx(queue);
}
}
@@ -742,6 +742,7 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
pdu->command_id);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ queue->nr_cqe++;
}
nvme_tcp_init_recv_ctx(queue);
@@ -841,7 +842,7 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
{
- nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_DATA_XFER_ERROR);
+ nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
}
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
@@ -1023,14 +1024,16 @@ done:
static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
{
- struct sock *sk = queue->sock->sk;
+ struct socket *sock = queue->sock;
+ struct sock *sk = sock->sk;
read_descriptor_t rd_desc;
int consumed;
rd_desc.arg.data = queue;
rd_desc.count = 1;
lock_sock(sk);
- consumed = tcp_read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
+ queue->nr_cqe = 0;
+ consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
release_sock(sk);
return consumed;
}
@@ -1255,7 +1258,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->queue_size = queue_size;
if (qid > 0)
- queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
+ queue->cmnd_capsule_len = nctrl->ioccsz * 16;
else
queue->cmnd_capsule_len = sizeof(struct nvme_command) +
NVME_TCP_ADMIN_CCSZ;
@@ -1263,7 +1266,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &queue->sock);
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to create socket: %d\n", ret);
return ret;
}
@@ -1273,7 +1276,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT,
(char *)&opt, sizeof(opt));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to set TCP_SYNCNT sock opt %d\n", ret);
goto err_sock;
}
@@ -1283,7 +1286,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = kernel_setsockopt(queue->sock, IPPROTO_TCP,
TCP_NODELAY, (char *)&opt, sizeof(opt));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to set TCP_NODELAY sock opt %d\n", ret);
goto err_sock;
}
@@ -1296,11 +1299,23 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
(char *)&sol, sizeof(sol));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to set SO_LINGER sock opt %d\n", ret);
goto err_sock;
}
+ /* Set socket type of service */
+ if (nctrl->opts->tos >= 0) {
+ opt = nctrl->opts->tos;
+ ret = kernel_setsockopt(queue->sock, SOL_IP, IP_TOS,
+ (char *)&opt, sizeof(opt));
+ if (ret) {
+ dev_err(nctrl->device,
+ "failed to set IP_TOS sock opt %d\n", ret);
+ goto err_sock;
+ }
+ }
+
queue->sock->sk->sk_allocation = GFP_ATOMIC;
if (!qid)
n = 0;
@@ -1314,11 +1329,11 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->pdu_offset = 0;
sk_set_memalloc(queue->sock->sk);
- if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
+ if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
sizeof(ctrl->src_addr));
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to bind queue %d socket %d\n",
qid, ret);
goto err_sock;
@@ -1330,7 +1345,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
if (queue->hdr_digest || queue->data_digest) {
ret = nvme_tcp_alloc_crypto(queue);
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to allocate queue %d crypto\n", qid);
goto err_sock;
}
@@ -1344,13 +1359,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
goto err_crypto;
}
- dev_dbg(ctrl->ctrl.device, "connecting queue %d\n",
+ dev_dbg(nctrl->device, "connecting queue %d\n",
nvme_tcp_queue_id(queue));
ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
sizeof(ctrl->addr), 0);
if (ret) {
- dev_err(ctrl->ctrl.device,
+ dev_err(nctrl->device,
"failed to connect socket: %d\n", ret);
goto err_rcv_pdu;
}
@@ -1371,6 +1386,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
queue->sock->sk->sk_state_change = nvme_tcp_state_change;
queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+ queue->sock->sk->sk_ll_usec = 1;
write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
@@ -1469,7 +1485,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
- set->nr_maps = 2 /* default + read */;
+ set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
}
ret = blk_mq_alloc_tag_set(set);
@@ -1568,6 +1584,7 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
+ nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
return nr_io_queues;
}
@@ -1599,6 +1616,12 @@ static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
min(opts->nr_io_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
}
+
+ if (opts->nr_poll_queues && nr_io_queues) {
+ /* map dedicated poll queues only if we have queues left */
+ ctrl->io_queues[HCTX_TYPE_POLL] =
+ min(opts->nr_poll_queues, nr_io_queues);
+ }
}
static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
@@ -1680,6 +1703,7 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
nvme_tcp_stop_queue(ctrl, 0);
if (remove) {
blk_cleanup_queue(ctrl->admin_q);
+ blk_cleanup_queue(ctrl->fabrics_q);
blk_mq_free_tag_set(ctrl->admin_tagset);
}
nvme_tcp_free_admin_queue(ctrl);
@@ -1700,10 +1724,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
goto out_free_queue;
}
+ ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
+ if (IS_ERR(ctrl->fabrics_q)) {
+ error = PTR_ERR(ctrl->fabrics_q);
+ goto out_free_tagset;
+ }
+
ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset);
if (IS_ERR(ctrl->admin_q)) {
error = PTR_ERR(ctrl->admin_q);
- goto out_free_tagset;
+ goto out_cleanup_fabrics_q;
}
}
@@ -1711,19 +1741,12 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
if (error)
goto out_cleanup_queue;
- error = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
- if (error) {
- dev_err(ctrl->device,
- "prop_get NVME_REG_CAP failed\n");
- goto out_stop_queue;
- }
-
- ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
-
- error = nvme_enable_ctrl(ctrl, ctrl->cap);
+ error = nvme_enable_ctrl(ctrl);
if (error)
goto out_stop_queue;
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+
error = nvme_init_identify(ctrl);
if (error)
goto out_stop_queue;
@@ -1735,6 +1758,9 @@ out_stop_queue:
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->admin_q);
+out_cleanup_fabrics_q:
+ if (new)
+ blk_cleanup_queue(ctrl->fabrics_q);
out_free_tagset:
if (new)
blk_mq_free_tag_set(ctrl->admin_tagset);
@@ -1748,10 +1774,13 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
{
blk_mq_quiesce_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
- if (ctrl->admin_tagset)
+ if (ctrl->admin_tagset) {
blk_mq_tagset_busy_iter(ctrl->admin_tagset,
nvme_cancel_request, ctrl);
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ blk_mq_tagset_wait_completed_request(ctrl->admin_tagset);
+ }
+ if (remove)
+ blk_mq_unquiesce_queue(ctrl->admin_q);
nvme_tcp_destroy_admin_queue(ctrl, remove);
}
@@ -1762,9 +1791,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
return;
nvme_stop_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
- if (ctrl->tagset)
+ if (ctrl->tagset) {
blk_mq_tagset_busy_iter(ctrl->tagset,
nvme_cancel_request, ctrl);
+ blk_mq_tagset_wait_completed_request(ctrl->tagset);
+ }
if (remove)
nvme_start_queues(ctrl);
nvme_tcp_destroy_io_queues(ctrl, remove);
@@ -1793,7 +1824,7 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
{
struct nvmf_ctrl_options *opts = ctrl->opts;
- int ret = -EINVAL;
+ int ret;
ret = nvme_tcp_configure_admin_queue(ctrl, new);
if (ret)
@@ -1876,6 +1907,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
/* unquiesce to fail fast pending requests */
nvme_start_queues(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, false);
+ blk_mq_unquiesce_queue(ctrl->admin_q);
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we're in DELETING state */
@@ -1892,10 +1924,11 @@ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
nvme_tcp_teardown_io_queues(ctrl, shutdown);
+ blk_mq_quiesce_queue(ctrl->admin_q);
if (shutdown)
nvme_shutdown_ctrl(ctrl);
else
- nvme_disable_ctrl(ctrl, ctrl->cap);
+ nvme_disable_ctrl(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, shutdown);
}
@@ -2151,14 +2184,36 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+ if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
+ /* map dedicated poll queues only if we have queues left */
+ set->map[HCTX_TYPE_POLL].nr_queues =
+ ctrl->io_queues[HCTX_TYPE_POLL];
+ set->map[HCTX_TYPE_POLL].queue_offset =
+ ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+ ctrl->io_queues[HCTX_TYPE_READ];
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ }
+
dev_info(ctrl->ctrl.device,
- "mapped %d/%d default/read queues.\n",
+ "mapped %d/%d/%d default/read/poll queues.\n",
ctrl->io_queues[HCTX_TYPE_DEFAULT],
- ctrl->io_queues[HCTX_TYPE_READ]);
+ ctrl->io_queues[HCTX_TYPE_READ],
+ ctrl->io_queues[HCTX_TYPE_POLL]);
return 0;
}
+static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
+{
+ struct nvme_tcp_queue *queue = hctx->driver_data;
+ struct sock *sk = queue->sock->sk;
+
+ if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue))
+ sk_busy_loop(sk, true);
+ nvme_tcp_try_recv(queue);
+ return queue->nr_cqe;
+}
+
static struct blk_mq_ops nvme_tcp_mq_ops = {
.queue_rq = nvme_tcp_queue_rq,
.complete = nvme_complete_rq,
@@ -2167,6 +2222,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = {
.init_hctx = nvme_tcp_init_hctx,
.timeout = nvme_tcp_timeout,
.map_queues = nvme_tcp_map_queues,
+ .poll = nvme_tcp_poll,
};
static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
@@ -2220,7 +2276,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
INIT_LIST_HEAD(&ctrl->list);
ctrl->ctrl.opts = opts;
- ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
+ ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
+ opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
@@ -2314,7 +2371,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
- NVMF_OPT_NR_WRITE_QUEUES,
+ NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
+ NVMF_OPT_TOS,
.create_ctrl = nvme_tcp_create_ctrl,
};