From a2268cfbf599e7f55d4ee68193f08b4f44535fac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:34:32 -0400 Subject: xprtrdma: Add proper SPDX tags for NetApp-contributed source Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sunrpc/xprtrdma/xprt_rdma.h') diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3d3b423fa9c1..3490a87bc69a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. -- cgit v1.2.3-59-g8ed1b From edb41e61a54ee75fae31302775e0301fdcb0caaa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:09 -0400 Subject: xprtrdma: Make rpc_rqst part of rpcrdma_req This simplifies allocation of the generic RPC slot and xprtrdma specific per-RPC resources. It also makes xprtrdma more like the socket-based transports: ->buf_alloc and ->buf_free are now responsible only for send and receive buffers. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprtrdma/backchannel.c | 77 ++++++++++++++++++--------------------- net/sunrpc/xprtrdma/transport.c | 35 +++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 9 +---- 4 files changed, 46 insertions(+), 76 deletions(-) (limited to 'net/sunrpc/xprtrdma/xprt_rdma.h') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 706eef12bbc0..336fd1a19cca 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -84,7 +84,6 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; - void *rq_xprtdata; /* Per-xprt private data */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; void *rq_rbuffer; /* Reply XDR decode buffer */ diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 47ebac949769..4034788c9856 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -29,29 +29,41 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, spin_unlock(&buf->rb_reqslock); rpcrdma_destroy_req(req); - - kfree(rqst); } -static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, - struct rpc_rqst *rqst) +static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, + unsigned int count) { - struct rpcrdma_regbuf *rb; - struct rpcrdma_req *req; - size_t size; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpc_rqst *rqst; + unsigned int i; + + for (i = 0; i < (count << 1); i++) { + struct rpcrdma_regbuf *rb; + struct rpcrdma_req *req; + size_t size; + + req = rpcrdma_create_req(r_xprt); + if (IS_ERR(req)) + return PTR_ERR(req); + rqst = &req->rl_slot; + + rqst->rq_xprt = xprt; + INIT_LIST_HEAD(&rqst->rq_list); + INIT_LIST_HEAD(&rqst->rq_bc_list); + __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); - req = rpcrdma_create_req(r_xprt); - if (IS_ERR(req)) - return PTR_ERR(req); - - size = r_xprt->rx_data.inline_rsize; - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); - if (IS_ERR(rb)) - goto out_fail; - req->rl_sendbuf = rb; - xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, - min_t(size_t, size, PAGE_SIZE)); - rpcrdma_set_xprtdata(rqst, req); + size = r_xprt->rx_data.inline_rsize; + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); + if (IS_ERR(rb)) + goto out_fail; + req->rl_sendbuf = rb; + xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, + min_t(size_t, size, PAGE_SIZE)); + } return 0; out_fail: @@ -86,9 +98,6 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; - struct rpc_rqst *rqst; - unsigned int i; int rc; /* The backchannel reply path returns each rpc_rqst to the @@ -103,25 +112,9 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) if (reqs > RPCRDMA_BACKWARD_WRS >> 1) goto out_err; - for (i = 0; i < (reqs << 1); i++) { - rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); - if (!rqst) - goto out_free; - - dprintk("RPC: %s: new rqst %p\n", __func__, rqst); - - rqst->rq_xprt = &r_xprt->rx_xprt; - INIT_LIST_HEAD(&rqst->rq_list); - INIT_LIST_HEAD(&rqst->rq_bc_list); - __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); - - if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) - goto out_free; - - spin_lock_bh(&xprt->bc_pa_lock); - list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); - spin_unlock_bh(&xprt->bc_pa_lock); - } + rc = rpcrdma_bc_setup_reqs(r_xprt, reqs); + if (rc) + goto out_free; rc = rpcrdma_bc_setup_reps(r_xprt, reqs); if (rc) @@ -131,7 +124,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) if (rc) goto out_free; - buffer->rb_bc_srv_max_requests = reqs; + r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; request_module("svcrdma"); trace_xprtrdma_cb_setup(r_xprt, reqs); return 0; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8f9338e98c4f..79885aa39c24 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -331,9 +331,7 @@ xprt_setup_rdma(struct xprt_create *args) return ERR_PTR(-EBADF); } - xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), - xprt_rdma_slot_table_entries, - xprt_rdma_slot_table_entries); + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", __func__); @@ -365,7 +363,7 @@ xprt_setup_rdma(struct xprt_create *args) xprt_set_bound(xprt); xprt_rdma_format_addresses(xprt, sap); - cdata.max_requests = xprt->max_reqs; + cdata.max_requests = xprt_rdma_slot_table_entries; cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ @@ -550,22 +548,18 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) static void xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_rqst *rqst; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req; - spin_lock(&xprt->reserve_lock); - if (list_empty(&xprt->free)) + req = rpcrdma_buffer_get(&r_xprt->rx_buf); + if (!req) goto out_sleep; - rqst = list_first_entry(&xprt->free, struct rpc_rqst, rq_list); - list_del(&rqst->rq_list); - spin_unlock(&xprt->reserve_lock); - - task->tk_rqstp = rqst; + task->tk_rqstp = &req->rl_slot; task->tk_status = 0; return; out_sleep: rpc_sleep_on(&xprt->backlog, task, NULL); - spin_unlock(&xprt->reserve_lock); task->tk_status = -EAGAIN; } @@ -579,11 +573,8 @@ static void xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) { memset(rqst, 0, sizeof(*rqst)); - - spin_lock(&xprt->reserve_lock); - list_add(&rqst->rq_list, &xprt->free); + rpcrdma_buffer_put(rpcr_to_rdmar(rqst)); rpc_wake_up_next(&xprt->backlog); - spin_unlock(&xprt->reserve_lock); } static bool @@ -656,13 +647,9 @@ xprt_rdma_allocate(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); - struct rpcrdma_req *req; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); gfp_t flags; - req = rpcrdma_buffer_get(&r_xprt->rx_buf); - if (req == NULL) - goto out_get; - flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; @@ -672,15 +659,12 @@ xprt_rdma_allocate(struct rpc_task *task) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - rpcrdma_set_xprtdata(rqst, req); rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base; trace_xprtrdma_allocate(task, req); return 0; out_fail: - rpcrdma_buffer_put(req); -out_get: trace_xprtrdma_allocate(task, NULL); return -ENOMEM; } @@ -701,7 +685,6 @@ xprt_rdma_free(struct rpc_task *task) if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) rpcrdma_release_rqst(r_xprt, req); trace_xprtrdma_rpc_done(task, req); - rpcrdma_buffer_put(req); } /** diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3490a87bc69a..1d7bb6e5db18 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -335,6 +335,7 @@ enum { struct rpcrdma_buffer; struct rpcrdma_req { struct list_head rl_list; + struct rpc_rqst rl_slot; struct rpcrdma_buffer *rl_buffer; struct rpcrdma_rep *rl_reply; struct xdr_stream rl_stream; @@ -357,16 +358,10 @@ enum { RPCRDMA_REQ_F_TX_RESOURCES, }; -static inline void -rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) -{ - rqst->rq_xprtdata = req; -} - static inline struct rpcrdma_req * rpcr_to_rdmar(const struct rpc_rqst *rqst) { - return rqst->rq_xprtdata; + return container_of(rqst, struct rpcrdma_req, rl_slot); } static inline void -- cgit v1.2.3-59-g8ed1b From 7c8d9e7c8863905951d4eaa7a8d277150f3a37f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:20 -0400 Subject: xprtrdma: Move Receive posting to Receive handler Receive completion and Reply handling are done by a BOUND workqueue, meaning they run on only one CPU. Posting receives is currently done in the send_request path, which on large systems is typically done on a different CPU than the one handling Receive completions. This results in movement of Receive-related cachelines between the sending and receiving CPUs. More importantly, it means that currently Receives are posted while the transport's write lock is held, which is unnecessary and costly. Finally, allocation of Receive buffers is performed on-demand in the Receive completion handler. This helps guarantee that they are allocated on the same NUMA node as the CPU that handles Receive completions. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 40 +++++++-- net/sunrpc/xprtrdma/backchannel.c | 32 ++----- net/sunrpc/xprtrdma/rpc_rdma.c | 22 ++--- net/sunrpc/xprtrdma/transport.c | 3 - net/sunrpc/xprtrdma/verbs.c | 176 +++++++++++++++++++++----------------- net/sunrpc/xprtrdma/xprt_rdma.h | 6 +- 6 files changed, 150 insertions(+), 129 deletions(-) (limited to 'net/sunrpc/xprtrdma/xprt_rdma.h') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 99c0049e51a5..ad27e192cdf8 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -546,6 +546,39 @@ TRACE_EVENT(xprtrdma_post_recv, ) ); +TRACE_EVENT(xprtrdma_post_recvs, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + unsigned int count, + int status + ), + + TP_ARGS(r_xprt, count, status), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, count) + __field(int, status) + __field(int, posted) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->count = count; + __entry->status = status; + __entry->posted = r_xprt->rx_buf.rb_posted_receives; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->count, __entry->posted, __entry->status + ) +); + /** ** Completion events **/ @@ -800,7 +833,6 @@ TRACE_EVENT(xprtrdma_allocate, __field(unsigned int, task_id) __field(unsigned int, client_id) __field(const void *, req) - __field(const void *, rep) __field(size_t, callsize) __field(size_t, rcvsize) ), @@ -809,15 +841,13 @@ TRACE_EVENT(xprtrdma_allocate, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->req = req; - __entry->rep = req ? req->rl_reply : NULL; __entry->callsize = task->tk_rqstp->rq_callsize; __entry->rcvsize = task->tk_rqstp->rq_rcvsize; ), - TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)", + TP_printk("task:%u@%u req=%p (%zu, %zu)", __entry->task_id, __entry->client_id, - __entry->req, __entry->rep, - __entry->callsize, __entry->rcvsize + __entry->req, __entry->callsize, __entry->rcvsize ) ); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 4034788c9856..c8f1c2b89dad 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -71,23 +71,6 @@ out_fail: return -ENOMEM; } -/* Allocate and add receive buffers to the rpcrdma_buffer's - * existing list of rep's. These are released when the - * transport is destroyed. - */ -static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, - unsigned int count) -{ - int rc = 0; - - while (count--) { - rc = rpcrdma_create_rep(r_xprt); - if (rc) - break; - } - return rc; -} - /** * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests * @xprt: transport associated with these backchannel resources @@ -116,14 +99,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) if (rc) goto out_free; - rc = rpcrdma_bc_setup_reps(r_xprt, reqs); - if (rc) - goto out_free; - - rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); - if (rc) - goto out_free; - r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; request_module("svcrdma"); trace_xprtrdma_cb_setup(r_xprt, reqs); @@ -228,6 +203,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) if (rc < 0) goto failed_marshal; + rpcrdma_post_recvs(r_xprt, true); if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; return 0; @@ -268,10 +244,14 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) */ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) { + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpc_xprt *xprt = rqst->rq_xprt; dprintk("RPC: %s: freeing rqst %p (req %p)\n", - __func__, rqst, rpcr_to_rdmar(rqst)); + __func__, rqst, req); + + rpcrdma_recv_buffer_put(req->rl_reply); + req->rl_reply = NULL; spin_lock_bh(&xprt->bc_pa_lock); list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8f89e3faae8e..d676106295ff 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1027,8 +1027,6 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) out_short: pr_warn("RPC/RDMA short backward direction call\n"); - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) - xprt_disconnect_done(&r_xprt->rx_xprt); return true; } #else /* CONFIG_SUNRPC_BACKCHANNEL */ @@ -1334,13 +1332,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) u32 credits; __be32 *p; + --buf->rb_posted_receives; + if (rep->rr_hdrbuf.head[0].iov_len == 0) goto out_badstatus; + /* Fixed transport header fields */ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, rep->rr_hdrbuf.head[0].iov_base); - - /* Fixed transport header fields */ p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); if (unlikely(!p)) goto out_shortreply; @@ -1379,17 +1378,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); + rpcrdma_post_recvs(r_xprt, false); queue_work(rpcrdma_receive_wq, &rep->rr_work); return; -out_badstatus: - rpcrdma_recv_buffer_put(rep); - if (r_xprt->rx_ep.rep_connected == 1) { - r_xprt->rx_ep.rep_connected = -EIO; - rpcrdma_conn_func(&r_xprt->rx_ep); - } - return; - out_badversion: trace_xprtrdma_reply_vers(rep); goto repost; @@ -1409,7 +1401,7 @@ out_shortreply: * receive buffer before returning. */ repost: - r_xprt->rx_stats.bad_reply_count++; - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) - rpcrdma_recv_buffer_put(rep); + rpcrdma_post_recvs(r_xprt, false); +out_badstatus: + rpcrdma_recv_buffer_put(rep); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 79885aa39c24..0c775f05123c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -722,9 +722,6 @@ xprt_rdma_send_request(struct rpc_task *task) if (rc < 0) goto failed_marshal; - if (req->rl_reply == NULL) /* e.g. reconnection */ - rpcrdma_recv_buffer_get(req); - /* Must suppress retransmit to maintain credits */ if (rqst->rq_connect_cookie == xprt->connect_cookie) goto drop_connection; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4b18926de912..2ed2ee4f2c6c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -74,6 +74,7 @@ */ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); +static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); struct workqueue_struct *rpcrdma_receive_wq __read_mostly; @@ -726,7 +727,6 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - unsigned int extras; int rc; retry: @@ -770,9 +770,8 @@ retry: } dprintk("RPC: %s: connected\n", __func__); - extras = r_xprt->rx_buf.rb_bc_srv_max_requests; - if (extras) - rpcrdma_ep_post_extra_recv(r_xprt, extras); + + rpcrdma_post_recvs(r_xprt, true); out: if (rc) @@ -1082,14 +1081,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) return req; } -/** - * rpcrdma_create_rep - Allocate an rpcrdma_rep object - * @r_xprt: controlling transport - * - * Returns 0 on success or a negative errno on failure. - */ -int -rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) +static int +rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; struct rpcrdma_buffer *buf = &r_xprt->rx_buf; @@ -1117,6 +1110,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.num_sge = 1; + rep->rr_temp = temp; spin_lock(&buf->rb_lock); list_add(&rep->rr_list, &buf->rb_recv_bufs); @@ -1168,12 +1162,8 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) list_add(&req->rl_list, &buf->rb_send_bufs); } + buf->rb_posted_receives = 0; INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i <= buf->rb_max_requests; i++) { - rc = rpcrdma_create_rep(r_xprt); - if (rc) - goto out; - } rc = rpcrdma_sendctxs_create(r_xprt); if (rc) @@ -1263,7 +1253,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rep = rpcrdma_buffer_get_rep_locked(buf); rpcrdma_destroy_rep(rep); } - buf->rb_send_count = 0; spin_lock(&buf->rb_reqslock); while (!list_empty(&buf->rb_allreqs)) { @@ -1278,7 +1267,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); - buf->rb_recv_count = 0; rpcrdma_mrs_destroy(buf); } @@ -1351,27 +1339,11 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) __rpcrdma_mr_put(&r_xprt->rx_buf, mr); } -static struct rpcrdma_rep * -rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) -{ - /* If an RPC previously completed without a reply (say, a - * credential problem or a soft timeout occurs) then hold off - * on supplying more Receive buffers until the number of new - * pending RPCs catches up to the number of posted Receives. - */ - if (unlikely(buffers->rb_send_count < buffers->rb_recv_count)) - return NULL; - - if (unlikely(list_empty(&buffers->rb_recv_bufs))) - return NULL; - buffers->rb_recv_count++; - return rpcrdma_buffer_get_rep_locked(buffers); -} - -/* - * Get a set of request/reply buffers. +/** + * rpcrdma_buffer_get - Get a request buffer + * @buffers: Buffer pool from which to obtain a buffer * - * Reply buffer (if available) is attached to send buffer upon return. + * Returns a fresh rpcrdma_req, or NULL if none are available. */ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) @@ -1379,23 +1351,21 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) struct rpcrdma_req *req; spin_lock(&buffers->rb_lock); - if (list_empty(&buffers->rb_send_bufs)) - goto out_reqbuf; - buffers->rb_send_count++; + if (unlikely(list_empty(&buffers->rb_send_bufs))) + goto out_noreqs; req = rpcrdma_buffer_get_req_locked(buffers); - req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); - return req; -out_reqbuf: +out_noreqs: spin_unlock(&buffers->rb_lock); return NULL; } -/* - * Put request/reply buffers back into pool. - * Pre-decrement counter/array index. +/** + * rpcrdma_buffer_put - Put request/reply buffers back into pool + * @req: object to return + * */ void rpcrdma_buffer_put(struct rpcrdma_req *req) @@ -1406,27 +1376,16 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) req->rl_reply = NULL; spin_lock(&buffers->rb_lock); - buffers->rb_send_count--; - list_add_tail(&req->rl_list, &buffers->rb_send_bufs); + list_add(&req->rl_list, &buffers->rb_send_bufs); if (rep) { - buffers->rb_recv_count--; - list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + if (!rep->rr_temp) { + list_add(&rep->rr_list, &buffers->rb_recv_bufs); + rep = NULL; + } } spin_unlock(&buffers->rb_lock); -} - -/* - * Recover reply buffers from pool. - * This happens when recovering from disconnect. - */ -void -rpcrdma_recv_buffer_get(struct rpcrdma_req *req) -{ - struct rpcrdma_buffer *buffers = req->rl_buffer; - - spin_lock(&buffers->rb_lock); - req->rl_reply = rpcrdma_buffer_get_rep(buffers); - spin_unlock(&buffers->rb_lock); + if (rep) + rpcrdma_destroy_rep(rep); } /* @@ -1438,10 +1397,13 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) { struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; - spin_lock(&buffers->rb_lock); - buffers->rb_recv_count--; - list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); - spin_unlock(&buffers->rb_lock); + if (!rep->rr_temp) { + spin_lock(&buffers->rb_lock); + list_add(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock(&buffers->rb_lock); + } else { + rpcrdma_destroy_rep(rep); + } } /** @@ -1537,13 +1499,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; int rc; - if (req->rl_reply) { - rc = rpcrdma_ep_post_recv(ia, req->rl_reply); - if (rc) - return rc; - req->rl_reply = NULL; - } - if (!ep->rep_send_count || test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { send_wr->send_flags |= IB_SEND_SIGNALED; @@ -1618,3 +1573,70 @@ out_rc: rpcrdma_recv_buffer_put(rep); return rc; } + +/** + * rpcrdma_post_recvs - Maybe post some Receive buffers + * @r_xprt: controlling transport + * @temp: when true, allocate temp rpcrdma_rep objects + * + */ +void +rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct ib_recv_wr *wr, *bad_wr; + int needed, count, rc; + + needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); + if (buf->rb_posted_receives > needed) + return; + needed -= buf->rb_posted_receives; + + count = 0; + wr = NULL; + while (needed) { + struct rpcrdma_regbuf *rb; + struct rpcrdma_rep *rep; + + spin_lock(&buf->rb_lock); + rep = list_first_entry_or_null(&buf->rb_recv_bufs, + struct rpcrdma_rep, rr_list); + if (likely(rep)) + list_del(&rep->rr_list); + spin_unlock(&buf->rb_lock); + if (!rep) { + if (rpcrdma_create_rep(r_xprt, temp)) + break; + continue; + } + + rb = rep->rr_rdmabuf; + if (!rpcrdma_regbuf_is_mapped(rb)) { + if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) { + rpcrdma_recv_buffer_put(rep); + break; + } + } + + trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); + rep->rr_recv_wr.next = wr; + wr = &rep->rr_recv_wr; + ++count; + --needed; + } + if (!count) + return; + + rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr); + if (rc) { + for (wr = bad_wr; wr; wr = wr->next) { + struct rpcrdma_rep *rep; + + rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); + rpcrdma_recv_buffer_put(rep); + --count; + } + } + buf->rb_posted_receives += count; + trace_xprtrdma_post_recvs(r_xprt, count, rc); +} diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 1d7bb6e5db18..4915a6dd58c9 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -197,6 +197,7 @@ struct rpcrdma_rep { __be32 rr_proc; int rr_wc_flags; u32 rr_inv_rkey; + bool rr_temp; struct rpcrdma_regbuf *rr_rdmabuf; struct rpcrdma_xprt *rr_rxprt; struct work_struct rr_work; @@ -397,11 +398,11 @@ struct rpcrdma_buffer { struct rpcrdma_sendctx **rb_sc_ctxs; spinlock_t rb_lock; /* protect buf lists */ - int rb_send_count, rb_recv_count; struct list_head rb_send_bufs; struct list_head rb_recv_bufs; u32 rb_max_requests; u32 rb_credits; /* most recent credit grant */ + int rb_posted_receives; u32 rb_bc_srv_max_requests; spinlock_t rb_reqslock; /* protect rb_allreqs */ @@ -558,13 +559,13 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_req *); int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); +void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); /* * Buffer calls - xprtrdma/verbs.c */ struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); void rpcrdma_destroy_req(struct rpcrdma_req *); -int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); @@ -577,7 +578,6 @@ void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_req *); -void rpcrdma_recv_buffer_get(struct rpcrdma_req *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, -- cgit v1.2.3-59-g8ed1b From a7986f09986ac1befc85bcab30970312c476dbc7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:25 -0400 Subject: xprtrdma: Remove rpcrdma_ep_{post_recv, post_extra_recv} Clean up: These functions are no longer used. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 2 -- net/sunrpc/xprtrdma/verbs.c | 59 ----------------------------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 --- 3 files changed, 64 deletions(-) (limited to 'net/sunrpc/xprtrdma/xprt_rdma.h') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ad27e192cdf8..ac82849954e4 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -879,8 +879,6 @@ TRACE_EVENT(xprtrdma_rpc_done, ) ); -DEFINE_RXPRT_EVENT(xprtrdma_noreps); - /** ** Callback events **/ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2ed2ee4f2c6c..601b78b64483 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1515,65 +1515,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, return 0; } -int -rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, - struct rpcrdma_rep *rep) -{ - struct ib_recv_wr *recv_wr_fail; - int rc; - - if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) - goto out_map; - rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); - trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); - if (rc) - return -ENOTCONN; - return 0; - -out_map: - pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); - return -EIO; -} - -/** - * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests - * @r_xprt: transport associated with these backchannel resources - * @count: minimum number of incoming requests expected - * - * Returns zero if all requested buffers were posted, or a negative errno. - */ -int -rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) -{ - struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_rep *rep; - int rc; - - while (count--) { - spin_lock(&buffers->rb_lock); - if (list_empty(&buffers->rb_recv_bufs)) - goto out_reqbuf; - rep = rpcrdma_buffer_get_rep_locked(buffers); - spin_unlock(&buffers->rb_lock); - - rc = rpcrdma_ep_post_recv(ia, rep); - if (rc) - goto out_rc; - } - - return 0; - -out_reqbuf: - spin_unlock(&buffers->rb_lock); - trace_xprtrdma_noreps(r_xprt); - return -ENOMEM; - -out_rc: - rpcrdma_recv_buffer_put(rep); - return rc; -} - /** * rpcrdma_post_recvs - Maybe post some Receive buffers * @r_xprt: controlling transport diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 4915a6dd58c9..44a31246b8f7 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -558,7 +558,6 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_req *); -int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); /* @@ -599,8 +598,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) return __rpcrdma_dma_map_regbuf(ia, rb); } -int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); - int rpcrdma_alloc_wq(void); void rpcrdma_destroy_wq(void); -- cgit v1.2.3-59-g8ed1b From efd81e90d3f719a2a7fd696c9ed0247e1cae1b7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:41 -0400 Subject: xprtrdma: Make rpcrdma_sendctx_put_locked() a static function Clean up: The only call site is in the same file as the function's definition. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 +++- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma/xprt_rdma.h') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index db11aa04c612..0e0b7d5cb74d 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -72,6 +72,7 @@ /* * internal functions */ +static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); @@ -949,7 +950,8 @@ out_emptyq: * * The caller serializes calls to this function (per rpcrdma_buffer). */ -void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) +static void +rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) { struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; unsigned long next_tail; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 44a31246b8f7..c60687934092 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -568,7 +568,6 @@ void rpcrdma_destroy_req(struct rpcrdma_req *); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); -void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); void rpcrdma_mr_put(struct rpcrdma_mr *mr); -- cgit v1.2.3-59-g8ed1b From 2fad659209d5b1dbaa1f58606372571c61fc8cbd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 May 2018 15:35:57 -0400 Subject: xprtrdma: Wait on empty sendctx queue Currently, when the sendctx queue is exhausted during marshaling, the RPC/RDMA transport places the RPC task on the delayq, which forces a wait for HZ >> 2 before the marshal and send is retried. With this change, the transport now places such an RPC task on the pending queue, and wakes it just as soon as more sendctxs become available. This typically takes less than a millisecond, and the write_space waking mechanism is less deadlock-prone. Moreover, the waiting RPC task is holding the transport's write lock, which blocks the transport from sending RPCs. Therefore faster recovery from sendctx queue exhaustion is desirable. Cf. commit 5804891455d5 ("xprtrdma: ->send_request returns -EAGAIN when there are no free MRs"). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 8 +++++++- net/sunrpc/xprtrdma/xprt_rdma.h | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma/xprt_rdma.h') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index b12b0443d33a..a373d0322a80 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -695,7 +695,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, { req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); if (!req->rl_sendctx) - return -ENOBUFS; + return -EAGAIN; req->rl_sendctx->sc_wr.num_sge = 0; req->rl_sendctx->sc_unmap_count = 0; req->rl_sendctx->sc_req = req; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0e0b7d5cb74d..7276e82db3b4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -878,6 +878,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) sc->sc_xprt = r_xprt; buf->rb_sc_ctxs[i] = sc; } + buf->rb_flags = 0; return 0; @@ -935,7 +936,7 @@ out_emptyq: * completions recently. This is a sign the Send Queue is * backing up. Cause the caller to pause and try again. */ - dprintk("RPC: %s: empty sendctx queue\n", __func__); + set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); r_xprt->rx_stats.empty_sendctx_q++; return NULL; @@ -970,6 +971,11 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) /* Paired with READ_ONCE */ smp_store_release(&buf->rb_sc_tail, next_tail); + + if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) { + smp_mb__after_atomic(); + xprt_write_space(&sc->sc_xprt->rx_xprt); + } } static void diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c60687934092..e4a408d7b195 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -400,6 +400,7 @@ struct rpcrdma_buffer { spinlock_t rb_lock; /* protect buf lists */ struct list_head rb_send_bufs; struct list_head rb_recv_bufs; + unsigned long rb_flags; u32 rb_max_requests; u32 rb_credits; /* most recent credit grant */ int rb_posted_receives; @@ -417,6 +418,11 @@ struct rpcrdma_buffer { }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) +/* rb_flags */ +enum { + RPCRDMA_BUF_F_EMPTY_SCQ = 0, +}; + /* * Internal structure for transport instance creation. This * exists primarily for modularity. -- cgit v1.2.3-59-g8ed1b