From bf7ca707ae60045342e145c88a83bbe00f66775f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 9 Oct 2019 12:58:14 -0400 Subject: SUNRPC: Add trace points to observe transport congestion control To help debug problems with RPC/RDMA credit management, replace dprintk() call sites in the transport send lock paths with trace events. Similar trace points are defined for the non-congestion paths. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 93 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ffa3c51dbb1a..378233fe5ac7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -777,6 +777,99 @@ TRACE_EVENT(xprt_ping, __get_str(addr), __get_str(port), __entry->status) ); +DECLARE_EVENT_CLASS(xprt_writelock_event, + TP_PROTO( + const struct rpc_xprt *xprt, const struct rpc_task *task + ), + + TP_ARGS(xprt, task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, snd_task_id) + ), + + TP_fast_assign( + if (task) { + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + } else { + __entry->task_id = -1; + __entry->client_id = -1; + } + __entry->snd_task_id = xprt->snd_task ? + xprt->snd_task->tk_pid : -1; + ), + + TP_printk("task:%u@%u snd_task:%u", + __entry->task_id, __entry->client_id, + __entry->snd_task_id) +); + +#define DEFINE_WRITELOCK_EVENT(name) \ + DEFINE_EVENT(xprt_writelock_event, xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(xprt, task)) + +DEFINE_WRITELOCK_EVENT(reserve_xprt); +DEFINE_WRITELOCK_EVENT(release_xprt); + +DECLARE_EVENT_CLASS(xprt_cong_event, + TP_PROTO( + const struct rpc_xprt *xprt, const struct rpc_task *task + ), + + TP_ARGS(xprt, task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, snd_task_id) + __field(unsigned long, cong) + __field(unsigned long, cwnd) + __field(bool, wait) + ), + + TP_fast_assign( + if (task) { + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + } else { + __entry->task_id = -1; + __entry->client_id = -1; + } + __entry->snd_task_id = xprt->snd_task ? + xprt->snd_task->tk_pid : -1; + __entry->cong = xprt->cong; + __entry->cwnd = xprt->cwnd; + __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); + ), + + TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", + __entry->task_id, __entry->client_id, + __entry->snd_task_id, __entry->cong, __entry->cwnd, + __entry->wait ? " (wait)" : "") +); + +#define DEFINE_CONG_EVENT(name) \ + DEFINE_EVENT(xprt_cong_event, xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(xprt, task)) + +DEFINE_CONG_EVENT(reserve_cong); +DEFINE_CONG_EVENT(release_cong); +DEFINE_CONG_EVENT(get_cong); +DEFINE_CONG_EVENT(put_cong); + TRACE_EVENT(xs_stream_read_data, TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), -- cgit v1.2.3-59-g8ed1b From 4b93dab36f28e673725e5e6123ebfccf7697f96a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 9 Oct 2019 13:07:21 -0400 Subject: xprtrdma: Add unique trace points for posting Local Invalidate WRs When adding frwr_unmap_async way back when, I re-used the existing trace_xprtrdma_post_send() trace point to record the return code of ib_post_send. Unfortunately there are some cases where re-using that trace point causes a crash. Instead, construct a trace point specific to posting Local Invalidate WRs that will always be safe to use in that context, and will act as a trace log eye-catcher for Local Invalidation. Fixes: 847568942f93 ("xprtrdma: Remove fr_state") Fixes: d8099feda483 ("xprtrdma: Reduce context switching due ... ") Signed-off-by: Chuck Lever Tested-by: Bill Baker Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 25 +++++++++++++++++++++++++ net/sunrpc/xprtrdma/frwr_ops.c | 4 ++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a13830616107..7fd11ec1c9a4 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -735,6 +735,31 @@ TRACE_EVENT(xprtrdma_post_recvs, ) ); +TRACE_EVENT(xprtrdma_post_linv, + TP_PROTO( + const struct rpcrdma_req *req, + int status + ), + + TP_ARGS(req, status), + + TP_STRUCT__entry( + __field(const void *, req) + __field(int, status) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->req = req; + __entry->status = status; + __entry->xid = be32_to_cpu(req->rl_slot.rq_xid); + ), + + TP_printk("req=%p xid=0x%08x status=%d", + __entry->req, __entry->xid, __entry->status + ) +); + /** ** Completion events **/ diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 30065a28628c..9901a811f598 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -570,7 +570,6 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ bad_wr = NULL; rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); - trace_xprtrdma_post_send(req, rc); /* The final LOCAL_INV WR in the chain is supposed to * do the wake. If it was never posted, the wake will @@ -583,6 +582,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /* Recycle MRs in the LOCAL_INV chain that did not get posted. */ + trace_xprtrdma_post_linv(req, rc); while (bad_wr) { frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); @@ -673,12 +673,12 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ bad_wr = NULL; rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); - trace_xprtrdma_post_send(req, rc); if (!rc) return; /* Recycle MRs in the LOCAL_INV chain that did not get posted. */ + trace_xprtrdma_post_linv(req, rc); while (bad_wr) { frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); mr = container_of(frwr, struct rpcrdma_mr, frwr); -- cgit v1.2.3-59-g8ed1b From dc15c3d5f16808f7c171b55da6a82a5c0f279647 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2019 14:31:35 -0400 Subject: xprtrdma: Move the rpcrdma_sendctx::sc_wr field Clean up: This field is not needed in the Send completion handler, so it can be moved to struct rpcrdma_req to reduce the size of struct rpcrdma_sendctx, and to reduce the amount of memory that is sloshed between the sending process and the Send completion process. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 5 ++--- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- net/sunrpc/xprtrdma/rpc_rdma.c | 9 ++++++--- net/sunrpc/xprtrdma/verbs.c | 5 +---- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 5 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 7fd11ec1c9a4..f8edab91e09c 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -667,9 +667,8 @@ TRACE_EVENT(xprtrdma_post_send, __entry->client_id = rqst->rq_task->tk_client ? rqst->rq_task->tk_client->cl_clid : -1; __entry->req = req; - __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; - __entry->signaled = req->rl_sendctx->sc_wr.send_flags & - IB_SEND_SIGNALED; + __entry->num_sge = req->rl_wr.num_sge; + __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; __entry->status = status; ), diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 5cd871568c67..523722be6a16 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -396,7 +396,7 @@ int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) struct ib_send_wr *post_wr; struct rpcrdma_mr *mr; - post_wr = &req->rl_sendctx->sc_wr; + post_wr = &req->rl_wr; list_for_each_entry(mr, &req->rl_registered, mr_list) { struct rpcrdma_frwr *frwr; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 1941b2261ca5..53cd2e3cf003 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -599,7 +599,7 @@ static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt, ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, DMA_TO_DEVICE); - sc->sc_wr.num_sge++; + req->rl_wr.num_sge++; return true; out_regbuf: @@ -711,7 +711,7 @@ map_tail: } out: - sc->sc_wr.num_sge += sge_no; + req->rl_wr.num_sge += sge_no; if (sc->sc_unmap_count) kref_get(&req->rl_kref); return true; @@ -752,10 +752,13 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt); if (!req->rl_sendctx) goto err; - req->rl_sendctx->sc_wr.num_sge = 0; req->rl_sendctx->sc_unmap_count = 0; req->rl_sendctx->sc_req = req; kref_init(&req->rl_kref); + req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe; + req->rl_wr.sg_list = req->rl_sendctx->sc_sges; + req->rl_wr.num_sge = 0; + req->rl_wr.opcode = IB_WR_SEND; ret = -EIO; if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3ab086aa69bb..2f4658255343 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -811,9 +811,6 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia) if (!sc) return NULL; - sc->sc_wr.wr_cqe = &sc->sc_cqe; - sc->sc_wr.sg_list = sc->sc_sges; - sc->sc_wr.opcode = IB_WR_SEND; sc->sc_cqe.done = rpcrdma_wc_send; return sc; } @@ -1469,7 +1466,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_req *req) { - struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; + struct ib_send_wr *send_wr = &req->rl_wr; int rc; if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0e5b7f373077..cdd6a3d43e0f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -219,7 +219,6 @@ enum { */ struct rpcrdma_req; struct rpcrdma_sendctx { - struct ib_send_wr sc_wr; struct ib_cqe sc_cqe; struct rpcrdma_req *sc_req; unsigned int sc_unmap_count; @@ -314,6 +313,7 @@ struct rpcrdma_req { struct rpcrdma_rep *rl_reply; struct xdr_stream rl_stream; struct xdr_buf rl_hdrbuf; + struct ib_send_wr rl_wr; struct rpcrdma_sendctx *rl_sendctx; struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ -- cgit v1.2.3-59-g8ed1b From 614f3c96d7e5efd1c4dc699524857130a52c6a7f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2019 14:31:53 -0400 Subject: xprtrdma: Pull up sometimes On some platforms, DMA mapping part of a page is more costly than copying bytes. Restore the pull-up code and use that when we think it's going to be faster. The heuristic for now is to pull-up when the size of the RPC message body fits in the buffer underlying the head iovec. Indeed, not involving the I/O MMU can help the RPC/RDMA transport scale better for tiny I/Os across more RDMA devices. This is because interaction with the I/O MMU is eliminated, as is handling a Send completion, for each of these small I/Os. Without the explicit unmapping, the NIC no longer needs to do a costly internal TLB shoot down for buffers that are just a handful of bytes. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 4 ++ net/sunrpc/xprtrdma/backchannel.c | 2 +- net/sunrpc/xprtrdma/rpc_rdma.c | 82 ++++++++++++++++++++++++++++++++++++--- net/sunrpc/xprtrdma/verbs.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 2 + 5 files changed, 85 insertions(+), 7 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index f8edab91e09c..213c72585a5f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -532,6 +532,8 @@ DEFINE_WRCH_EVENT(write); DEFINE_WRCH_EVENT(reply); TRACE_DEFINE_ENUM(rpcrdma_noch); +TRACE_DEFINE_ENUM(rpcrdma_noch_pullup); +TRACE_DEFINE_ENUM(rpcrdma_noch_mapped); TRACE_DEFINE_ENUM(rpcrdma_readch); TRACE_DEFINE_ENUM(rpcrdma_areadch); TRACE_DEFINE_ENUM(rpcrdma_writech); @@ -540,6 +542,8 @@ TRACE_DEFINE_ENUM(rpcrdma_replych); #define xprtrdma_show_chunktype(x) \ __print_symbolic(x, \ { rpcrdma_noch, "inline" }, \ + { rpcrdma_noch_pullup, "pullup" }, \ + { rpcrdma_noch_mapped, "mapped" }, \ { rpcrdma_readch, "read list" }, \ { rpcrdma_areadch, "*read list" }, \ { rpcrdma_writech, "write list" }, \ diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 50e075fcdd8f..11685245546a 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -79,7 +79,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) *p = xdr_zero; if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, - &rqst->rq_snd_buf, rpcrdma_noch)) + &rqst->rq_snd_buf, rpcrdma_noch_pullup)) return -EIO; trace_xprtrdma_cb_reply(rqst); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a441dbf9f198..4ad88893e964 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -392,7 +392,7 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, unsigned int pos; int nsegs; - if (rtype == rpcrdma_noch) + if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped) goto done; pos = rqst->rq_snd_buf.head[0].iov_len; @@ -691,6 +691,72 @@ out_mapping_err: return false; } +/* Copy the tail to the end of the head buffer. + */ +static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct xdr_buf *xdr) +{ + unsigned char *dst; + + dst = (unsigned char *)xdr->head[0].iov_base; + dst += xdr->head[0].iov_len + xdr->page_len; + memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len); + r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len; +} + +/* Copy pagelist content into the head buffer. + */ +static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct xdr_buf *xdr) +{ + unsigned int len, page_base, remaining; + struct page **ppages; + unsigned char *src, *dst; + + dst = (unsigned char *)xdr->head[0].iov_base; + dst += xdr->head[0].iov_len; + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + page_base = offset_in_page(xdr->page_base); + remaining = xdr->page_len; + while (remaining) { + src = page_address(*ppages); + src += page_base; + len = min_t(unsigned int, PAGE_SIZE - page_base, remaining); + memcpy(dst, src, len); + r_xprt->rx_stats.pullup_copy_count += len; + + ppages++; + dst += len; + remaining -= len; + page_base = 0; + } +} + +/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it. + * When the head, pagelist, and tail are small, a pull-up copy + * is considerably less costly than DMA mapping the components + * of @xdr. + * + * Assumptions: + * - the caller has already verified that the total length + * of the RPC Call body will fit into @rl_sendbuf. + */ +static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, + struct xdr_buf *xdr) +{ + if (unlikely(xdr->tail[0].iov_len)) + rpcrdma_pullup_tail_iov(r_xprt, req, xdr); + + if (unlikely(xdr->page_len)) + rpcrdma_pullup_pagelist(r_xprt, req, xdr); + + /* The whole RPC message resides in the head iovec now */ + return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len); +} + static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct xdr_buf *xdr) @@ -779,7 +845,11 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, goto out_unmap; switch (rtype) { - case rpcrdma_noch: + case rpcrdma_noch_pullup: + if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr)) + goto out_unmap; + break; + case rpcrdma_noch_mapped: if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr)) goto out_unmap; break; @@ -827,6 +897,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct xdr_stream *xdr = &req->rl_stream; enum rpcrdma_chunktype rtype, wtype; + struct xdr_buf *buf = &rqst->rq_snd_buf; bool ddp_allowed; __be32 *p; int ret; @@ -884,8 +955,9 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) */ if (rpcrdma_args_inline(r_xprt, rqst)) { *p++ = rdma_msg; - rtype = rpcrdma_noch; - } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { + rtype = buf->len < rdmab_length(req->rl_sendbuf) ? + rpcrdma_noch_pullup : rpcrdma_noch_mapped; + } else if (ddp_allowed && buf->flags & XDRBUF_WRITE) { *p++ = rdma_msg; rtype = rpcrdma_readch; } else { @@ -927,7 +999,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) goto out_err; ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len, - &rqst->rq_snd_buf, rtype); + buf, rtype); if (ret) goto out_err; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2f4658255343..a514e2c89ac3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1165,7 +1165,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; - req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE, + req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, GFP_KERNEL); if (!req) goto out; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cdd6a3d43e0f..5d15140a0266 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -554,6 +554,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); enum rpcrdma_chunktype { rpcrdma_noch = 0, + rpcrdma_noch_pullup, + rpcrdma_noch_mapped, rpcrdma_readch, rpcrdma_areadch, rpcrdma_writech, -- cgit v1.2.3-59-g8ed1b From 7b020f17bbd34c219419b634d9efb9e93a3af4c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:01:58 -0400 Subject: xprtrdma: Report the computed connect delay For debugging, the op_connect trace point should report the computed connect delay. We can then ensure that the delay is computed at the proper times, for example. As a further clean-up, remove a few low-value "heartbeat" trace points in the connect path. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 77 ++++++++++++++++++++++++++++++----------- net/sunrpc/xprtrdma/transport.c | 3 +- net/sunrpc/xprtrdma/verbs.c | 13 ++----- 3 files changed, 60 insertions(+), 33 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 213c72585a5f..99e3c61609b2 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -85,6 +85,44 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt, ), \ TP_ARGS(r_xprt)) +DECLARE_EVENT_CLASS(xprtrdma_connect_class, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + int rc + ), + + TP_ARGS(r_xprt, rc), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(int, rc) + __field(int, connect_status) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->rc = rc; + __entry->connect_status = r_xprt->rx_ep.rep_connected; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->rc, __entry->connect_status + ) +); + +#define DEFINE_CONN_EVENT(name) \ + DEFINE_EVENT(xprtrdma_connect_class, xprtrdma_##name, \ + TP_PROTO( \ + const struct rpcrdma_xprt *r_xprt, \ + int rc \ + ), \ + TP_ARGS(r_xprt, rc)) + DECLARE_EVENT_CLASS(xprtrdma_rdch_event, TP_PROTO( const struct rpc_task *task, @@ -333,47 +371,44 @@ TRACE_EVENT(xprtrdma_cm_event, ) ); -TRACE_EVENT(xprtrdma_disconnect, +DEFINE_CONN_EVENT(connect); +DEFINE_CONN_EVENT(disconnect); + +DEFINE_RXPRT_EVENT(xprtrdma_create); +DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); +DEFINE_RXPRT_EVENT(xprtrdma_remove); +DEFINE_RXPRT_EVENT(xprtrdma_reinsert); +DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); +DEFINE_RXPRT_EVENT(xprtrdma_op_close); + +TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( const struct rpcrdma_xprt *r_xprt, - int status + unsigned long delay ), - TP_ARGS(r_xprt, status), + TP_ARGS(r_xprt, delay), TP_STRUCT__entry( __field(const void *, r_xprt) - __field(int, status) - __field(int, connected) + __field(unsigned long, delay) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( __entry->r_xprt = r_xprt; - __entry->status = status; - __entry->connected = r_xprt->rx_ep.rep_connected; + __entry->delay = delay; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected", - __get_str(addr), __get_str(port), - __entry->r_xprt, __entry->status, - __entry->connected == 1 ? "still " : "dis" + TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->delay ) ); -DEFINE_RXPRT_EVENT(xprtrdma_conn_start); -DEFINE_RXPRT_EVENT(xprtrdma_conn_tout); -DEFINE_RXPRT_EVENT(xprtrdma_create); -DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); -DEFINE_RXPRT_EVENT(xprtrdma_remove); -DEFINE_RXPRT_EVENT(xprtrdma_reinsert); -DEFINE_RXPRT_EVENT(xprtrdma_reconnect); -DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_close); -DEFINE_RXPRT_EVENT(xprtrdma_op_connect); TRACE_EVENT(xprtrdma_op_set_cto, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 361e59146807..ce263e6f779c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -527,13 +527,12 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); unsigned long delay; - trace_xprtrdma_op_connect(r_xprt); - delay = 0; if (r_xprt->rx_ep.rep_connected != 0) { delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } + trace_xprtrdma_op_connect(r_xprt, delay); queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, delay); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index a514e2c89ac3..92bdf053716a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -297,8 +297,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) struct rdma_cm_id *id; int rc; - trace_xprtrdma_conn_start(xprt); - init_completion(&ia->ri_done); init_completion(&ia->ri_remove_done); @@ -314,10 +312,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) if (rc) goto out; rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); - if (rc < 0) { - trace_xprtrdma_conn_tout(xprt); + if (rc < 0) goto out; - } rc = ia->ri_async_rc; if (rc) @@ -328,10 +324,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) if (rc) goto out; rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); - if (rc < 0) { - trace_xprtrdma_conn_tout(xprt); + if (rc < 0) goto out; - } rc = ia->ri_async_rc; if (rc) goto out; @@ -644,8 +638,6 @@ static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rdma_cm_id *id, *old; int err, rc; - trace_xprtrdma_reconnect(r_xprt); - rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia); rc = -EHOSTUNREACH; @@ -744,6 +736,7 @@ out: ep->rep_connected = rc; out_noupdate: + trace_xprtrdma_connect(r_xprt, rc); return rc; } -- cgit v1.2.3-59-g8ed1b From d4957f01d29b2a01200117fc04b9faaa52aca4bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:03 -0400 Subject: xprtrdma: Refine trace_xprtrdma_fixup Slightly reduce overhead and display more useful information. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 60 +++++++++--------------------------------- net/sunrpc/xprtrdma/rpc_rdma.c | 5 ++-- 2 files changed, 15 insertions(+), 50 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 99e3c61609b2..542177c5896f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1084,66 +1084,32 @@ DEFINE_REPLY_EVENT(xprtrdma_reply_hdr); TRACE_EVENT(xprtrdma_fixup, TP_PROTO( const struct rpc_rqst *rqst, - int len, - int hdrlen + unsigned long fixup ), - TP_ARGS(rqst, len, hdrlen), + TP_ARGS(rqst, fixup), TP_STRUCT__entry( __field(unsigned int, task_id) __field(unsigned int, client_id) - __field(const void *, base) - __field(int, len) - __field(int, hdrlen) - ), - - TP_fast_assign( - __entry->task_id = rqst->rq_task->tk_pid; - __entry->client_id = rqst->rq_task->tk_client->cl_clid; - __entry->base = rqst->rq_rcv_buf.head[0].iov_base; - __entry->len = len; - __entry->hdrlen = hdrlen; - ), - - TP_printk("task:%u@%u base=%p len=%d hdrlen=%d", - __entry->task_id, __entry->client_id, - __entry->base, __entry->len, __entry->hdrlen - ) -); - -TRACE_EVENT(xprtrdma_fixup_pg, - TP_PROTO( - const struct rpc_rqst *rqst, - int pageno, - const void *pos, - int len, - int curlen - ), - - TP_ARGS(rqst, pageno, pos, len, curlen), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, pos) - __field(int, pageno) - __field(int, len) - __field(int, curlen) + __field(unsigned long, fixup) + __field(size_t, headlen) + __field(unsigned int, pagelen) + __field(size_t, taillen) ), TP_fast_assign( __entry->task_id = rqst->rq_task->tk_pid; __entry->client_id = rqst->rq_task->tk_client->cl_clid; - __entry->pos = pos; - __entry->pageno = pageno; - __entry->len = len; - __entry->curlen = curlen; + __entry->fixup = fixup; + __entry->headlen = rqst->rq_rcv_buf.head[0].iov_len; + __entry->pagelen = rqst->rq_rcv_buf.page_len; + __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; ), - TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d", - __entry->task_id, __entry->client_id, - __entry->pageno, __entry->pos, __entry->len, __entry->curlen + TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", + __entry->task_id, __entry->client_id, __entry->fixup, + __entry->headlen, __entry->pagelen, __entry->taillen ) ); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 4ad88893e964..26d334c83b38 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1086,7 +1086,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) curlen = rqst->rq_rcv_buf.head[0].iov_len; if (curlen > copy_len) curlen = copy_len; - trace_xprtrdma_fixup(rqst, copy_len, curlen); srcp += curlen; copy_len -= curlen; @@ -1106,8 +1105,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) if (curlen > pagelist_len) curlen = pagelist_len; - trace_xprtrdma_fixup_pg(rqst, i, srcp, - copy_len, curlen); destp = kmap_atomic(ppages[i]); memcpy(destp + page_base, srcp, curlen); flush_dcache_page(ppages[i]); @@ -1139,6 +1136,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) rqst->rq_private_buf.tail[0].iov_base = srcp; } + if (fixup_copy_count) + trace_xprtrdma_fixup(rqst, fixup_copy_count); return fixup_copy_count; } -- cgit v1.2.3-59-g8ed1b From f54c870d326aa02b73b68d2e0a503ec81dd3a4e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:09 -0400 Subject: xprtrdma: Replace dprintk() in rpcrdma_update_connect_private() Clean up: Use a single trace point to record each connection's negotiated inline thresholds and the computed maximum byte size of transport headers. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 36 ++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 4 ---- net/sunrpc/xprtrdma/verbs.c | 21 ++++++++++----------- 3 files changed, 46 insertions(+), 15 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 542177c5896f..0ca118bcc5ce 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -371,6 +371,42 @@ TRACE_EVENT(xprtrdma_cm_event, ) ); +TRACE_EVENT(xprtrdma_inline_thresh, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt + ), + + TP_ARGS(r_xprt), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, inline_send) + __field(unsigned int, inline_recv) + __field(unsigned int, max_send) + __field(unsigned int, max_recv) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + const struct rpcrdma_ep *ep = &r_xprt->rx_ep; + + __entry->r_xprt = r_xprt; + __entry->inline_send = ep->rep_inline_send; + __entry->inline_recv = ep->rep_inline_recv; + __entry->max_send = ep->rep_max_inline_send; + __entry->max_recv = ep->rep_max_inline_recv; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->inline_send, __entry->inline_recv, + __entry->max_send, __entry->max_recv + ) +); + DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 26d334c83b38..aec3beb93b25 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -78,8 +78,6 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) size += rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ - dprintk("RPC: %s: max call header size = %u\n", - __func__, size); return size; } @@ -100,8 +98,6 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ - dprintk("RPC: %s: max reply header size = %u\n", - __func__, size); return size; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 92bdf053716a..77c7dd7f05e8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -177,11 +177,11 @@ out_flushed: rpcrdma_recv_buffer_put(rep); } -static void -rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, - struct rdma_conn_param *param) +static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, + struct rdma_conn_param *param) { const struct rpcrdma_connect_private *pmsg = param->private_data; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; unsigned int rsize, wsize; /* Default settings for RPC-over-RDMA Version One */ @@ -197,13 +197,11 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } - if (rsize < r_xprt->rx_ep.rep_inline_recv) - r_xprt->rx_ep.rep_inline_recv = rsize; - if (wsize < r_xprt->rx_ep.rep_inline_send) - r_xprt->rx_ep.rep_inline_send = wsize; - dprintk("RPC: %s: max send %u, max recv %u\n", __func__, - r_xprt->rx_ep.rep_inline_send, - r_xprt->rx_ep.rep_inline_recv); + if (rsize < ep->rep_inline_recv) + ep->rep_inline_recv = rsize; + if (wsize < ep->rep_inline_send) + ep->rep_inline_send = wsize; + rpcrdma_set_max_header_sizes(r_xprt); } @@ -257,7 +255,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_ESTABLISHED: ++xprt->connect_cookie; ep->rep_connected = 1; - rpcrdma_update_connect_private(r_xprt, &event->param.conn); + rpcrdma_update_cm_private(r_xprt, &event->param.conn); + trace_xprtrdma_inline_thresh(r_xprt); wake_up_all(&ep->rep_connect_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: -- cgit v1.2.3-59-g8ed1b From a52c23b8b207d676d6cdf531af482a79fa622b9d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:14 -0400 Subject: xprtrdma: Replace dprintk in xprt_rdma_set_port Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 1 + net/sunrpc/xprtrdma/transport.c | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 0ca118bcc5ce..69a8278e5cef 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -416,6 +416,7 @@ DEFINE_RXPRT_EVENT(xprtrdma_remove); DEFINE_RXPRT_EVENT(xprtrdma_reinsert); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_close); +DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ce263e6f779c..7395eb2cfdeb 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -441,12 +441,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) struct sockaddr *sap = (struct sockaddr *)&xprt->addr; char buf[8]; - dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", - __func__, xprt, - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - port); - rpc_set_port(sap, port); kfree(xprt->address_strings[RPC_DISPLAY_PORT]); @@ -456,6 +450,9 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); snprintf(buf, sizeof(buf), "%4hx", port); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); + + trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, + rx_xprt)); } /** -- cgit v1.2.3-59-g8ed1b From a264abad51d8ecb7954a2f6d9f1885b38daffc74 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Nov 2019 16:25:52 -0500 Subject: SUNRPC: Capture completion of all RPC tasks RPC tasks on the backchannel never invoke xprt_complete_rqst(), so there is no way to report their tk_status at completion. Also, any RPC task that exits via rpc_exit_task() before it is replied to will also disappear without a trace. Introduce a trace point that is symmetrical with rpc_task_begin that captures the termination status of each RPC task. Sample trace output for callback requests initiated on the server: kworker/u8:12-448 [003] 127.025240: rpc_task_end: task:50@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task kworker/u8:12-448 [002] 127.567310: rpc_task_end: task:51@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task kworker/u8:12-448 [001] 130.506817: rpc_task_end: task:52@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task Odd, though, that I never see trace_rpc_task_complete, either in the forward or backchannel. Should it be removed? Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 1 + net/sunrpc/sched.c | 1 + 2 files changed, 2 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 378233fe5ac7..205bf28bcada 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -165,6 +165,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(end); DECLARE_EVENT_CLASS(rpc_task_queued, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 987c4b1f0b17..9c79548c6847 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -824,6 +824,7 @@ rpc_reset_task_statistics(struct rpc_task *task) */ void rpc_exit_task(struct rpc_task *task) { + trace_rpc_task_end(task, task->tk_action); task->tk_action = NULL; if (task->tk_ops->rpc_count_stats) task->tk_ops->rpc_count_stats(task, task->tk_calldata); -- cgit v1.2.3-59-g8ed1b