diff options
author | Chuck Lever <chuck.lever@oracle.com> | 2020-03-13 10:42:10 -0400 |
---|---|---|
committer | Chuck Lever <chuck.lever@oracle.com> | 2020-11-30 13:00:22 -0500 |
commit | f6ad77590a5d432589a5d8a211c4e8e50cd8bb63 (patch) | |
tree | d0c7c6f982854252d413c06c1776a6f66bddc9da /net/sunrpc/xprtrdma/svc_rdma_sendto.c | |
parent | NFSD: Invoke svc_encode_result_payload() in "read" NFSD encoders (diff) | |
download | linux-dev-f6ad77590a5d432589a5d8a211c4e8e50cd8bb63.tar.xz linux-dev-f6ad77590a5d432589a5d8a211c4e8e50cd8bb63.zip |
svcrdma: Post RDMA Writes while XDR encoding replies
The only RPC/RDMA ordering requirement between RDMA Writes and RDMA
Sends is that the responder must post the Writes on the Send queue
before posting the Send that conveys the RPC Reply for that Write
payload.
The Linux NFS server implementation now has a transport method that
can post result Payload Writes earlier than svc_rdma_sendto:
->xpo_result_payload()
This gets RDMA Writes going earlier so they are more likely to be
complete at the remote end before the Send completes.
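Some care must be taken with pulled-up Replies. We don't want to
push the Write chunk and then send the same payload data via Send.
To avoid that, the small-message pull-up shortcut is skipped when the
client provided a Write chunk.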
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/svc_rdma_sendto.c')
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 60 |
1 file changed, 34 insertions, 26 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index d6436c13d5c4..e8b0d030e1e6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -468,11 +468,14 @@ svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, { ssize_t len, ret; - ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, - rctxt->rc_read_payload_length); - if (ret < 0) - return ret; - len = ret; + len = 0; + if (rctxt->rc_write_list) { + ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, + rctxt->rc_read_payload_length); + if (ret < 0) + return ret; + len = ret; + } /* Terminate the Write list */ ret = xdr_stream_encode_item_absent(&sctxt->sc_stream); @@ -556,11 +559,13 @@ static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr) { + bool write_chunk_present = rctxt && rctxt->rc_write_list; int elements; /* For small messages, copying bytes is cheaper than DMA mapping. 
*/ - if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH) + if (!write_chunk_present && + sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH) return true; /* Check whether the xdr_buf has more elements than can @@ -893,9 +898,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) container_of(xprt, struct svcxprt_rdma, sc_xprt); struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; __be32 *rdma_argp = rctxt->rc_recv_buf; - __be32 *wr_lst = rctxt->rc_write_list; __be32 *rp_ch = rctxt->rc_reply_chunk; - struct xdr_buf *xdr = &rqstp->rq_res; struct svc_rdma_send_ctxt *sctxt; __be32 *p; int ret; @@ -920,19 +923,8 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (svc_rdma_encode_read_list(sctxt) < 0) goto err0; - if (wr_lst) { - /* XXX: Presume the client sent only one Write chunk */ - ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, - rctxt->rc_read_payload_offset, - rctxt->rc_read_payload_length); - if (ret < 0) - goto err2; - if (svc_rdma_encode_write_list(rctxt, sctxt) < 0) - goto err0; - } else { - if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) - goto err0; - } + if (svc_rdma_encode_write_list(rctxt, sctxt) < 0) + goto err0; if (rp_ch) { ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res); if (ret < 0) @@ -974,16 +966,25 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) * @offset: payload's byte offset in @xdr * @length: size of payload, in bytes * - * Returns zero on success. - * - * For the moment, just record the xdr_buf location of the result - * payload. svc_rdma_sendto will use that location later when - * we actually send the payload. 
+ * Return values: + * %0 if successful or nothing needed to be done + * %-EMSGSIZE on XDR buffer overflow + * %-E2BIG if the payload was larger than the Write chunk + * %-EINVAL if client provided too many segments + * %-ENOMEM if rdma_rw context pool was exhausted + * %-ENOTCONN if posting failed (connection is lost) + * %-EIO if rdma_rw initialization failed (DMA mapping, etc) */ int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length) { struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; + struct svcxprt_rdma *rdma; + struct xdr_buf subbuf; + int ret; + + if (!rctxt->rc_write_list || !length) + return 0; /* XXX: Just one READ payload slot for now, since our * transport implementation currently supports only one @@ -992,5 +993,12 @@ int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, rctxt->rc_read_payload_offset = offset; rctxt->rc_read_payload_length = length; + if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length)) + return -EMSGSIZE; + + rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); + ret = svc_rdma_send_write_chunk(rdma, rctxt->rc_write_list, &subbuf); + if (ret < 0) + return ret; return 0; } |