From 482725027ff32bc857f5527fb17feda5361265fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Jan 2018 15:42:18 -0500 Subject: svcrdma: Post Receives in the Receive completion handler This change improves Receive efficiency by posting Receives only on the same CPU that handles Receive completion. Improved latency and throughput has been noted with this change. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 5 ----- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 9 +-------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 6 ------ net/sunrpc/xprtrdma/svc_rdma_transport.c | 25 +++++++------------------ 4 files changed, 8 insertions(+), 37 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index af7893501e40..a73632ca9048 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -95,7 +95,6 @@ out_shortreply: out_notfound: dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", xprt, be32_to_cpu(xid)); - goto out_unlock; } @@ -129,10 +128,6 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, if (ret < 0) goto out_err; - ret = svc_rdma_repost_recv(rdma, GFP_NOIO); - if (ret) - goto out_err; - /* Bump page refcnt so Send completion doesn't release * the rq_buffer before all retransmits are complete. */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ad4bd62eebf1..19e9c6b33042 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -400,10 +400,6 @@ static void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct page *page; int ret; - ret = svc_rdma_repost_recv(xprt, GFP_KERNEL); - if (ret) - return; - page = alloc_page(GFP_KERNEL); if (!page) return; @@ -554,8 +550,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, &rqstp->rq_arg); svc_rdma_put_context(ctxt, 0); - if (ret) - goto repost; return ret; } @@ -590,6 +584,5 @@ out_postfail: out_drop: svc_rdma_put_context(ctxt, 1); -repost: - return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL); + return 0; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7c3a211e0e9a..649441d5087d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -674,9 +674,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); } - ret = svc_rdma_post_recv(rdma, GFP_KERNEL); - if (ret) - goto err1; ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp, wr_lst, rp_ch); if (ret < 0) @@ -687,9 +684,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (ret != -E2BIG && ret != -EINVAL) goto err1; - ret = svc_rdma_post_recv(rdma, GFP_KERNEL); - if (ret) - goto err1; ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp); if (ret < 0) goto err0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 46ec069150d5..9ad12a215b51 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -58,6 +58,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +static int svc_rdma_post_recv(struct svcxprt_rdma *xprt); static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, @@ -320,6 +321,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q); spin_unlock(&xprt->sc_rq_dto_lock); + svc_rdma_post_recv(xprt); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) goto out; @@ -404,7 +407,8 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, return cma_xprt; } -int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) +static int +svc_rdma_post_recv(struct svcxprt_rdma *xprt) { struct ib_recv_wr recv_wr, *bad_recv_wr; struct svc_rdma_op_ctxt *ctxt; @@ -423,7 +427,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err_put_ctxt; } - page = alloc_page(flags); + page = alloc_page(GFP_KERNEL); if (!page) goto err_put_ctxt; ctxt->pages[sge_no] = page; @@ -459,21 +463,6 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) return -ENOMEM; } -int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) -{ - int ret = 0; - - ret = svc_rdma_post_recv(xprt, flags); - if (ret) { - pr_err("svcrdma: could not post a receive buffer, err=%d.\n", - ret); - pr_err("svcrdma: closing transport %p.\n", xprt); - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - ret = -ENOTCONN; - } - return ret; -} - static void svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, struct rdma_conn_param *param) @@ -833,7 +822,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Post receive buffers */ for (i = 0; i < newxprt->sc_max_requests; i++) { - ret = svc_rdma_post_recv(newxprt, GFP_KERNEL); + ret = svc_rdma_post_recv(newxprt); if (ret) { dprintk("svcrdma: failure posting receive buffers\n"); goto errout; -- cgit v1.2.3-59-g8ed1b From d0945caafcccac9d8f4d78f99e0eac9e60386381 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Jan 2018 12:21:04 +0100 Subject: sunrpc: remove dead code in svc_sock_setbufsize Setting values in struct sock directly is the usual method. Remove the long dead code using set_fs() and the related comment. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ff8e06cd067e..5884583f93a4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -384,25 +384,11 @@ static int svc_partial_recvfrom(struct svc_rqst *rqstp, static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) { -#if 0 - mm_segment_t oldfs; - oldfs = get_fs(); set_fs(KERNEL_DS); - sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char*)&snd, sizeof(snd)); - sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, - (char*)&rcv, sizeof(rcv)); -#else - /* sock_setsockopt limits use to sysctl_?mem_max, - * which isn't acceptable. Until that is made conditional - * on not having CAP_SYS_RESOURCE or similar, we go direct... - * DaveM said I could! - */ lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); -#endif } static int svc_sock_secure_port(struct svc_rqst *rqstp) -- cgit v1.2.3-59-g8ed1b From 175e03101d36c3034f3c80038d4c28838351a7f2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 2 Feb 2018 14:28:59 -0500 Subject: svcrdma: Fix Read chunk round-up A single NFSv4 WRITE compound can often have three operations: PUTFH, WRITE, then GETATTR. When the WRITE payload is sent in a Read chunk, the client places the GETATTR in the inline part of the RPC/RDMA message, just after the WRITE operation (sans payload). The position value in the Read chunk enables the receiver to insert the Read chunk at the correct place in the received XDR stream; that is between the WRITE and GETATTR. According to RFC 8166, an NFS/RDMA client does not have to add XDR round-up to the Read chunk that carries the WRITE payload. The receiver adds XDR round-up padding if it is absent and the receiver's XDR decoder requires it to be present. Commit 193bcb7b3719 ("svcrdma: Populate tail iovec when receiving") attempted to add support for receiving such a compound so that just the WRITE payload appears in rq_arg's page list, and the trailing GETATTR is placed in rq_arg's tail iovec. (TCP just strings the whole compound into the head iovec and page list, without regard to the alignment of the WRITE payload). The server transport logic also had to accommodate the optional XDR round-up of the Read chunk, which it did simply by lengthening the tail iovec when round-up was needed. This approach is adequate for the NFSv2 and NFSv3 WRITE decoders. Unfortunately it is not sufficient for nfsd4_decode_write. When the Read chunk length is a couple of bytes less than PAGE_SIZE, the computation at the end of nfsd4_decode_write allows argp->pagelen to go negative, which breaks the logic in read_buf that looks for the tail iovec. The result is that a WRITE operation whose payload length is just less than a multiple of a page succeeds, but the subsequent GETATTR in the same compound fails with NFS4ERR_OP_ILLEGAL because the XDR decoder can't find it. Clients ignore the error, but they must update their attribute cache via a separate round trip. As nfsd4_decode_write appears to expect the payload itself to always have appropriate XDR round-up, have svc_rdma_build_normal_read_chunk add the Read chunk XDR round-up to the page_len rather than lengthening the tail iovec. Reported-by: Olga Kornievskaia Fixes: 193bcb7b3719 ("svcrdma: Populate tail iovec when receiving") Signed-off-by: Chuck Lever Tested-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 9bd04549a1ad..12b9a7e0b6d2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -727,12 +727,16 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, head->arg.head[0].iov_len - info->ri_position; head->arg.head[0].iov_len = info->ri_position; - /* Read chunk may need XDR roundup (see RFC 5666, s. 3.7). + /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). * - * NFSv2/3 write decoders need the length of the tail to - * contain the size of the roundup padding. + * If the client already rounded up the chunk length, the + * length does not change. Otherwise, the length of the page + * list is increased to include XDR round-up. + * + * Currently these chunks always start at page offset 0, + * thus the rounded-up length never crosses a page boundary. */ - head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3); + info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2; head->arg.page_len = info->ri_chunklen; head->arg.len += info->ri_chunklen; -- cgit v1.2.3-59-g8ed1b