From 1602a7b7d33741d0490682e620bb29e96ad684d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Jan 2019 09:17:12 -0500 Subject: SUNRPC: Don't allow compiler optimisation of svc_xprt_release_slot() Use READ_ONCE() to tell the compiler to not optimse away the read of xprt->xpt_flags in svc_xprt_release_slot(). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 4eb8fbf2508d..c3af1aaef551 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -363,9 +363,13 @@ static void svc_xprt_release_slot(struct svc_rqst *rqstp) static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) { - if (xprt->xpt_flags & ((1<xpt_flags); + + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) return true; - if (xprt->xpt_flags & ((1<xpt_ops->xpo_has_wspace(xprt) && svc_xprt_slots_in_range(xprt)) return true; -- cgit v1.2.3-59-g8ed1b From 66c898caefd346a88fbef242eb7892fd959308f6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 11 Jan 2019 15:57:09 -0500 Subject: svcrpc: svc_xprt_has_something_to_do seems a little long The long name seemed cute till I wanted to refer to it somewhere else. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c3af1aaef551..a2435d3811a9 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -361,7 +361,7 @@ static void svc_xprt_release_slot(struct svc_rqst *rqstp) } } -static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) +static bool svc_xprt_ready(struct svc_xprt *xprt) { unsigned long xpt_flags; @@ -385,7 +385,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) struct svc_rqst *rqstp = NULL; int cpu; - if (!svc_xprt_has_something_to_do(xprt)) + if (!svc_xprt_ready(xprt)) return; /* Mark transport as busy. It will remain in this state until -- cgit v1.2.3-59-g8ed1b From 95503d295ad6af20f09efff193e085481a962fd2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 11 Jan 2019 15:36:40 -0500 Subject: svcrpc: fix unlikely races preventing queueing of sockets In the rpc server, When something happens that might be reason to wake up a thread to do something, what we do is - modify xpt_flags, sk_sock->flags, xpt_reserved, or xpt_nr_rqsts to indicate the new situation - call svc_xprt_enqueue() to decide whether to wake up a thread. svc_xprt_enqueue may require multiple conditions to be true before queueing up a thread to handle the xprt. In the SMP case, one of the other CPU's may have set another required condition, and in that case, although both CPUs run svc_xprt_enqueue(), it's possible that neither call sees the writes done by the other CPU in time, and neither one recognizes that all the required conditions have been set. A socket could therefore be ignored indefinitely. Add memory barries to ensure that any svc_xprt_enqueue() call will always see the conditions changed by other CPUs before deciding to ignore a socket. I've never seen this race reported. In the unlikely event it happens, another event will usually come along and the problem will fix itself. So I don't think this is worth backporting to stable. Chuck tried this patch and said "I don't see any performance regressions, but my server has only a single last-level CPU cache." Tested-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 12 +++++++++++- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 ++- net/sunrpc/xprtrdma/svc_rdma_rw.c | 3 ++- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a2435d3811a9..61530b1b7754 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -357,6 +357,7 @@ static void svc_xprt_release_slot(struct svc_rqst *rqstp) struct svc_xprt *xprt = rqstp->rq_xprt; if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) { atomic_dec(&xprt->xpt_nr_rqsts); + smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */ svc_xprt_enqueue(xprt); } } @@ -365,6 +366,15 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) { unsigned long xpt_flags; + /* + * If another cpu has recently updated xpt_flags, + * sk_sock->flags, xpt_reserved, or xpt_nr_rqsts, we need to + * know about it; otherwise it's possible that both that cpu and + * this one could call svc_xprt_enqueue() without either + * svc_xprt_enqueue() recognizing that the conditions below + * are satisfied, and we could stall indefinitely: + */ + smp_rmb(); xpt_flags = READ_ONCE(xprt->xpt_flags); if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) @@ -479,7 +489,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space) if (xprt && space < rqstp->rq_reserved) { atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; - + smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */ svc_xprt_enqueue(xprt); } } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 828b149eaaef..3ebb158c2279 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -314,8 +314,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) spin_lock(&rdma->sc_rq_dto_lock); list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q); - spin_unlock(&rdma->sc_rq_dto_lock); + /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */ set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); + spin_unlock(&rdma->sc_rq_dto_lock); if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) svc_xprt_enqueue(&rdma->sc_xprt); goto out; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index dc1951759a8e..c35753691960 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -287,9 +287,10 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) spin_lock(&rdma->sc_rq_dto_lock); list_add_tail(&info->ri_readctxt->rc_list, &rdma->sc_read_complete_q); + /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */ + set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); spin_unlock(&rdma->sc_rq_dto_lock); - set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); svc_xprt_enqueue(&rdma->sc_xprt); } -- cgit v1.2.3-59-g8ed1b From 14cfbd94998a3ad6aaa67da46d997eea9e31897e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Jan 2019 15:11:40 -0600 Subject: svcrdma: Use struct_size() in kmalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; instance = kmalloc(sizeof(struct foo) + count * sizeof(struct boo), GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kmalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index c35753691960..65ee6fd60162 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -64,8 +64,7 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) spin_unlock(&rdma->sc_rw_ctxt_lock); } else { spin_unlock(&rdma->sc_rw_ctxt_lock); - ctxt = kmalloc(sizeof(*ctxt) + - SG_CHUNK_SIZE * sizeof(struct scatterlist), + ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), GFP_KERNEL); if (!ctxt) goto out; -- cgit v1.2.3-59-g8ed1b From c7920f06ae755e2b2f3f14895a5fa148e81db7e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 Feb 2019 12:00:52 -0500 Subject: svcrdma: Squelch compiler warning when SUNRPC_DEBUG is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CC [M] net/sunrpc/xprtrdma/svc_rdma_transport.o linux/net/sunrpc/xprtrdma/svc_rdma_transport.c: In function ‘svc_rdma_accept’: linux/net/sunrpc/xprtrdma/svc_rdma_transport.c:452:19: warning: variable ‘sap’ set but not used [-Wunused-but-set-variable] struct sockaddr *sap; ^ Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 57f86c63a463..ef6afcf5e9c4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -390,8 +390,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_qp_init_attr qp_attr; unsigned int ctxts, rq_depth; struct ib_device *dev; - struct sockaddr *sap; int ret = 0; + RPC_IFDEBUG(struct sockaddr *sap); listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); clear_bit(XPT_CONN, &xprt->xpt_flags); @@ -525,6 +525,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (ret) goto errout; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) dprintk("svcrdma: new connection %p accepted:\n", newxprt); sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); @@ -535,6 +536,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" rdma_rw_ctxs : %d\n", ctxts); dprintk(" max_requests : %d\n", newxprt->sc_max_requests); dprintk(" ord : %d\n", conn_param.initiator_depth); +#endif trace_svcrdma_xprt_accept(&newxprt->sc_xprt); return &newxprt->sc_xprt; -- cgit v1.2.3-59-g8ed1b From 8820bcaa5bd73db2e28caae98f080a04cb6e2abb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 6 Feb 2019 12:00:57 -0500 Subject: svcrdma: Remove syslog warnings in work completion handlers These can result in a lot of log noise, and are able to be triggered by client misbehavior. Since there are trace points in these handlers now, there's no need to spam the log. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 9 +-------- net/sunrpc/xprtrdma/svc_rdma_rw.c | 11 +---------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 4 ---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 ----- 4 files changed, 2 insertions(+), 27 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 3ebb158c2279..65e2fb9aac65 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -272,11 +272,8 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) return false; ctxt->rc_temp = true; ret = __svc_rdma_post_recv(rdma, ctxt); - if (ret) { - pr_err("svcrdma: failure posting recv buffers: %d\n", - ret); + if (ret) return false; - } } return true; } @@ -322,10 +319,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) goto out; flushed: - if (wc->status != IB_WC_WR_FLUSH_ERR) - pr_err("svcrdma: Recv: %s (%u/0x%x)\n", - ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); post_err: svc_rdma_recv_ctxt_put(rdma, ctxt); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 65ee6fd60162..2121c9b4d275 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -212,13 +212,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); - if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (unlikely(wc->status != IB_WC_SUCCESS)) set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - if (wc->status != IB_WC_WR_FLUSH_ERR) - pr_err("svcrdma: write ctx: %s (%u/0x%x)\n", - ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); - } svc_rdma_write_info_free(info); } @@ -277,10 +272,6 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - if (wc->status != IB_WC_WR_FLUSH_ERR) - pr_err("svcrdma: read ctx: %s (%u/0x%x)\n", - ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt); } else { spin_lock(&rdma->sc_rq_dto_lock); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 1f200119268c..6fdba72f89f4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -272,10 +272,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_xprt_enqueue(&rdma->sc_xprt); - if (wc->status != IB_WC_WR_FLUSH_ERR) - pr_err("svcrdma: Send: %s (%u/0x%x)\n", - ib_wc_status_msg(wc->status), - wc->status, wc->vendor_err); } svc_xprt_put(&rdma->sc_xprt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ef6afcf5e9c4..027a3b07d329 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -590,11 +590,6 @@ static void __svc_rdma_free(struct work_struct *work) if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_drain_qp(rdma->sc_qp); - /* We should only be called from kref_put */ - if (kref_read(&xprt->xpt_ref) != 0) - pr_err("svcrdma: sc_xprt still in use? (%d)\n", - kref_read(&xprt->xpt_ref)); - svc_rdma_flush_recv_queues(rdma); /* Final put of backchannel client transport */ -- cgit v1.2.3-59-g8ed1b From b7e5034cbecf5a65b7bfdc2b20a8378039577706 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Feb 2019 12:54:50 -0500 Subject: svcrpc: fix UDP on servers with lots of threads James Pearson found that an NFS server stopped responding to UDP requests if started with more than 1017 threads. sv_max_mesg is about 2^20, so that is probably where the calculation performed by svc_sock_setbufsize(svsk->sk_sock, (serv->sv_nrthreads+3) * serv->sv_max_mesg, (serv->sv_nrthreads+3) * serv->sv_max_mesg); starts to overflow an int. Reported-by: James Pearson Tested-by: James Pearson Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a6a060925e5d..43590a968b73 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -349,12 +349,16 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, /* * Set socket snd and rcv buffer lengths */ -static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, - unsigned int rcv) +static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) { + unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg; + struct socket *sock = svsk->sk_sock; + + nreqs = min(nreqs, INT_MAX / 2 / max_mesg); + lock_sock(sock->sk); - sock->sk->sk_sndbuf = snd * 2; - sock->sk->sk_rcvbuf = rcv * 2; + sock->sk->sk_sndbuf = nreqs * max_mesg * 2; + sock->sk->sk_rcvbuf = nreqs * max_mesg * 2; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); } @@ -516,9 +520,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) * provides an upper bound on the number of threads * which will access the socket. */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - (serv->sv_nrthreads+3) * serv->sv_max_mesg); + svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3); clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); skb = NULL; @@ -681,9 +683,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) * receive and respond to one request. * svc_udp_recvfrom will re-adjust if necessary */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); + svc_sock_setbufsize(svsk, 3); /* data might have come in before data_ready set up */ set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); -- cgit v1.2.3-59-g8ed1b