From 93aa6c7bbc4124c8361c26a8b2c5c40afb185619 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 3 Jul 2015 09:28:09 -0400
Subject: SUNRPC: Don't reencode message if transmission failed with ENOBUFS

If we're running out of buffer memory when transmitting data, then
we want to just delay for a moment, and then continue transmitting
the remainder of the message.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/clnt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net/sunrpc')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cbc6af923dd1..23608eb0ded2 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1902,6 +1902,7 @@ call_transmit_status(struct rpc_task *task)
 
 	switch (task->tk_status) {
 	case -EAGAIN:
+	case -ENOBUFS:
 		break;
 	default:
 		dprint_status(task);
@@ -1928,7 +1929,6 @@ call_transmit_status(struct rpc_task *task)
 	case -ECONNABORTED:
 	case -EADDRINUSE:
 	case -ENOTCONN:
-	case -ENOBUFS:
 	case -EPIPE:
 		rpc_task_force_reencode(task);
 	}
@@ -2057,12 +2057,13 @@ call_status(struct rpc_task *task)
 	case -ECONNABORTED:
 		rpc_force_rebind(clnt);
 	case -EADDRINUSE:
-	case -ENOBUFS:
 		rpc_delay(task, 3*HZ);
 	case -EPIPE:
 	case -ENOTCONN:
 		task->tk_action = call_bind;
 		break;
+	case -ENOBUFS:
+		rpc_delay(task, HZ>>2);
 	case -EAGAIN:
 		task->tk_action = call_transmit;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From b5872f0c67edf3714dd46f04d73c3644f3addaf9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 3 Jul 2015 09:32:23 -0400
Subject: SUNRPC: Don't confuse ENOBUFS with a write_space issue

ENOBUFS means that memory allocations are failing due to an actual
low memory situation. It should not be confused with being out of
socket buffer space.

Handle the problem by just punting to the delay in call_status.

Reported-by: Neil Brown <neilb@suse.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/xprtsock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/sunrpc')

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ee359fc7af16..44c1927b68c7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -539,6 +539,7 @@ static int xs_local_send_request(struct rpc_task *task)
 
 	switch (status) {
 	case -ENOBUFS:
+		break;
 	case -EAGAIN:
 		status = xs_nospace(task);
 		break;
@@ -692,7 +693,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
 		status = -ENOTCONN;
 		/* Should we call xs_close() here? */
 		break;
-	case -ENOBUFS:
 	case -EAGAIN:
 		status = xs_nospace(task);
 		break;
@@ -703,6 +703,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	case -ECONNREFUSED:
 	case -ENOTCONN:
 	case -EADDRINUSE:
+	case -ENOBUFS:
 	case -EPIPE:
 		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 68514471ceceac63c7fa9ad684d882f41be5b2d8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 22 Jul 2015 16:31:17 -0400
Subject: SUNRPC: Fix a backchannel deadlock

xprt_alloc_bc_request() cannot call xprt_free_bc_request() without
deadlocking, since it already holds the xprt->bc_pa_lock.

Reported-by: Chuck Lever <chuck.lever@oracle.com>
Fixes: 0d2a970d0ae55 ("SUNRPC: Fix a backchannel race")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/backchannel_rqst.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/sunrpc')

diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 9825ff0f91d6..5a3b50aec397 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -240,8 +240,8 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
 		req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
 		if (!req)
 			goto not_found;
-		/* Note: this 'free' request adds it to xprt->bc_pa_list */
-		xprt_free_bc_request(req);
+		list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+		xprt->bc_alloc_count++;
 	}
 	req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
 				rq_bc_pa_list);
-- 
cgit v1.2.3-59-g8ed1b


From 1980bd4d829a87ccd21b949f8a11ff1b426f5b0b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 22 Jul 2015 17:05:32 -0400
Subject: SUNRPC: xprt_complete_bc_request must also decrement the free slot
 count

Calling xprt_complete_bc_request() effectively causes the slot to be allocated,
so it needs to decrement the backchannel free slot count as well.

Fixes: 0d2a970d0ae5 ("SUNRPC: Fix a backchannel race")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/backchannel_rqst.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/sunrpc')

diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 5a3b50aec397..6255d141133b 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -336,7 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
 
 	spin_lock(&xprt->bc_pa_lock);
 	list_del(&req->rq_bc_pa_list);
-	xprt->bc_alloc_count--;
+	xprt_dec_alloc_count(xprt, 1);
 	spin_unlock(&xprt->bc_pa_lock);
 
 	req->rq_private_buf.len = copied;
-- 
cgit v1.2.3-59-g8ed1b


From 743c69e7c089ba1bea1b207c5829dd079a4e98f9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 27 Jul 2015 10:55:35 +1000
Subject: sunrpc: translate -EAGAIN to -ENOBUFS when socket is writable.

The networking layer does not reliably report the distinction between
a non-block write failing because:
 1/ the queue is too full already and
 2/ a memory allocation attempt failed.

The distinction is important because in the first case it is
appropriate to retry as soon as the socket reports that it is
writable, and in the second case a small delay is required as the
socket will most likely report as writable but kmalloc could still
fail.

sk_stream_wait_memory() exhibits this distinction nicely, setting
'vm_wait' if a small wait is needed.  However in the non-blocking case
it always returns -EAGAIN no matter the cause of the failure.  This
-EAGAIN call get all the way to sunrpc.

The sunrpc layer expects EAGAIN to indicate the first cause, and
ENOBUFS to indicate the second.  Various documentation suggests that
this is not unreasonable, but does not guarantee the desired error
codes.

The result of getting -EAGAIN when -ENOBUFS is expected is that the
send is tried again in a tight loop and soft lockups are reported.

so: add tests after calls to xs_sendpages() to translate -EAGAIN into
-ENOBUFS if the socket is writable.  This cannot happen inside
xs_sendpages() as the test for "is socket writable" is different
between TCP and UDP.

With this change, the tight loop retrying xs_sendpages() becomes a
loop which only retries every 250ms, and so will not trigger a
soft-lockup warning.

It is possible that the write did fail because the queue was too full
and by the time xs_sendpages() completed, the queue was writable
again.  In this case an extra 250ms delay is inserted that isn't
really needed.  This circumstance suggests a degree of congestion so a
delay is not necessarily a bad thing, and it can only cause a single
250ms delay, not a series of them.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/xprtsock.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'net/sunrpc')

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 44c1927b68c7..4f48b1a19e9f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -527,6 +527,10 @@ static int xs_local_send_request(struct rpc_task *task)
 			      true, &sent);
 	dprintk("RPC:       %s(%u) = %d\n",
 			__func__, xdr->len - req->rq_bytes_sent, status);
+
+	if (status == -EAGAIN && sock_writeable(transport->inet))
+		status = -ENOBUFS;
+
 	if (likely(sent > 0) || status == 0) {
 		req->rq_bytes_sent += sent;
 		req->rq_xmit_bytes_sent += sent;
@@ -590,6 +594,9 @@ static int xs_udp_send_request(struct rpc_task *task)
 	if (status == -EPERM)
 		goto process_status;
 
+	if (status == -EAGAIN && sock_writeable(transport->inet))
+		status = -ENOBUFS;
+
 	if (sent > 0 || status == 0) {
 		req->rq_xmit_bytes_sent += sent;
 		if (sent >= req->rq_slen)
@@ -687,6 +694,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
 		status = -EAGAIN;
 		break;
 	}
+	if (status == -EAGAIN && sk_stream_is_writeable(transport->inet))
+		status = -ENOBUFS;
 
 	switch (status) {
 	case -ENOTSOCK:
-- 
cgit v1.2.3-59-g8ed1b


From f580dd042823294b5b548e0f8bf1ba7a4b114fa5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 11 Jul 2015 17:48:52 +0200
Subject: SUNRPC: Report TCP errors to the caller

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/xprtsock.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net/sunrpc')

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4f48b1a19e9f..6a21368bdd8e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -677,9 +677,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
 		dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
 				xdr->len - req->rq_bytes_sent, status);
 
-		if (unlikely(sent == 0 && status < 0))
-			break;
-
 		/* If we've sent the entire packet, immediately
 		 * reset the count of bytes sent. */
 		req->rq_bytes_sent += sent;
@@ -689,10 +686,12 @@ static int xs_tcp_send_request(struct rpc_task *task)
 			return 0;
 		}
 
-		if (sent != 0)
-			continue;
-		status = -EAGAIN;
-		break;
+		if (status < 0)
+			break;
+		if (sent == 0) {
+			status = -EAGAIN;
+			break;
+		}
 	}
 	if (status == -EAGAIN && sk_stream_is_writeable(transport->inet))
 		status = -ENOBUFS;
-- 
cgit v1.2.3-59-g8ed1b