aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-12-06 18:57:04 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-12-06 18:57:04 -0800
commit 7f80c7325be49db3fb8b5f343f47691b7999fda7 (patch)
tree e9cc8b3041f52f632d1978a3626e02a2f865d586
parent Merge branch 'spectre' of git://git.armlinux.org.uk/~rmk/linux-arm (diff)
parent SUNRPC: Don't force a redundant disconnection in xs_read_stream() (diff)
download linux-dev-7f80c7325be49db3fb8b5f343f47691b7999fda7.tar.xz
linux-dev-7f80c7325be49db3fb8b5f343f47691b7999fda7.zip
Merge tag 'nfs-for-4.20-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client bugfixes from Trond Myklebust:
 "This is mainly fallout from the updates to the SUNRPC code that is
  being triggered from less common combinations of NFS mount options.

  Highlights include:

  Stable fixes:
   - Fix a page leak when using RPCSEC_GSS/krb5p to encrypt data.

  Bugfixes:
   - Fix a regression that causes the RPC receive code to hang
   - Fix call_connect_status() so that it handles tasks that got
     transmitted while queued waiting for the socket lock.
   - Fix a memory leak in call_encode()
   - Fix several other connect races.
   - Fix receive code error handling.
   - Use the discard iterator rather than MSG_TRUNC for compatibility
     with AF_UNIX/AF_LOCAL sockets.
   - nfs: don't dirty kernel pages read by direct-io
   - pnfs/Flexfiles fix to enforce per-mirror stateid only for NFSv4
     data servers"

* tag 'nfs-for-4.20-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  SUNRPC: Don't force a redundant disconnection in xs_read_stream()
  SUNRPC: Fix up socket polling
  SUNRPC: Use the discard iterator rather than MSG_TRUNC
  SUNRPC: Treat EFAULT as a truncated message in xs_read_stream_request()
  SUNRPC: Fix up handling of the XDRBUF_SPARSE_PAGES flag
  SUNRPC: Fix RPC receive hangs
  SUNRPC: Fix a potential race in xprt_connect()
  SUNRPC: Fix a memory leak in call_encode()
  SUNRPC: Fix leak of krb5p encode pages
  SUNRPC: call_connect_status() must handle tasks that got transmitted
  nfs: don't dirty kernel pages read by direct-io
  flexfiles: enforce per-mirror stateid only for v4 DSes
-rw-r--r-- fs/nfs/direct.c 9
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c 6
-rw-r--r-- include/linux/sunrpc/xdr.h 1
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c 4
-rw-r--r-- net/sunrpc/clnt.c 8
-rw-r--r-- net/sunrpc/xprt.c 13
-rw-r--r-- net/sunrpc/xprtsock.c 81
7 files changed, 73 insertions, 49 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aa12c3063bae..33824a0a57bf 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -98,8 +98,11 @@ struct nfs_direct_req {
struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
struct work_struct work;
int flags;
+ /* for write */
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
+ /* for read */
+#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
struct nfs_writeverf verf; /* unstable write verifier */
};
@@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
- if (!PageCompound(page) && bytes < hdr->good_bytes)
+ if (!PageCompound(page) && bytes < hdr->good_bytes &&
+ (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY))
set_page_dirty(page);
bytes += req->wb_bytes;
nfs_list_remove_request(req);
@@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
+ if (iter_is_iovec(iter))
+ dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
+
nfs_start_io_direct(inode);
NFS_I(inode)->read_io += count;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 74b36ed883ca..310d7500f665 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1733,7 +1733,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
if (fh)
hdr->args.fh = fh;
- if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+ if (vers == 4 &&
+ !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
goto out_failed;
/*
@@ -1798,7 +1799,8 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
if (fh)
hdr->args.fh = fh;
- if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+ if (vers == 4 &&
+ !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
goto out_failed;
/*
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 43106ffa6788..2ec128060239 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -72,7 +72,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
buf->head[0].iov_base = start;
buf->head[0].iov_len = len;
buf->tail[0].iov_len = 0;
- buf->bvec = NULL;
buf->pages = NULL;
buf->page_len = 0;
buf->flags = 0;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5d3f252659f1..ba765473d1f0 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1791,6 +1791,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp)
for (i=0; i < rqstp->rq_enc_pages_num; i++)
__free_page(rqstp->rq_enc_pages[i]);
kfree(rqstp->rq_enc_pages);
+ rqstp->rq_release_snd_buf = NULL;
}
static int
@@ -1799,6 +1800,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
int first, last, i;
+ if (rqstp->rq_release_snd_buf)
+ rqstp->rq_release_snd_buf(rqstp);
+
if (snd_buf->page_len == 0) {
rqstp->rq_enc_pages_num = 0;
return 0;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ae3b8145da35..c6782aa47525 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1915,6 +1915,13 @@ call_connect_status(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
+ /* Check if the task was already transmitted */
+ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+ xprt_end_transmit(task);
+ task->tk_action = call_transmit_status;
+ return;
+ }
+
dprint_status(task);
trace_rpc_connect_status(task);
@@ -2302,6 +2309,7 @@ out_retry:
task->tk_status = 0;
/* Note: rpc_verify_header() may have freed the RPC slot */
if (task->tk_rqstp == req) {
+ xdr_free_bvec(&req->rq_rcv_buf);
req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
if (task->tk_client->cl_discrtry)
xprt_conditional_disconnect(req->rq_xprt,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 86bea4520c4d..ce927002862a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,8 +826,15 @@ void xprt_connect(struct rpc_task *task)
return;
if (xprt_test_and_set_connecting(xprt))
return;
- xprt->stat.connect_start = jiffies;
- xprt->ops->connect(xprt, task);
+ /* Race breaker */
+ if (!xprt_connected(xprt)) {
+ xprt->stat.connect_start = jiffies;
+ xprt->ops->connect(xprt, task);
+ } else {
+ xprt_clear_connecting(xprt);
+ task->tk_status = 0;
+ rpc_wake_up_queued_task(&xprt->pending, task);
+ }
}
xprt_release_write(xprt, task);
}
@@ -1623,6 +1630,8 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.buflen = 0;
req->rq_rcv_buf.len = 0;
req->rq_rcv_buf.buflen = 0;
+ req->rq_snd_buf.bvec = NULL;
+ req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req);
dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ae77c71c1f64..8a5e823e0b33 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -330,18 +330,16 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
{
size_t i,n;
- if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+ if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
return want;
- if (want > buf->page_len)
- want = buf->page_len;
n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < n; i++) {
if (buf->pages[i])
continue;
buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
if (!buf->pages[i]) {
- buf->page_len = (i * PAGE_SIZE) - buf->page_base;
- return buf->page_len;
+ i *= PAGE_SIZE;
+ return i > buf->page_base ? i - buf->page_base : 0;
}
}
return want;
@@ -378,8 +376,8 @@ static ssize_t
xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
size_t count)
{
- struct kvec kvec = { 0 };
- return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+ iov_iter_discard(&msg->msg_iter, READ, count);
+ return sock_recvmsg(sock, msg, flags);
}
static ssize_t
@@ -398,16 +396,17 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
seek -= buf->head[0].iov_len;
offset += buf->head[0].iov_len;
}
- if (seek < buf->page_len) {
- want = xs_alloc_sparse_pages(buf,
- min_t(size_t, count - offset, buf->page_len),
- GFP_NOWAIT);
+
+ want = xs_alloc_sparse_pages(buf,
+ min_t(size_t, count - offset, buf->page_len),
+ GFP_NOWAIT);
+ if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf),
want + buf->page_base,
@@ -418,12 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
- seek -= buf->page_len;
- offset += buf->page_len;
+ seek -= want;
+ offset += want;
}
+
if (seek < buf->tail[0].iov_len) {
want = min_t(size_t, count - offset, buf->tail[0].iov_len);
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
@@ -433,17 +433,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
} else
offset += buf->tail[0].iov_len;
ret = -EMSGSIZE;
- msg->msg_flags |= MSG_TRUNC;
out:
*read = offset - seek_init;
return ret;
-eagain:
- ret = -EAGAIN;
- goto out;
sock_err:
offset += seek;
goto out;
@@ -486,19 +482,20 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
if (transport->recv.offset == transport->recv.len) {
if (xs_read_stream_request_done(transport))
msg->msg_flags |= MSG_EOR;
- return transport->recv.copied;
+ return read;
}
switch (ret) {
+ default:
+ break;
+ case -EFAULT:
case -EMSGSIZE:
- return transport->recv.copied;
+ msg->msg_flags |= MSG_TRUNC;
+ return read;
case 0:
return -ESHUTDOWN;
- default:
- if (ret < 0)
- return ret;
}
- return -EAGAIN;
+ return ret < 0 ? ret : read;
}
static size_t
@@ -537,7 +534,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
ret = xs_read_stream_request(transport, msg, flags, req);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_bc_request(req, ret);
+ xprt_complete_bc_request(req, transport->recv.copied);
return ret;
}
@@ -570,7 +567,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
spin_lock(&xprt->queue_lock);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_rqst(req->rq_task, ret);
+ xprt_complete_rqst(req->rq_task, transport->recv.copied);
xprt_unpin_rqst(req);
out:
spin_unlock(&xprt->queue_lock);
@@ -591,10 +588,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
if (ret <= 0)
goto out_err;
transport->recv.offset = ret;
- if (ret != want) {
- ret = -EAGAIN;
- goto out_err;
- }
+ if (transport->recv.offset != want)
+ return transport->recv.offset;
transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
RPC_FRAGMENT_SIZE_MASK;
transport->recv.offset -= sizeof(transport->recv.fraghdr);
@@ -602,6 +597,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
}
switch (be32_to_cpu(transport->recv.calldir)) {
+ default:
+ msg.msg_flags |= MSG_TRUNC;
+ break;
case RPC_CALL:
ret = xs_read_stream_call(transport, &msg, flags);
break;
@@ -616,6 +614,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
goto out_err;
read += ret;
if (transport->recv.offset < transport->recv.len) {
+ if (!(msg.msg_flags & MSG_TRUNC))
+ return read;
+ msg.msg_flags = 0;
ret = xs_read_discard(transport->sock, &msg, flags,
transport->recv.len - transport->recv.offset);
if (ret <= 0)
@@ -623,7 +624,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.offset += ret;
read += ret;
if (transport->recv.offset != transport->recv.len)
- return -EAGAIN;
+ return read;
}
if (xs_read_stream_request_done(transport)) {
trace_xs_stream_read_request(transport);
@@ -633,13 +634,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.len = 0;
return read;
out_err:
- switch (ret) {
- case 0:
- case -ESHUTDOWN:
- xprt_force_disconnect(&transport->xprt);
- return -ESHUTDOWN;
- }
- return ret;
+ return ret != 0 ? ret : -ESHUTDOWN;
}
static void xs_stream_data_receive(struct sock_xprt *transport)
@@ -648,12 +643,12 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
ssize_t ret = 0;
mutex_lock(&transport->recv_mutex);
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL)
goto out;
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
ret = xs_read_stream(transport, MSG_DONTWAIT);
- if (ret <= 0)
+ if (ret < 0)
break;
read += ret;
cond_resched();
@@ -1345,10 +1340,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err;
mutex_lock(&transport->recv_mutex);
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet;
if (sk == NULL)
goto out;
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
skb = skb_recv_udp(sk, 0, 1, &err);
if (skb == NULL)