aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/auth.c12
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c39
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h2
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c10
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c8
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c8
-rw-r--r--net/sunrpc/auth_unix.c16
-rw-r--r--net/sunrpc/backchannel_rqst.c38
-rw-r--r--net/sunrpc/cache.c44
-rw-r--r--net/sunrpc/clnt.c350
-rw-r--r--net/sunrpc/debugfs.c3
-rw-r--r--net/sunrpc/fail.h2
-rw-r--r--net/sunrpc/rpc_pipe.c6
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/sched.c125
-rw-r--r--net/sunrpc/socklib.c7
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/sunrpc.h16
-rw-r--r--net/sunrpc/svc.c283
-rw-r--r--net/sunrpc/svc_xprt.c105
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcauth_unix.c60
-rw-r--r--net/sunrpc/svcsock.c34
-rw-r--r--net/sunrpc/sysfs.c128
-rw-r--r--net/sunrpc/xdr.c229
-rw-r--r--net/sunrpc/xprt.c119
-rw-r--r--net/sunrpc/xprtmultipath.c115
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c26
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c9
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/transport.c19
-rw-r--r--net/sunrpc/xprtrdma/verbs.c78
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h10
-rw-r--r--net/sunrpc/xprtsock.c291
41 files changed, 1421 insertions, 809 deletions
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index a9f0d17fdb0d..fb75a883503f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -445,7 +445,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
*/
- if (!time_in_range(cred->cr_expire, expired, jiffies))
+ if (time_in_range(cred->cr_expire, expired, jiffies))
continue;
if (!rpcauth_unhash_cred(cred))
continue;
@@ -615,6 +615,8 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
};
struct rpc_cred *ret;
+ if (RPC_IS_ASYNC(task))
+ lookupflags |= RPCAUTH_LOOKUP_ASYNC;
ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
put_cred(acred.cred);
return ret;
@@ -631,6 +633,8 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
if (!acred.principal)
return NULL;
+ if (RPC_IS_ASYNC(task))
+ lookupflags |= RPCAUTH_LOOKUP_ASYNC;
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
@@ -654,7 +658,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
};
if (flags & RPC_TASK_ASYNC)
- lookupflags |= RPCAUTH_LOOKUP_NEW;
+ lookupflags |= RPCAUTH_LOOKUP_NEW | RPCAUTH_LOOKUP_ASYNC;
if (task->tk_op_cred)
/* Task must use exactly this rpc_cred */
new = get_rpccred(task->tk_op_cred);
@@ -666,7 +670,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
/* If machine cred couldn't be bound, try a root cred */
if (new)
;
- else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS))
+ else if (cred == &machine_cred)
new = rpcauth_bind_root_cred(task, lookupflags);
else if (flags & RPC_TASK_NULLCREDS)
new = authnull_ops.lookup_cred(NULL, NULL, 0);
@@ -870,7 +874,7 @@ int __init rpcauth_init_module(void)
err = rpc_init_authunix();
if (err < 0)
goto out1;
- err = register_shrinker(&rpc_cred_shrinker);
+ err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred");
if (err < 0)
goto out2;
return 0;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5f42aa5fc612..7bb247c51e2f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -72,7 +72,8 @@ struct gss_auth {
struct gss_api_mech *mech;
enum rpc_gss_svc service;
struct rpc_clnt *client;
- struct net *net;
+ struct net *net;
+ netns_tracker ns_tracker;
/*
* There are two upcall pipes; dentry[1], named "gssd", is used
* for the new text-based upcall; dentry[0] is named after the
@@ -145,7 +146,7 @@ gss_alloc_context(void)
{
struct gss_cl_ctx *ctx;
- ctx = kzalloc(sizeof(*ctx), GFP_NOFS);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
ctx->gc_proc = RPC_GSS_PROC_DATA;
ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */
@@ -208,7 +209,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
p = ERR_PTR(-EFAULT);
goto err;
}
- ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
+ ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_KERNEL);
if (ret < 0) {
trace_rpcgss_import_ctx(ret);
p = ERR_PTR(ret);
@@ -510,7 +511,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
int vers;
int err = -ENOMEM;
- gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
+ gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL);
if (gss_msg == NULL)
goto err;
vers = get_pipe_version(gss_auth->net);
@@ -526,7 +527,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
gss_msg->auth = gss_auth;
kref_get(&gss_auth->kref);
if (service_name) {
- gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
+ gss_msg->service_name = kstrdup_const(service_name, GFP_KERNEL);
if (!gss_msg->service_name) {
err = -ENOMEM;
goto err_put_pipe_version;
@@ -702,7 +703,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (mlen > MSG_BUF_MAXSIZE)
goto out;
err = -ENOMEM;
- buf = kmalloc(mlen, GFP_NOFS);
+ buf = kmalloc(mlen, GFP_KERNEL);
if (!buf)
goto out;
@@ -1013,7 +1014,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
goto err_free;
}
gss_auth->client = clnt;
- gss_auth->net = get_net(rpc_net_ns(clnt));
+ gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker,
+ GFP_KERNEL);
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
if (!gss_auth->mech)
@@ -1068,7 +1070,7 @@ err_destroy_credcache:
err_put_mech:
gss_mech_put(gss_auth->mech);
err_put_net:
- put_net(gss_auth->net);
+ put_net_track(gss_auth->net, &gss_auth->ns_tracker);
err_free:
kfree(gss_auth->target_name);
kfree(gss_auth);
@@ -1084,7 +1086,7 @@ gss_free(struct gss_auth *gss_auth)
gss_pipe_free(gss_auth->gss_pipe[0]);
gss_pipe_free(gss_auth->gss_pipe[1]);
gss_mech_put(gss_auth->mech);
- put_net(gss_auth->net);
+ put_net_track(gss_auth->net, &gss_auth->ns_tracker);
kfree(gss_auth->target_name);
kfree(gss_auth);
@@ -1218,7 +1220,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
struct gss_cred *new;
/* Make a copy of the cred so that we can reference count it */
- new = kzalloc(sizeof(*gss_cred), GFP_NOFS);
+ new = kzalloc(sizeof(*gss_cred), GFP_KERNEL);
if (new) {
struct auth_cred acred = {
.cred = gss_cred->gc_base.cr_cred,
@@ -1338,10 +1340,11 @@ gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
/*
* Lookup RPCSEC_GSS cred for the current process
*/
-static struct rpc_cred *
-gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+static struct rpc_cred *gss_lookup_cred(struct rpc_auth *auth,
+ struct auth_cred *acred, int flags)
{
- return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
+ return rpcauth_lookup_credcache(auth, acred, flags,
+ rpc_task_gfp_mask());
}
static struct rpc_cred *
@@ -1667,7 +1670,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
if (!p)
goto validate_failed;
- seq = kmalloc(4, GFP_NOFS);
+ seq = kmalloc(4, GFP_KERNEL);
if (!seq)
goto validate_failed;
*seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
@@ -1777,11 +1780,11 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
rqstp->rq_enc_pages
= kmalloc_array(rqstp->rq_enc_pages_num,
sizeof(struct page *),
- GFP_NOFS);
+ GFP_KERNEL);
if (!rqstp->rq_enc_pages)
goto out;
for (i=0; i < rqstp->rq_enc_pages_num; i++) {
- rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
+ rqstp->rq_enc_pages[i] = alloc_page(GFP_KERNEL);
if (rqstp->rq_enc_pages[i] == NULL)
goto out_free;
}
@@ -1985,8 +1988,8 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
if (offset + len > rcv_buf->len)
goto unwrap_failed;
mic.len = len;
- mic.data = kmalloc(len, GFP_NOFS);
- if (!mic.data)
+ mic.data = kmalloc(len, GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(mic.data))
goto unwrap_failed;
if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len))
goto unwrap_failed;
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
index f6d9631bd9d0..c53b329092d4 100644
--- a/net/sunrpc/auth_gss/auth_gss_internal.h
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
if (len) {
- dest->data = kmemdup(p, len, GFP_NOFS);
+ dest->data = kmemdup(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
} else
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index fe97f3106536..4a4082bb22ad 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
if (ret)
return ret;
- if (!ret) {
- *buf_in = buf;
- *body_size = toksize;
- }
+ *buf_in = buf;
+ *body_size = toksize;
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 634b6c6e0dcb..3ea58175e159 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -161,7 +161,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
- checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+ checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL);
if (checksumdata == NULL)
return GSS_S_FAILURE;
@@ -169,7 +169,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(tfm))
goto out_free_cksum;
- req = ahash_request_alloc(tfm, GFP_NOFS);
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
goto out_free_ahash;
@@ -257,7 +257,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
- checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+ checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
@@ -265,7 +265,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(tfm))
goto out_free_cksum;
- req = ahash_request_alloc(tfm, GFP_NOFS);
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
goto out_free_ahash;
@@ -554,7 +554,7 @@ gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
WARN_ON(0);
return -ENOMEM;
}
- data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS);
+ data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_KERNEL);
if (!data)
return -ENOMEM;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index fb117817ff5d..3200b971a814 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -49,7 +49,7 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
unsigned char *plain;
s32 code;
- plain = kmalloc(8, GFP_NOFS);
+ plain = kmalloc(8, GFP_KERNEL);
if (!plain)
return -ENOMEM;
@@ -80,7 +80,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
dprintk("RPC: krb5_get_seq_num:\n");
- plain = kmalloc(8, GFP_NOFS);
+ plain = kmalloc(8, GFP_KERNEL);
if (!plain)
return -ENOMEM;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index e95c009bb869..48337687848c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen)
/* initialize to random value */
if (i == 0) {
- i = prandom_u32();
- i = (i << 32) | prandom_u32();
+ i = get_random_u32();
+ i = (i << 32) | get_random_u32();
}
switch (conflen) {
@@ -409,7 +409,7 @@ static u32
gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
- u8 *ptr, *plainhdr;
+ u8 *ptr;
time64_t now;
u8 flags = 0x00;
__be16 *be16ptr;
@@ -426,7 +426,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
return GSS_S_FAILURE;
/* construct gss token header */
- ptr = plainhdr = buf->head[0].iov_base + offset;
+ ptr = buf->head[0].iov_base + offset;
*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 61c276bddaf2..f549e4c05def 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
* done without the correct namespace:
*/
.flags = RPC_CLNT_CREATE_NOPING |
+ RPC_CLNT_CREATE_CONNECTED |
RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
};
struct rpc_clnt *clnt;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index b87565b64928..bcd74dddbe2d 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
* rejecting the server-computed MIC in this somewhat rare case,
* do not use splice with the GSS integrity service.
*/
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
@@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
int pad, remaining_len, offset;
u32 rseqno;
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
@@ -1433,7 +1433,7 @@ static bool use_gss_proxy(struct net *net)
static ssize_t write_gssp(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
char tbuf[20];
unsigned long i;
int res;
@@ -1461,7 +1461,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf,
static ssize_t read_gssp(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
unsigned long p = *ppos;
char tbuf[10];
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index e7df1f782b2e..1e091d3fa607 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -40,11 +40,19 @@ unx_destroy(struct rpc_auth *auth)
/*
* Lookup AUTH_UNIX creds for current process
*/
-static struct rpc_cred *
-unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+static struct rpc_cred *unx_lookup_cred(struct rpc_auth *auth,
+ struct auth_cred *acred, int flags)
{
- struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
-
+ struct rpc_cred *ret;
+
+ ret = kmalloc(sizeof(*ret), rpc_task_gfp_mask());
+ if (!ret) {
+ if (!(flags & RPCAUTH_LOOKUP_ASYNC))
+ return ERR_PTR(-ENOMEM);
+ ret = mempool_alloc(unix_pool, GFP_NOWAIT);
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+ }
rpcauth_init_cred(ret, acred, auth, &unix_credops);
ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
return ret;
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 22a2c235abf1..65a6c6429a53 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -1,23 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
(c) 2007 Network Appliance, Inc. All Rights Reserved.
(c) 2009 NetApp. All Rights Reserved.
-NetApp provides this source code under the GPL v2 License.
-The GPL v2 license is available at
-https://opensource.org/licenses/gpl-license.php.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
@@ -64,6 +50,17 @@ static void xprt_free_allocation(struct rpc_rqst *req)
kfree(req);
}
+static void xprt_bc_reinit_xdr_buf(struct xdr_buf *buf)
+{
+ buf->head[0].iov_len = PAGE_SIZE;
+ buf->tail[0].iov_len = 0;
+ buf->pages = NULL;
+ buf->page_len = 0;
+ buf->flags = 0;
+ buf->len = 0;
+ buf->buflen = PAGE_SIZE;
+}
+
static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
{
struct page *page;
@@ -75,9 +72,9 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
return 0;
}
-static
-struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
+static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt)
{
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
struct rpc_rqst *req;
/* Pre-allocate one backchannel rpc_rqst */
@@ -154,7 +151,7 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
INIT_LIST_HEAD(&tmp_list);
for (i = 0; i < min_reqs; i++) {
/* Pre-allocate one backchannel rpc_rqst */
- req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+ req = xprt_alloc_bc_req(xprt);
if (req == NULL) {
printk(KERN_ERR "Failed to create bc rpc_rqst\n");
goto out_free;
@@ -292,6 +289,9 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
*/
spin_lock_bh(&xprt->bc_pa_lock);
if (xprt_need_to_requeue(xprt)) {
+ xprt_bc_reinit_xdr_buf(&req->rq_snd_buf);
+ xprt_bc_reinit_xdr_buf(&req->rq_rcv_buf);
+ req->rq_rcv_buf.len = PAGE_SIZE;
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
atomic_inc(&xprt->bc_slot_count);
@@ -343,7 +343,7 @@ found:
break;
} else if (req)
break;
- new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+ new = xprt_alloc_bc_req(xprt);
} while (new);
return req;
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 59641803472c..f075a9fb5ccc 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,7 +33,9 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <trace/events/sunrpc.h>
+
#include "netns.h"
+#include "fail.h"
#define RPCDBG_FACILITY RPCDBG_CACHE
@@ -675,7 +677,7 @@ static void cache_limit_defers(void)
/* Consider removing either the first or the last */
if (cache_defer_cnt > DFR_MAX) {
- if (prandom_u32() & 1)
+ if (prandom_u32_max(2))
discard = list_entry(cache_defer_list.next,
struct cache_deferred_req, recent);
else
@@ -688,16 +690,30 @@ static void cache_limit_defers(void)
discard->revisit(discard, 1);
}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+static inline bool cache_defer_immediately(void)
+{
+ return !fail_sunrpc.ignore_cache_wait &&
+ should_fail(&fail_sunrpc.attr, 1);
+}
+#else
+static inline bool cache_defer_immediately(void)
+{
+ return false;
+}
+#endif
+
/* Return true if and only if a deferred request is queued. */
static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
{
struct cache_deferred_req *dreq;
- if (req->thread_wait) {
+ if (!cache_defer_immediately()) {
cache_wait_req(req, item);
if (!test_bit(CACHE_PENDING, &item->flags))
return false;
}
+
dreq = req->defer(req);
if (dreq == NULL)
return false;
@@ -1536,7 +1552,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_read(filp, buf, count, ppos, cd);
}
@@ -1544,14 +1560,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_write(filp, buf, count, ppos, cd);
}
static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_poll(filp, wait, cd);
}
@@ -1560,21 +1576,21 @@ static long cache_ioctl_procfs(struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_ioctl(inode, filp, cmd, arg, cd);
}
static int cache_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_open(inode, filp, cd);
}
static int cache_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_release(inode, filp, cd);
}
@@ -1591,14 +1607,14 @@ static const struct proc_ops cache_channel_proc_ops = {
static int content_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_open(inode, filp, cd);
}
static int content_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_release(inode, filp, cd);
}
@@ -1612,14 +1628,14 @@ static const struct proc_ops content_proc_ops = {
static int open_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return open_flush(inode, filp, cd);
}
static int release_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return release_flush(inode, filp, cd);
}
@@ -1627,7 +1643,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return read_flush(filp, buf, count, ppos, cd);
}
@@ -1636,7 +1652,7 @@ static ssize_t write_flush_procfs(struct file *filp,
const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return write_flush(filp, buf, count, ppos, cd);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a312ea2bc440..993acf38af87 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task,
static int rpc_decode_header(struct rpc_task *task,
struct xdr_stream *xdr);
static int rpc_ping(struct rpc_clnt *clnt);
+static int rpc_ping_noreply(struct rpc_clnt *clnt);
static void rpc_check_timeout(struct rpc_task *task);
static void rpc_register_client(struct rpc_clnt *clnt)
@@ -344,7 +345,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt)
{
int clid;
- clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
+ clid = ida_alloc(&rpc_clids, GFP_KERNEL);
if (clid < 0)
return clid;
clnt->cl_clid = clid;
@@ -353,7 +354,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt)
static void rpc_free_clid(struct rpc_clnt *clnt)
{
- ida_simple_remove(&rpc_clids, clnt->cl_clid);
+ ida_free(&rpc_clids, clnt->cl_clid);
}
static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
@@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
+ } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
+ int err = rpc_ping_noreply(clnt);
+ if (err != 0) {
+ rpc_shutdown_client(clnt);
+ return ERR_PTR(err);
+ }
}
clnt->cl_softrtry = 1;
@@ -644,6 +651,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
+ new->cl_max_connect = clnt->cl_max_connect;
return new;
out_err:
@@ -778,7 +786,8 @@ out_revert:
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
static
-int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
+int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
+ void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
{
struct rpc_xprt_switch *xps;
@@ -787,11 +796,24 @@ int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
rcu_read_unlock();
if (xps == NULL)
return -EAGAIN;
- xprt_iter_init_listall(xpi, xps);
+ func(xpi, xps);
xprt_switch_put(xps);
return 0;
}
+static
+int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
+{
+ return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listall);
+}
+
+static
+int rpc_clnt_xprt_iter_offline_init(struct rpc_clnt *clnt,
+ struct rpc_xprt_iter *xpi)
+{
+ return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listoffline);
+}
+
/**
* rpc_clnt_iterate_for_each_xprt - Apply a function to all transports
* @clnt: pointer to client
@@ -851,6 +873,57 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_killall_tasks);
+/**
+ * rpc_cancel_tasks - try to cancel a set of RPC tasks
+ * @clnt: Pointer to RPC client
+ * @error: RPC task error value to set
+ * @fnmatch: Pointer to selector function
+ * @data: User data
+ *
+ * Uses @fnmatch to define a set of RPC tasks that are to be cancelled.
+ * The argument @error must be a negative error value.
+ */
+unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error,
+ bool (*fnmatch)(const struct rpc_task *,
+ const void *),
+ const void *data)
+{
+ struct rpc_task *task;
+ unsigned long count = 0;
+
+ if (list_empty(&clnt->cl_tasks))
+ return 0;
+ /*
+ * Spin lock all_tasks to prevent changes...
+ */
+ spin_lock(&clnt->cl_lock);
+ list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
+ if (!RPC_IS_ACTIVATED(task))
+ continue;
+ if (!fnmatch(task, data))
+ continue;
+ rpc_task_try_cancel(task, error);
+ count++;
+ }
+ spin_unlock(&clnt->cl_lock);
+ return count;
+}
+EXPORT_SYMBOL_GPL(rpc_cancel_tasks);
+
+static int rpc_clnt_disconnect_xprt(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt, void *dummy)
+{
+ if (xprt_connected(xprt))
+ xprt_force_disconnect(xprt);
+ return 0;
+}
+
+void rpc_clnt_disconnect(struct rpc_clnt *clnt)
+{
+ rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_disconnect_xprt, NULL);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_disconnect);
+
/*
* Properly shut down an RPC client, terminating all outstanding
* requests.
@@ -1065,8 +1138,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt)
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
- if (task->tk_xprt)
- return;
+ if (task->tk_xprt) {
+ if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
+ (task->tk_flags & RPC_TASK_MOVEABLE)))
+ return;
+ xprt_release(task);
+ xprt_put(task->tk_xprt);
+ }
if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
task->tk_xprt = rpc_task_get_first_xprt(clnt);
else
@@ -1085,8 +1163,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_flags |= RPC_TASK_TIMEOUT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
- if (atomic_read(&clnt->cl_swapper))
- task->tk_flags |= RPC_TASK_SWAPPER;
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1127,6 +1203,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
struct rpc_task *task;
task = rpc_new_task(task_setup_data);
+ if (IS_ERR(task))
+ return task;
if (!RPC_IS_ASYNC(task))
task->tk_flags |= RPC_TASK_CRED_NOREF;
@@ -1227,6 +1305,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ xprt_free_bc_request(req);
+ return task;
+ }
+
xprt_init_bc_request(req, task);
task->tk_action = call_bc_encode;
@@ -1610,7 +1693,7 @@ static void
__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
{
trace_rpc_call_rpcerror(task, tk_status, rpc_status);
- task->tk_rpc_status = rpc_status;
+ rpc_task_set_rpc_status(task, rpc_status);
rpc_exit(task, tk_status);
}
@@ -1745,6 +1828,9 @@ call_refreshresult(struct rpc_task *task)
task->tk_cred_retry--;
trace_rpc_retry_refresh_status(task);
return;
+ case -ENOMEM:
+ rpc_delay(task, HZ >> 4);
+ return;
}
trace_rpc_refresh_status(task);
rpc_call_rpcerror(task, status);
@@ -1835,7 +1921,6 @@ rpc_xdr_encode(struct rpc_task *task)
req->rq_snd_buf.head[0].iov_len = 0;
xdr_init_encode(&xdr, &req->rq_snd_buf,
req->rq_snd_buf.head[0].iov_base, req);
- xdr_free_bvec(&req->rq_snd_buf);
if (rpc_encode_header(task, &xdr))
return;
@@ -1855,6 +1940,9 @@ call_encode(struct rpc_task *task)
xprt_request_dequeue_xprt(task);
/* Encode here so that rpcsec_gss can use correct sequence number. */
rpc_xdr_encode(task);
+ /* Add task to reply queue before transmission to avoid races */
+ if (task->tk_status == 0 && rpc_reply_expected(task))
+ task->tk_status = xprt_request_enqueue_receive(task);
/* Did the encode result in an error condition? */
if (task->tk_status != 0) {
/* Was the error nonfatal? */
@@ -1865,7 +1953,7 @@ call_encode(struct rpc_task *task)
break;
case -EKEYEXPIRED:
if (!task->tk_cred_retry) {
- rpc_exit(task, task->tk_status);
+ rpc_call_rpcerror(task, task->tk_status);
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
@@ -1878,9 +1966,6 @@ call_encode(struct rpc_task *task)
return;
}
- /* Add task to reply queue before transmission to avoid races */
- if (rpc_reply_expected(task))
- xprt_request_enqueue_receive(task);
xprt_request_enqueue_transmit(task);
out:
task->tk_action = call_transmit;
@@ -2116,7 +2201,8 @@ call_connect_status(struct rpc_task *task)
xprt_release(task);
value = atomic_long_dec_return(&xprt->queuelen);
if (value == 0)
- rpc_xprt_switch_remove_xprt(xps, saved);
+ rpc_xprt_switch_remove_xprt(xps, saved,
+ true);
xprt_put(saved);
task->tk_xprt = NULL;
task->tk_action = call_start;
@@ -2197,6 +2283,7 @@ call_transmit_status(struct rpc_task *task)
* socket just returned a connection error,
* then hold onto the transport lock.
*/
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2280,6 +2367,7 @@ call_bc_transmit_status(struct rpc_task *task)
case -ENOTCONN:
case -EPIPE:
break;
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2362,6 +2450,11 @@ call_status(struct rpc_task *task)
case -EPIPE:
case -EAGAIN:
break;
+ case -ENFILE:
+ case -ENOBUFS:
+ case -ENOMEM:
+ rpc_delay(task, HZ>>2);
+ break;
case -EIO:
/* shutdown or soft timeout */
goto out_exit;
@@ -2393,10 +2486,8 @@ rpc_check_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- if (RPC_SIGNALLED(task)) {
- rpc_call_rpcerror(task, -ERESTARTSYS);
+ if (RPC_SIGNALLED(task))
return;
- }
if (xprt_adjust_timeout(task->tk_rqstp) == 0)
return;
@@ -2622,7 +2713,7 @@ out_unparsable:
out_verifier:
trace_rpc_bad_verifier(task);
- goto out_garbage;
+ goto out_err;
out_msg_denied:
error = -EACCES;
@@ -2689,6 +2780,10 @@ static const struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};
+static const struct rpc_procinfo rpcproc_null_noreply = {
+ .p_encode = rpcproc_encode_null,
+};
+
static void
rpc_null_call_prepare(struct rpc_task *task, void *data)
{
@@ -2742,6 +2837,28 @@ static int rpc_ping(struct rpc_clnt *clnt)
return status;
}
+static int rpc_ping_noreply(struct rpc_clnt *clnt)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_null_noreply,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpc_null_ops,
+ .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
+ };
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
+
struct rpc_cb_add_xprt_calldata {
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
@@ -2793,7 +2910,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
return -EINVAL;
}
- data = kmalloc(sizeof(*data), GFP_NOFS);
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->xps = xprt_switch_get(xps);
@@ -2805,6 +2922,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
&rpc_cb_add_xprt_call_ops, data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
data->xps->xps_nunique_destaddr_xprts++;
rpc_put_task(task);
success:
@@ -2812,6 +2932,30 @@ success:
}
EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
+static int rpc_clnt_add_xprt_helper(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ struct rpc_add_xprt_test *data)
+{
+ struct rpc_task *task;
+ int status = -EADDRINUSE;
+
+ /* Test the connection */
+ task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ status = task->tk_status;
+ rpc_put_task(task);
+
+ if (status < 0)
+ return status;
+
+ /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
+ data->add_xprt_test(clnt, xprt, data->data);
+
+ return 0;
+}
+
/**
* rpc_clnt_setup_test_and_add_xprt()
*
@@ -2835,8 +2979,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *data)
{
- struct rpc_task *task;
- struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
int status = -EADDRINUSE;
xprt = xprt_get(xprt);
@@ -2845,31 +2987,19 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
goto out_err;
- /* Test the connection */
- task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
- if (IS_ERR(task)) {
- status = PTR_ERR(task);
- goto out_err;
- }
- status = task->tk_status;
- rpc_put_task(task);
-
+ status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
if (status < 0)
goto out_err;
- /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
- xtest->add_xprt_test(clnt, xprt, xtest->data);
-
- xprt_put(xprt);
- xprt_switch_put(xps);
-
- /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
- return 1;
+ status = 1;
out_err:
xprt_put(xprt);
xprt_switch_put(xps);
- pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not added\n",
- status, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ if (status < 0)
+ pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not "
+ "added\n", status,
+ xprt->address_strings[RPC_DISPLAY_ADDR]);
+ /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
return status;
}
EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt);
@@ -2900,7 +3030,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport, reuseport;
- int ret = 0;
+ int ret = 0, ident;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
@@ -2914,8 +3044,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
reuseport = xprt->reuseport;
connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
+ ident = xprt->xprt_class->ident;
rcu_read_unlock();
+ if (!xprtargs->ident)
+ xprtargs->ident = ident;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
@@ -2943,6 +3076,110 @@ out_put_switch:
}
EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
+static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ struct rpc_add_xprt_test *data)
+{
+ struct rpc_xprt_switch *xps;
+ struct rpc_xprt *main_xprt;
+ int status = 0;
+
+ xprt_get(xprt);
+
+ rcu_read_lock();
+ main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
+ (struct sockaddr *)&main_xprt->addr);
+ rcu_read_unlock();
+ xprt_put(main_xprt);
+ if (status || !test_bit(XPRT_OFFLINE, &xprt->state))
+ goto out;
+
+ status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
+out:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+ return status;
+}
+
+/* rpc_clnt_probe_trunked_xprt -- probe offlined transport for session trunking
+ * @clnt rpc_clnt structure
+ *
+ * For each offlined transport found in the rpc_clnt structure call
+ * the function rpc_xprt_probe_trunked() which will determine if this
+ * transport still belongs to the trunking group.
+ */
+void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *clnt,
+ struct rpc_add_xprt_test *data)
+{
+ struct rpc_xprt_iter xpi;
+ int ret;
+
+ ret = rpc_clnt_xprt_iter_offline_init(clnt, &xpi);
+ if (ret)
+ return;
+ for (;;) {
+ struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
+
+ if (!xprt)
+ break;
+ ret = rpc_xprt_probe_trunked(clnt, xprt, data);
+ xprt_put(xprt);
+ if (ret < 0)
+ break;
+ xprt_iter_rewind(&xpi);
+ }
+ xprt_iter_destroy(&xpi);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_probe_trunked_xprts);
+
+static int rpc_xprt_offline(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ void *data)
+{
+ struct rpc_xprt *main_xprt;
+ struct rpc_xprt_switch *xps;
+ int err = 0;
+
+ xprt_get(xprt);
+
+ rcu_read_lock();
+ main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ err = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
+ (struct sockaddr *)&main_xprt->addr);
+ rcu_read_unlock();
+ xprt_put(main_xprt);
+ if (err)
+ goto out;
+
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+ err = -EINTR;
+ goto out;
+ }
+ xprt_set_offline_locked(xprt, xps);
+
+ xprt_release_write(xprt, NULL);
+out:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+ return err;
+}
+
+/* rpc_clnt_manage_trunked_xprts -- offline trunked transports
+ * @clnt rpc_clnt structure
+ *
+ * For each active transport found in the rpc_clnt structure call
+ * the function rpc_xprt_offline() which will identify trunked transports
+ * and will mark them offline.
+ */
+void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *clnt)
+{
+ rpc_clnt_iterate_for_each_xprt(clnt, rpc_xprt_offline, NULL);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_manage_trunked_xprts);
+
struct connect_timeout_data {
unsigned long connect_timeout;
unsigned long reconnect_timeout;
@@ -2985,8 +3222,22 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
+void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ rcu_read_unlock();
+ xprt_set_online_locked(xprt, xps);
+}
+
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
+ if (rpc_clnt_xprt_switch_has_addr(clnt,
+ (const struct sockaddr *)&xprt->addr)) {
+ return rpc_clnt_xprt_set_online(clnt, xprt);
+ }
rcu_read_lock();
rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
xprt);
@@ -2994,6 +3245,19 @@ void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
+void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ rpc_xprt_switch_remove_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
+ xprt, 0);
+ xps->xps_nunique_destaddr_xprts--;
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_remove_xprt);
+
bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
const struct sockaddr *sap)
{
@@ -3065,6 +3329,8 @@ rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt,
int
rpc_clnt_swap_activate(struct rpc_clnt *clnt)
{
+ while (clnt != clnt->cl_parent)
+ clnt = clnt->cl_parent;
if (atomic_inc_return(&clnt->cl_swapper) == 1)
return rpc_clnt_iterate_for_each_xprt(clnt,
rpc_clnt_swap_activate_callback, NULL);
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 7dc9cc929bfd..a176d5a0b0ee 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -262,6 +262,9 @@ static void fail_sunrpc_init(void)
debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
&fail_sunrpc.ignore_server_disconnect);
+
+ debugfs_create_bool("ignore-cache-wait", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_cache_wait);
}
#else
static void fail_sunrpc_init(void)
diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
index 69dc30cc44b8..4b4b500df428 100644
--- a/net/sunrpc/fail.h
+++ b/net/sunrpc/fail.h
@@ -14,8 +14,8 @@ struct fail_sunrpc_attr {
struct fault_attr attr;
bool ignore_client_disconnect;
-
bool ignore_server_disconnect;
+ bool ignore_cache_wait;
};
extern struct fail_sunrpc_attr fail_sunrpc;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ee5336d73fdd..0b6034fab9ab 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -197,7 +197,7 @@ static struct inode *
rpc_alloc_inode(struct super_block *sb)
{
struct rpc_inode *rpci;
- rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
+ rpci = alloc_inode_sb(sb, rpc_inode_cachep, GFP_KERNEL);
if (!rpci)
return NULL;
return &rpci->vfs_inode;
@@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
@@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_unlink(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_unlink(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 647b323cc1d5..5a8e6d46809a 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -714,7 +714,7 @@ void rpcb_getport_async(struct rpc_task *task)
goto bailout_nofree;
}
- map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
+ map = kzalloc(sizeof(struct rpcbind_args), rpc_task_gfp_mask());
if (!map) {
status = -ENOMEM;
goto bailout_release_client;
@@ -730,7 +730,7 @@ void rpcb_getport_async(struct rpc_task *task)
case RPCBVERS_4:
case RPCBVERS_3:
map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
- map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
+ map->r_addr = rpc_sockaddr2uaddr(sap, rpc_task_gfp_mask());
if (!map->r_addr) {
status = -ENOMEM;
goto bailout_free_args;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index e2c835482791..be587a308e05 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -57,6 +57,21 @@ struct workqueue_struct *rpciod_workqueue __read_mostly;
struct workqueue_struct *xprtiod_workqueue __read_mostly;
EXPORT_SYMBOL_GPL(xprtiod_workqueue);
+gfp_t rpc_task_gfp_mask(void)
+{
+ if (current->flags & PF_WQ_WORKER)
+ return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+ return GFP_KERNEL;
+}
+EXPORT_SYMBOL_GPL(rpc_task_gfp_mask);
+
+bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status)
+{
+ if (cmpxchg(&task->tk_rpc_status, 0, rpc_status) == 0)
+ return true;
+ return false;
+}
+
unsigned long
rpc_task_timeout(const struct rpc_task *task)
{
@@ -186,11 +201,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
/*
* Add new request to wait queue.
- *
- * Swapper tasks always get inserted at the head of the queue.
- * This should avoid many nasty memory deadlocks and hopefully
- * improve overall performance.
- * Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
struct rpc_task *task,
@@ -199,8 +209,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task, queue_priority);
- else if (RPC_IS_SWAPPER(task))
- list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_waitqueue = queue;
@@ -268,7 +276,7 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- freezable_schedule_unsafe();
+ schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
@@ -332,14 +340,12 @@ static int rpc_complete_task(struct rpc_task *task)
* to enforce taking of the wq->lock and hence avoid races with
* rpc_complete_task().
*/
-int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
+int rpc_wait_for_completion_task(struct rpc_task *task)
{
- if (action == NULL)
- action = rpc_wait_bit_killable;
return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
- action, TASK_KILLABLE);
+ rpc_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
}
-EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
+EXPORT_SYMBOL_GPL(rpc_wait_for_completion_task);
/*
* Make an RPC task runnable.
@@ -854,12 +860,25 @@ void rpc_signal_task(struct rpc_task *task)
if (!RPC_IS_ACTIVATED(task))
return;
+ if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
+ return;
trace_rpc_task_signalled(task, task->tk_action);
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
if (queue)
- rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
+ rpc_wake_up_queued_task(queue, task);
+}
+
+void rpc_task_try_cancel(struct rpc_task *task, int error)
+{
+ struct rpc_wait_queue *queue;
+
+ if (!rpc_task_set_rpc_status(task, error))
+ return;
+ queue = READ_ONCE(task->tk_waitqueue);
+ if (queue)
+ rpc_wake_up_queued_task(queue, task);
}
void rpc_exit(struct rpc_task *task, int status)
@@ -876,6 +895,15 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
ops->rpc_release(calldata);
}
+static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk)
+{
+ if (!xprt)
+ return false;
+ if (!atomic_read(&xprt->swapper))
+ return false;
+ return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk;
+}
+
/*
* This is the RPC `scheduler' (or rather, the finite state machine).
*/
@@ -884,6 +912,7 @@ static void __rpc_execute(struct rpc_task *task)
struct rpc_wait_queue *queue;
int task_is_async = RPC_IS_ASYNC(task);
int status = 0;
+ unsigned long pflags = current->flags;
WARN_ON_ONCE(RPC_IS_QUEUED(task));
if (RPC_IS_QUEUED(task))
@@ -896,16 +925,26 @@ static void __rpc_execute(struct rpc_task *task)
* Perform the next FSM step or a pending callback.
*
* tk_action may be NULL if the task has been killed.
- * In particular, note that rpc_killall_tasks may
- * do this at any time, so beware when dereferencing.
*/
do_action = task->tk_action;
+ /* Tasks with an RPC error status should exit */
+ if (do_action != rpc_exit_task &&
+ (status = READ_ONCE(task->tk_rpc_status)) != 0) {
+ task->tk_status = status;
+ if (do_action != NULL)
+ do_action = rpc_exit_task;
+ }
+ /* Callbacks override all actions */
if (task->tk_callback) {
do_action = task->tk_callback;
task->tk_callback = NULL;
}
if (!do_action)
break;
+ if (RPC_IS_SWAPPER(task) ||
+ xprt_needs_memalloc(task->tk_xprt, task))
+ current->flags |= PF_MEMALLOC;
+
trace_rpc_task_run_action(task, do_action);
do_action(task);
@@ -918,14 +957,6 @@ static void __rpc_execute(struct rpc_task *task)
}
/*
- * Signalled tasks should exit rather than sleep.
- */
- if (RPC_SIGNALLED(task)) {
- task->tk_rpc_status = -ERESTARTSYS;
- rpc_exit(task, -ERESTARTSYS);
- }
-
- /*
* The queue->lock protects against races with
* rpc_make_runnable().
*
@@ -940,16 +971,22 @@ static void __rpc_execute(struct rpc_task *task)
spin_unlock(&queue->lock);
continue;
}
+ /* Wake up any task that has an exit status */
+ if (READ_ONCE(task->tk_rpc_status) != 0) {
+ rpc_wake_up_task_queue_locked(queue, task);
+ spin_unlock(&queue->lock);
+ continue;
+ }
rpc_clear_running(task);
spin_unlock(&queue->lock);
if (task_is_async)
- return;
+ goto out;
/* sync task: sleep here */
trace_rpc_task_sync_sleep(task, task->tk_action);
status = out_of_line_wait_on_bit(&task->tk_runstate,
RPC_TASK_QUEUED, rpc_wait_bit_killable,
- TASK_KILLABLE);
+ TASK_KILLABLE|TASK_FREEZABLE);
if (status < 0) {
/*
* When a sync task receives a signal, it exits with
@@ -957,16 +994,15 @@ static void __rpc_execute(struct rpc_task *task)
* clean up after sleeping on some queue, we don't
* break the loop here, but go around once more.
*/
- trace_rpc_task_signalled(task, task->tk_action);
- set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
- task->tk_rpc_status = -ERESTARTSYS;
- rpc_exit(task, -ERESTARTSYS);
+ rpc_signal_task(task);
}
trace_rpc_task_sync_wake(task, task->tk_action);
}
/* Release all resources associated with the task */
rpc_release_task(task);
+out:
+ current_restore_flags(pflags, PF_MEMALLOC);
}
/*
@@ -1021,15 +1057,15 @@ int rpc_malloc(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp;
size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
struct rpc_buffer *buf;
- gfp_t gfp = GFP_NOFS;
-
- if (RPC_IS_SWAPPER(task))
- gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ gfp_t gfp = rpc_task_gfp_mask();
size += sizeof(struct rpc_buffer);
- if (size <= RPC_BUFFER_MAXSIZE)
- buf = mempool_alloc(rpc_buffer_mempool, gfp);
- else
+ if (size <= RPC_BUFFER_MAXSIZE) {
+ buf = kmem_cache_alloc(rpc_buffer_slabp, gfp);
+ /* Reach for the mempool if dynamic allocation fails */
+ if (!buf && RPC_IS_ASYNC(task))
+ buf = mempool_alloc(rpc_buffer_mempool, GFP_NOWAIT);
+ } else
buf = kmalloc(size, gfp);
if (!buf)
@@ -1092,10 +1128,14 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
rpc_init_task_statistics(task);
}
-static struct rpc_task *
-rpc_alloc_task(void)
+static struct rpc_task *rpc_alloc_task(void)
{
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+ struct rpc_task *task;
+
+ task = kmem_cache_alloc(rpc_task_slabp, rpc_task_gfp_mask());
+ if (task)
+ return task;
+ return mempool_alloc(rpc_task_mempool, GFP_NOWAIT);
}
/*
@@ -1108,6 +1148,11 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
if (task == NULL) {
task = rpc_alloc_task();
+ if (task == NULL) {
+ rpc_release_calldata(setup_data->callback_ops,
+ setup_data->callback_data);
+ return ERR_PTR(-ENOMEM);
+ }
flags = RPC_TASK_DYNAMIC;
}
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index d52313af82bc..71ba4cf513bc 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -15,6 +15,7 @@
#include <linux/pagemap.h>
#include <linux/udp.h>
#include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xdr.h>
#include <linux/export.h>
@@ -220,12 +221,6 @@ static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
struct xdr_buf *xdr, size_t base)
{
- int err;
-
- err = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (err < 0)
- return err;
-
iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr),
xdr->page_len + xdr->page_base);
return xprt_sendmsg(sock, msg, base + xdr->page_base);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index c964b48eaaba..52908f9e6eab 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -66,7 +66,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
static int rpc_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, rpc_proc_show, PDE_DATA(inode));
+ return single_open(file, rpc_proc_show, pde_data(inode));
}
static const struct proc_ops rpc_proc_ops = {
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 2f59464e6524..d4a362c9e4b3 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -1,22 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/******************************************************************************
(c) 2008 NetApp. All Rights Reserved.
-NetApp provides this source code under the GPL v2 License.
-The GPL v2 license is available at
-https://opensource.org/licenses/gpl-license.php.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4292278a9552..149171774bc6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -37,18 +37,37 @@
static void svc_unregister(const struct svc_serv *serv, struct net *net);
-#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function)
-
#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
/*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+ SVC_POOL_AUTO = -1, /* choose one of the others */
+ SVC_POOL_GLOBAL, /* no mapping, just a single global pool
+ * (legacy & UP mode) */
+ SVC_POOL_PERCPU, /* one pool per cpu */
+ SVC_POOL_PERNODE /* one pool per numa node */
+};
+
+/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
*/
-struct svc_pool_map svc_pool_map = {
+
+struct svc_pool_map {
+ int count; /* How many svc_servs use us */
+ int mode; /* Note: int not enum to avoid
+ * warnings about "enumeration value
+ * not handled in switch" */
+ unsigned int npools;
+ unsigned int *pool_to; /* maps pool id to cpu or node */
+ unsigned int *to_pool; /* maps cpu or node to pool id */
+};
+
+static struct svc_pool_map svc_pool_map = {
.mode = SVC_POOL_DEFAULT
};
-EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
@@ -219,10 +238,12 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
/*
* Add a reference to the global map of cpus to pools (and
- * vice versa). Initialise the map if we're the first user.
- * Returns the number of pools.
+ * vice versa) if pools are in use.
+ * Initialise the map if we're the first user.
+ * Returns the number of pools. If this is '1', no reference
+ * was taken.
*/
-unsigned int
+static unsigned int
svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
@@ -232,6 +253,7 @@ svc_pool_map_get(void)
if (m->count++) {
mutex_unlock(&svc_pool_map_mutex);
+ WARN_ON_ONCE(m->npools <= 1);
return m->npools;
}
@@ -247,30 +269,36 @@ svc_pool_map_get(void)
break;
}
- if (npools < 0) {
+ if (npools <= 0) {
/* default, or memory allocation failure */
npools = 1;
m->mode = SVC_POOL_GLOBAL;
}
m->npools = npools;
+ if (npools == 1)
+ /* service is unpooled, so doesn't hold a reference */
+ m->count--;
+
mutex_unlock(&svc_pool_map_mutex);
- return m->npools;
+ return npools;
}
-EXPORT_SYMBOL_GPL(svc_pool_map_get);
/*
- * Drop a reference to the global map of cpus to pools.
+ * Drop a reference to the global map of cpus to pools, if
+ * pools were in use, i.e. if npools > 1.
* When the last reference is dropped, the map data is
* freed; this allows the sysadmin to change the pool
* mode using the pool_mode module option without
* rebooting or re-loading sunrpc.ko.
*/
-void
-svc_pool_map_put(void)
+static void
+svc_pool_map_put(int npools)
{
struct svc_pool_map *m = &svc_pool_map;
+ if (npools <= 1)
+ return;
mutex_lock(&svc_pool_map_mutex);
if (!--m->count) {
@@ -283,7 +311,6 @@ svc_pool_map_put(void)
mutex_unlock(&svc_pool_map_mutex);
}
-EXPORT_SYMBOL_GPL(svc_pool_map_put);
static int svc_pool_map_get_node(unsigned int pidx)
{
@@ -329,32 +356,35 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
}
}
-/*
- * Use the mapping mode to choose a pool for a given CPU.
- * Used when enqueueing an incoming RPC. Always returns
- * a non-NULL pool pointer.
+/**
+ * svc_pool_for_cpu - Select pool to run a thread on this cpu
+ * @serv: An RPC service
+ *
+ * Use the active CPU and the svc_pool_map's mode setting to
+ * select the svc thread pool to use. Once initialized, the
+ * svc_pool_map does not change.
+ *
+ * Return value:
+ * A pointer to an svc_pool
*/
-struct svc_pool *
-svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
{
struct svc_pool_map *m = &svc_pool_map;
+ int cpu = raw_smp_processor_id();
unsigned int pidx = 0;
- /*
- * An uninitialised map happens in a pure client when
- * lockd is brought up, so silently treat it the
- * same as SVC_POOL_GLOBAL.
- */
- if (svc_serv_is_pooled(serv)) {
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
- }
+ if (serv->sv_nrpools <= 1)
+ return serv->sv_pools;
+
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
}
+
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -424,7 +454,7 @@ __svc_init_bc(struct svc_serv *serv)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- const struct svc_serv_ops *ops)
+ int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@@ -435,13 +465,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- serv->sv_nrthreads = 1;
+ kref_init(&serv->sv_refcnt);
serv->sv_stats = prog->pg_stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
- serv->sv_ops = ops;
+ serv->sv_threadfn = threadfn;
xdrsize = 0;
while (prog) {
prog->pg_lovers = prog->pg_nvers-1;
@@ -487,59 +517,56 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return serv;
}
-struct svc_serv *
-svc_create(struct svc_program *prog, unsigned int bufsize,
- const struct svc_serv_ops *ops)
+/**
+ * svc_create - Create an RPC service
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
+ int (*threadfn)(void *data))
{
- return __svc_create(prog, bufsize, /*npools*/1, ops);
+ return __svc_create(prog, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
-struct svc_serv *
-svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- const struct svc_serv_ops *ops)
+/**
+ * svc_create_pooled - Create an RPC service with pooled threads
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ unsigned int bufsize,
+ int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, ops);
+ serv = __svc_create(prog, bufsize, npools, threadfn);
if (!serv)
goto out_err;
return serv;
out_err:
- svc_pool_map_put();
+ svc_pool_map_put(npools);
return NULL;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
-void svc_shutdown_net(struct svc_serv *serv, struct net *net)
-{
- svc_close_net(serv, net);
-
- if (serv->sv_ops->svo_shutdown)
- serv->sv_ops->svo_shutdown(serv, net);
-}
-EXPORT_SYMBOL_GPL(svc_shutdown_net);
-
/*
* Destroy an RPC service. Should be called with appropriate locking to
- * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
+ * protect sv_permsocks and sv_tempsocks.
*/
void
-svc_destroy(struct svc_serv *serv)
+svc_destroy(struct kref *ref)
{
- dprintk("svc: svc_destroy(%s, %d)\n",
- serv->sv_program->pg_name,
- serv->sv_nrthreads);
-
- if (serv->sv_nrthreads) {
- if (--(serv->sv_nrthreads) != 0) {
- svc_sock_update_bufs(serv);
- return;
- }
- } else
- printk("svc_destroy: no threads for serv=%p!\n", serv);
+ struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+ dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
del_timer_sync(&serv->sv_temptimer);
/*
@@ -551,8 +578,7 @@ svc_destroy(struct svc_serv *serv)
cache_clean_deferred(serv);
- if (svc_serv_is_pooled(serv))
- svc_pool_map_put();
+ svc_pool_map_put(serv->sv_nrpools);
kfree(serv->sv_pools);
kfree(serv);
@@ -638,7 +664,7 @@ out_enomem:
}
EXPORT_SYMBOL_GPL(svc_rqst_alloc);
-struct svc_rqst *
+static struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
{
struct svc_rqst *rqstp;
@@ -647,14 +673,17 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
return ERR_PTR(-ENOMEM);
- serv->sv_nrthreads++;
+ svc_get(serv);
+ spin_lock_bh(&serv->sv_lock);
+ serv->sv_nrthreads += 1;
+ spin_unlock_bh(&serv->sv_lock);
+
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
return rqstp;
}
-EXPORT_SYMBOL_GPL(svc_prepare_thread);
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
@@ -728,11 +757,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
- __module_get(serv->sv_ops->svo_module);
- task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
+ task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
- module_put(serv->sv_ops->svo_module);
svc_exit_thread(rqstp);
return PTR_ERR(task);
}
@@ -748,59 +775,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
-
-/* destroy old threads */
-static int
-svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
-{
- struct task_struct *task;
- unsigned int state = serv->sv_nrthreads-1;
-
- /* destroy old threads */
- do {
- task = choose_victim(serv, pool, &state);
- if (task == NULL)
- break;
- send_sig(SIGINT, task, 1);
- nrservs++;
- } while (nrservs < 0);
-
- return 0;
-}
-
/*
* Create or destroy enough new threads to make the number
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Caller must ensure that mutual exclusion between this and
* server startup or shutdown.
- *
- * Destroying threads relies on the service threads filling in
- * rqstp->rq_task, which only the nfs ones do. Assumes the serv
- * has been created using svc_create_pooled().
- *
- * Based on code that used to be in nfsd_svc() but tweaked
- * to be pool-aware.
*/
-int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
-{
- if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
- } else {
- spin_lock_bh(&pool->sp_lock);
- nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
- }
-
- if (nrservs > 0)
- return svc_start_kthreads(serv, pool, nrservs);
- if (nrservs < 0)
- return svc_signal_kthreads(serv, pool, nrservs);
- return 0;
-}
-EXPORT_SYMBOL_GPL(svc_set_num_threads);
/* destroy old threads */
static int
@@ -821,11 +802,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
}
int
-svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
+ nrservs -= serv->sv_nrthreads;
} else {
spin_lock_bh(&pool->sp_lock);
nrservs -= pool->sp_nrthreads;
@@ -838,7 +818,7 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
return svc_stop_kthreads(serv, pool, nrservs);
return 0;
}
-EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+EXPORT_SYMBOL_GPL(svc_set_num_threads);
/**
* svc_rqst_replace_page - Replace one page in rq_pages[]
@@ -890,11 +870,14 @@ svc_exit_thread(struct svc_rqst *rqstp)
list_del_rcu(&rqstp->rq_all);
spin_unlock_bh(&pool->sp_lock);
+ spin_lock_bh(&serv->sv_lock);
+ serv->sv_nrthreads -= 1;
+ spin_unlock_bh(&serv->sv_lock);
+ svc_sock_update_bufs(serv);
+
svc_rqst_free(rqstp);
- /* Release the server */
- if (serv)
- svc_destroy(serv);
+ svc_put(serv);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
@@ -1222,7 +1205,7 @@ svc_generic_init_request(struct svc_rqst *rqstp,
goto err_bad_proc;
/* Initialize storage for argp and resp */
- memset(rqstp->rq_argp, 0, procp->pc_argsize);
+ memset(rqstp->rq_argp, 0, procp->pc_argzero);
memset(rqstp->rq_resp, 0, procp->pc_ressize);
/* Bump per-procedure stats counter */
@@ -1261,10 +1244,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off by GSS integrity and privacy services */
- set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
- set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
- clear_bit(RQ_DROPME, &rqstp->rq_flags);
+ __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ __clear_bit(RQ_DROPME, &rqstp->rq_flags);
svc_putu32(resv, rqstp->rq_xid);
@@ -1382,7 +1365,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
svc_authorise(rqstp);
close_xprt:
if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
- svc_close_xprt(rqstp->rq_xprt);
+ svc_xprt_close(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
return 0;
@@ -1451,8 +1434,7 @@ svc_process(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
- struct svc_serv *serv = rqstp->rq_server;
- u32 dir;
+ __be32 dir;
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
if (!fail_sunrpc.ignore_server_disconnect &&
@@ -1467,7 +1449,7 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_next_page = &rqstp->rq_respages[1];
resv->iov_base = page_address(rqstp->rq_respages[0]);
resv->iov_len = 0;
- rqstp->rq_res.pages = rqstp->rq_respages + 1;
+ rqstp->rq_res.pages = rqstp->rq_next_page;
rqstp->rq_res.len = 0;
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = 0;
@@ -1475,18 +1457,17 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_base = NULL;
rqstp->rq_res.tail[0].iov_len = 0;
- dir = svc_getnl(argv);
- if (dir != 0) {
- /* direction != CALL */
- svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
- serv->sv_stats->rpcbadfmt++;
+ dir = svc_getu32(argv);
+ if (dir != rpc_call)
+ goto out_baddir;
+ if (!svc_process_common(rqstp, argv, resv))
goto out_drop;
- }
-
- /* Returns 1 for send, 0 for drop */
- if (likely(svc_process_common(rqstp, argv, resv)))
- return svc_send(rqstp);
+ return svc_send(rqstp);
+out_baddir:
+ svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
+ be32_to_cpu(dir));
+ rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
return 0;
@@ -1573,8 +1554,12 @@ out:
EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/*
- * Return (transport-specific) limit on the rpc payload.
+/**
+ * svc_max_payload - Return transport-specific limit on the RPC payload
+ * @rqstp: RPC transaction context
+ *
+ * Returns the maximum number of payload bytes the current transport
+ * allows.
*/
u32 svc_max_payload(const struct svc_rqst *rqstp)
{
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 1e99ba1b9d72..2106003645a7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,6 +6,7 @@
*/
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
@@ -161,7 +162,7 @@ static void svc_xprt_free(struct kref *kref)
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
svcauth_unix_info_release(xprt);
put_cred(xprt->xpt_cred);
- put_net(xprt->xpt_net);
+ put_net_track(xprt->xpt_net, &xprt->ns_tracker);
/* See comment on corresponding get in xs_setup_bc_tcp(): */
if (xprt->xpt_bc_xprt)
xprt_put(xprt->xpt_bc_xprt);
@@ -197,7 +198,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
mutex_init(&xprt->xpt_mutex);
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
- xprt->xpt_net = get_net(net);
+ xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC);
strcpy(xprt->xpt_remotebuf, "uninitialized");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
@@ -243,7 +244,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(xprt))
trace_svc_xprt_create_err(serv->sv_program->pg_name,
- xcl->xcl_name, sap, xprt);
+ xcl->xcl_name, sap, len, xprt);
return xprt;
}
@@ -264,15 +265,13 @@ void svc_xprt_received(struct svc_xprt *xprt)
return;
}
- trace_svc_xprt_received(xprt);
-
/* As soon as we clear busy, the xprt could be closed and
- * 'put', so we need a reference to call svc_enqueue_xprt with:
+ * 'put', so we need a reference to call svc_xprt_enqueue with:
*/
svc_xprt_get(xprt);
smp_mb__before_atomic();
clear_bit(XPT_BUSY, &xprt->xpt_flags);
- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+ svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_received);
@@ -286,7 +285,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
svc_xprt_received(new);
}
-static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
@@ -322,21 +321,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
return -EPROTONOSUPPORT;
}
-int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+/**
+ * svc_xprt_create - Add a new listener to @serv
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @family: network address family
+ * @port: listener port
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * Return values:
+ * %0: New listener added successfully
+ * %-EPROTONOSUPPORT: Requested transport type not supported
+ */
+int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
{
int err;
- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
if (err == -EPROTONOSUPPORT) {
request_module("svc%s", xprt_name);
- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
}
return err;
}
-EXPORT_SYMBOL_GPL(svc_create_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_create);
/*
* Copy the local and remote xprt addresses to the rqstp structure
@@ -412,6 +425,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
+ if (xpt_flags & BIT(XPT_BUSY))
+ return false;
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
return true;
if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
@@ -424,11 +439,15 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
return false;
}
-void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+/**
+ * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
+ * @xprt: transport with data pending
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
- int cpu;
if (!svc_xprt_ready(xprt))
return;
@@ -441,8 +460,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
return;
- cpu = get_cpu();
- pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
+ pool = svc_pool_for_cpu(xprt->xpt_server);
atomic_long_inc(&pool->sp_stats.packets);
@@ -465,21 +483,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
rqstp = NULL;
out_unlock:
rcu_read_unlock();
- put_cpu();
- trace_svc_xprt_do_enqueue(xprt, rqstp);
-}
-EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
-
-/*
- * Queue up a transport with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
-{
- if (test_bit(XPT_BUSY, &xprt->xpt_flags))
- return;
- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+ trace_svc_xprt_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -687,8 +691,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
set_current_state(TASK_RUNNING);
return -EINTR;
}
- trace_svc_alloc_arg_err(pages);
- schedule_timeout(msecs_to_jiffies(500));
+ trace_svc_alloc_arg_err(pages, ret);
+ memalloc_retry_wait(GFP_KERNEL);
}
rqstp->rq_page_end = &rqstp->rq_pages[pages];
rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */
@@ -842,8 +846,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
} else
svc_xprt_received(xprt);
+
out:
- trace_svc_handle_xprt(xprt, len);
return len;
}
@@ -1061,7 +1065,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
svc_xprt_put(xprt);
}
-void svc_close_xprt(struct svc_xprt *xprt)
+/**
+ * svc_xprt_close - Close a client connection
+ * @xprt: transport to disconnect
+ *
+ */
+void svc_xprt_close(struct svc_xprt *xprt)
{
trace_svc_xprt_close(xprt);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
@@ -1076,7 +1085,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
*/
svc_delete_xprt(xprt);
}
-EXPORT_SYMBOL_GPL(svc_close_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_close);
static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
{
@@ -1128,7 +1137,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
}
}
-/*
+/**
+ * svc_xprt_destroy_all - Destroy transports associated with @serv
+ * @serv: RPC service to be shut down
+ * @net: target network namespace
+ *
* Server threads may still be running (especially in the case where the
* service is still running in other network namespaces).
*
@@ -1140,7 +1153,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
* threads, we may need to wait a little while and then check again to
* see if they're done.
*/
-void svc_close_net(struct svc_serv *serv, struct net *net)
+void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
{
int delay = 0;
@@ -1151,6 +1164,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
msleep(delay++);
}
}
+EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);
/*
* Handle defer and revisit of requests
@@ -1213,7 +1227,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
dr->addrlen = rqstp->rq_addrlen;
dr->daddr = rqstp->rq_daddr;
dr->argslen = rqstp->rq_arg.len >> 2;
- dr->xprt_hlen = rqstp->rq_xprt_hlen;
+ dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
+ rqstp->rq_xprt_ctxt = NULL;
/* back up head to the start of the buffer and copy */
skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
@@ -1223,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
- set_bit(RQ_DROPME, &rqstp->rq_flags);
+ __set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
return &dr->handle;
@@ -1239,21 +1254,21 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
trace_svc_defer_recv(dr);
/* setup iov_base past transport header */
- rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
+ rqstp->rq_arg.head[0].iov_base = dr->args;
/* The iov_len does not include the transport header bytes */
- rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
+ rqstp->rq_arg.head[0].iov_len = dr->argslen << 2;
rqstp->rq_arg.page_len = 0;
/* The rq_arg.len includes the transport header bytes */
- rqstp->rq_arg.len = dr->argslen<<2;
+ rqstp->rq_arg.len = dr->argslen << 2;
rqstp->rq_prot = dr->prot;
memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
rqstp->rq_addrlen = dr->addrlen;
/* Save off transport header len in case we get deferred again */
- rqstp->rq_xprt_hlen = dr->xprt_hlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
+ rqstp->rq_xprt_ctxt = dr->xprt_ctxt;
svc_xprt_received(rqstp->rq_xprt);
- return (dr->argslen<<2) - dr->xprt_hlen;
+ return dr->argslen << 2;
}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 5a8b8e03fdd4..e72ba2f13f6c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -31,10 +31,12 @@
*/
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = {
[RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null,
[RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix,
+ [RPC_AUTH_TLS] = (struct auth_ops __force __rcu *)&svcauth_tls,
};
static struct auth_ops *
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index d7ed7d49115a..b1efc34db6ed 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -37,6 +37,7 @@ struct unix_domain {
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
{
@@ -789,6 +790,65 @@ struct auth_ops svcauth_null = {
static int
+svcauth_tls_accept(struct svc_rqst *rqstp)
+{
+ struct svc_cred *cred = &rqstp->rq_cred;
+ struct kvec *argv = rqstp->rq_arg.head;
+ struct kvec *resv = rqstp->rq_res.head;
+
+ if (argv->iov_len < XDR_UNIT * 3)
+ return SVC_GARBAGE;
+
+ /* Call's cred length */
+ if (svc_getu32(argv) != xdr_zero) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ /* Call's verifier flavor and its length */
+ if (svc_getu32(argv) != rpc_auth_null ||
+ svc_getu32(argv) != xdr_zero) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
+ return SVC_DENIED;
+ }
+
+ /* AUTH_TLS is not valid on non-NULL procedures */
+ if (rqstp->rq_proc != 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ /* Mapping to nobody uid/gid is required */
+ cred->cr_uid = INVALID_UID;
+ cred->cr_gid = INVALID_GID;
+ cred->cr_group_info = groups_alloc(0);
+ if (cred->cr_group_info == NULL)
+ return SVC_CLOSE; /* kmalloc failure - client must retry */
+
+ /* Reply's verifier */
+ svc_putnl(resv, RPC_AUTH_NULL);
+ if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
+ svc_putnl(resv, 8);
+ memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8);
+ resv->iov_len += 8;
+ } else
+ svc_putnl(resv, 0);
+
+ rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
+ return SVC_OK;
+}
+
+struct auth_ops svcauth_tls = {
+ .name = "tls",
+ .owner = THIS_MODULE,
+ .flavour = RPC_AUTH_TLS,
+ .accept = svcauth_tls_accept,
+ .release = svcauth_null_release,
+ .set_client = svcauth_unix_set_client,
+};
+
+
+static int
svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 478f857cdaed..2fc98fea59b4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -117,15 +117,6 @@ static void svc_reclassify_socket(struct socket *sock)
*/
static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
{
- struct sk_buff *skb = rqstp->rq_xprt_ctxt;
-
- if (skb) {
- struct svc_sock *svsk =
- container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-
- rqstp->rq_xprt_ctxt = NULL;
- skb_free_datagram_locked(svsk->sk_sk, skb);
- }
}
/**
@@ -259,8 +250,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
ssize_t len;
size_t t;
- rqstp->rq_xprt_hlen = 0;
-
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
@@ -309,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
if (svc_port_is_privileged(svc_addr(rqstp)))
- set_bit(RQ_SECURE, &rqstp->rq_flags);
+ __set_bit(RQ_SECURE, &rqstp->rq_flags);
else
- clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ __clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -464,7 +453,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
0, 0, MSG_PEEK | MSG_DONTWAIT);
if (err < 0)
goto out_recv_err;
- skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
+ skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err);
if (!skb)
goto out_recv_err;
@@ -579,15 +568,18 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
+ err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (err < 0)
+ goto out_unlock;
+
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
- xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
- xdr_free_bvec(xdr);
}
+ xdr_free_bvec(xdr);
trace_svcsock_udp_send(xprt, err);
-
+out_unlock:
mutex_unlock(&xprt->xpt_mutex);
if (err < 0)
return err;
@@ -1016,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
- set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ __set_bit(RQ_LOCAL, &rqstp->rq_flags);
else
- clear_bit(RQ_LOCAL, &rqstp->rq_flags);
+ __clear_bit(RQ_LOCAL, &rqstp->rq_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
@@ -1096,7 +1088,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
int ret;
*sentp = 0;
- xdr_alloc_bvec(xdr, GFP_KERNEL);
+ ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
if (ret < 0)
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 2766dd21935b..c1f559892ae8 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -93,11 +93,14 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj,
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
ssize_t ret;
- if (!xprt)
- return 0;
+ if (!xprt) {
+ ret = sprintf(buf, "<closed>\n");
+ goto out;
+ }
ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]);
xprt_put(xprt);
- return ret + 1;
+out:
+ return ret;
}
static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
@@ -105,38 +108,45 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
char *buf)
{
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
- struct sockaddr_storage saddr;
- struct sock_xprt *sock;
- ssize_t ret = -1;
+ size_t buflen = PAGE_SIZE;
+ ssize_t ret;
if (!xprt || !xprt_connected(xprt)) {
- xprt_put(xprt);
- return -ENOTCONN;
- }
-
- sock = container_of(xprt, struct sock_xprt, xprt);
- if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
- goto out;
-
- ret = sprintf(buf, "%pISc\n", &saddr);
-out:
+ ret = sprintf(buf, "<closed>\n");
+ } else if (xprt->ops->get_srcaddr) {
+ ret = xprt->ops->get_srcaddr(xprt, buf, buflen);
+ if (ret > 0) {
+ if (ret < buflen - 1) {
+ buf[ret] = '\n';
+ ret++;
+ buf[ret] = '\0';
+ }
+ } else
+ ret = sprintf(buf, "<closed>\n");
+ } else
+ ret = sprintf(buf, "<not a socket>\n");
xprt_put(xprt);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+ struct kobj_attribute *attr, char *buf)
{
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ unsigned short srcport = 0;
+ size_t buflen = PAGE_SIZE;
ssize_t ret;
if (!xprt || !xprt_connected(xprt)) {
- xprt_put(xprt);
- return -ENOTCONN;
+ ret = sprintf(buf, "<closed>\n");
+ goto out;
}
- ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n"
+ if (xprt->ops->get_srcport)
+ srcport = xprt->ops->get_srcport(xprt);
+
+ ret = snprintf(buf, buflen,
+ "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n"
"max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n"
"binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n"
"backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n"
@@ -144,14 +154,12 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs,
xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen,
xprt->sending.qlen, xprt->pending.qlen,
- xprt->backlog.qlen, xprt->main,
- (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
- get_srcport(xprt) : 0,
+ xprt->backlog.qlen, xprt->main, srcport,
atomic_long_read(&xprt->queuelen),
- (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
- xprt->address_strings[RPC_DISPLAY_PORT] : "0");
+ xprt->address_strings[RPC_DISPLAY_PORT]);
+out:
xprt_put(xprt);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
@@ -163,10 +171,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
int locked, connected, connecting, close_wait, bound, binding,
closing, congested, cwnd_wait, write_space, offline, remove;
- if (!xprt)
- return 0;
-
- if (!xprt->state) {
+ if (!(xprt && xprt->state)) {
ret = sprintf(buf, "state=CLOSED\n");
} else {
locked = test_bit(XPRT_LOCKED, &xprt->state);
@@ -198,7 +203,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
}
xprt_put(xprt);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
@@ -217,7 +222,7 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
xprt_switch->xps_nunique_destaddr_xprts,
atomic_long_read(&xprt_switch->xps_queuelen));
xprt_switch_put(xprt_switch);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
@@ -286,8 +291,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
int offline = 0, online = 0, remove = 0;
struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj);
- if (!xprt)
- return 0;
+ if (!xprt || !xps) {
+ count = 0;
+ goto out_put;
+ }
if (!strncmp(buf, "offline", 7))
offline = 1;
@@ -295,8 +302,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
online = 1;
else if (!strncmp(buf, "remove", 6))
remove = 1;
- else
- return -EINVAL;
+ else {
+ count = -EINVAL;
+ goto out_put;
+ }
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
count = -EINTR;
@@ -307,29 +316,14 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
goto release_tasks;
}
if (offline) {
- set_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive--;
- spin_unlock(&xps->xps_lock);
+ xprt_set_offline_locked(xprt, xps);
} else if (online) {
- clear_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive++;
- spin_unlock(&xps->xps_lock);
+ xprt_set_online_locked(xprt, xps);
} else if (remove) {
- if (test_bit(XPRT_OFFLINE, &xprt->state)) {
- set_bit(XPRT_REMOVE, &xprt->state);
- xprt_force_disconnect(xprt);
- if (test_bit(XPRT_CONNECTED, &xprt->state)) {
- if (!xprt->sending.qlen &&
- !xprt->pending.qlen &&
- !xprt->backlog.qlen &&
- !atomic_long_read(&xprt->queuelen))
- rpc_xprt_switch_remove_xprt(xps, xprt);
- }
- } else {
+ if (test_bit(XPRT_OFFLINE, &xprt->state))
+ xprt_delete_locked(xprt, xps);
+ else
count = -EINVAL;
- }
}
release_tasks:
@@ -422,6 +416,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = {
&rpc_sysfs_xprt_change_state.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt);
static struct kobj_attribute rpc_sysfs_xprt_switch_info =
__ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL);
@@ -430,6 +425,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
&rpc_sysfs_xprt_switch_info.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch);
static struct kobj_type rpc_sysfs_client_type = {
.release = rpc_sysfs_client_release,
@@ -439,14 +435,14 @@ static struct kobj_type rpc_sysfs_client_type = {
static struct kobj_type rpc_sysfs_xprt_switch_type = {
.release = rpc_sysfs_xprt_switch_release,
- .default_attrs = rpc_sysfs_xprt_switch_attrs,
+ .default_groups = rpc_sysfs_xprt_switch_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_switch_namespace,
};
static struct kobj_type rpc_sysfs_xprt_type = {
.release = rpc_sysfs_xprt_release,
- .default_attrs = rpc_sysfs_xprt_attrs,
+ .default_groups = rpc_sysfs_xprt_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_namespace,
};
@@ -522,13 +518,16 @@ void rpc_sysfs_client_setup(struct rpc_clnt *clnt,
struct net *net)
{
struct rpc_sysfs_client *rpc_client;
+ struct rpc_sysfs_xprt_switch *xswitch =
+ (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+
+ if (!xswitch)
+ return;
rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj,
net, clnt->cl_clid);
if (rpc_client) {
char name[] = "switch";
- struct rpc_sysfs_xprt_switch *xswitch =
- (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
int ret;
clnt->cl_sysfs = rpc_client;
@@ -562,6 +561,8 @@ void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch,
rpc_xprt_switch->xprt_switch = xprt_switch;
rpc_xprt_switch->xprt = xprt;
kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_ADD);
+ } else {
+ xprt_switch->xps_sysfs = NULL;
}
}
@@ -573,6 +574,9 @@ void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch,
struct rpc_sysfs_xprt_switch *switch_obj =
(struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+ if (!switch_obj)
+ return;
+
rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, gfp_flags);
if (rpc_xprt) {
xprt->xprt_sysfs = rpc_xprt;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index df194cc07035..336a7c7833e4 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -775,6 +775,34 @@ static void xdr_buf_pages_shift_left(const struct xdr_buf *buf,
xdr_buf_tail_copy_left(buf, 0, len - buf->page_len, shift);
}
+static void xdr_buf_head_shift_left(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ const struct kvec *head = buf->head;
+ unsigned int bytes;
+
+ if (!shift || !len)
+ return;
+
+ if (shift > base) {
+ bytes = (shift - base);
+ if (bytes >= len)
+ return;
+ base += bytes;
+ len -= bytes;
+ }
+
+ if (base < head->iov_len) {
+ bytes = min_t(unsigned int, len, head->iov_len - base);
+ memmove(head->iov_base + (base - shift),
+ head->iov_base + base, bytes);
+ base += bytes;
+ len -= bytes;
+ }
+ xdr_buf_pages_shift_left(buf, base - head->iov_len, len, shift);
+}
+
/**
* xdr_shrink_bufhead
* @buf: xdr_buf
@@ -919,7 +947,29 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
- * xdr_commit_encode - Ensure all data is written to buffer
+ * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to XDR buffer into which to encode data
+ * @pages: list of pages to decode into
+ * @rqst: pointer to controlling rpc_rqst, for debugging
+ *
+ */
+void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
+ struct page **pages, struct rpc_rqst *rqst)
+{
+ xdr_reset_scratch_buffer(xdr);
+
+ xdr->buf = buf;
+ xdr->page_ptr = pages;
+ xdr->iov = NULL;
+ xdr->p = page_address(*pages);
+ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
+ xdr->rqst = rqst;
+}
+EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
+
+/**
+ * __xdr_commit_encode - Ensure all data is written to buffer
* @xdr: pointer to xdr_stream
*
* We handle encoding across page boundaries by giving the caller a
@@ -931,26 +981,29 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* required at the end of encoding, or any other time when the xdr_buf
* data might be read.
*/
-inline void xdr_commit_encode(struct xdr_stream *xdr)
+void __xdr_commit_encode(struct xdr_stream *xdr)
{
- int shift = xdr->scratch.iov_len;
+ size_t shift = xdr->scratch.iov_len;
void *page;
- if (shift == 0)
- return;
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
xdr_reset_scratch_buffer(xdr);
}
-EXPORT_SYMBOL_GPL(xdr_commit_encode);
+EXPORT_SYMBOL_GPL(__xdr_commit_encode);
-static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
- size_t nbytes)
+/*
+ * The buffer space to be reserved crosses the boundary between
+ * xdr->buf->head and xdr->buf->pages, or between two pages
+ * in xdr->buf->pages.
+ */
+static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ size_t nbytes)
{
- __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
+ void *p;
if (nbytes > PAGE_SIZE)
goto out_overflow; /* Bigger buffers require special handling */
@@ -964,6 +1017,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag1bytes;
xdr->page_ptr++;
xdr->iov = NULL;
+
/*
* If the last encode didn't end exactly on a page boundary, the
* next one will straddle boundaries. Encode into the next
@@ -972,14 +1026,19 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
* space at the end of the previous buffer:
*/
xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
- p = page_address(*xdr->page_ptr);
+
/*
- * Note this is where the next encode will start after we've
- * shifted this one back:
+ * xdr->p is where the next encode will start after
+ * xdr_commit_encode() has shifted this one back:
*/
- xdr->p = (void *)p + frag2bytes;
+ p = page_address(*xdr->page_ptr);
+ xdr->p = p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - frag1bytes >= PAGE_SIZE)
+ xdr->end = p + PAGE_SIZE;
+ else
+ xdr->end = p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
@@ -1463,71 +1522,35 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
}
EXPORT_SYMBOL_GPL(xdr_read_pages);
-unsigned int xdr_align_data(struct xdr_stream *xdr, unsigned int offset,
- unsigned int length)
-{
- struct xdr_buf *buf = xdr->buf;
- unsigned int from, bytes, len;
- unsigned int shift;
-
- xdr_realign_pages(xdr);
- from = xdr_page_pos(xdr);
-
- if (from >= buf->page_len + buf->tail->iov_len)
- return 0;
- if (from + buf->head->iov_len >= buf->len)
- return 0;
-
- len = buf->len - buf->head->iov_len;
-
- /* We only shift data left! */
- if (WARN_ONCE(from < offset, "SUNRPC: misaligned data src=%u dst=%u\n",
- from, offset))
- return 0;
- if (WARN_ONCE(offset > buf->page_len,
- "SUNRPC: buffer overflow. offset=%u, page_len=%u\n",
- offset, buf->page_len))
- return 0;
-
- /* Move page data to the left */
- shift = from - offset;
- xdr_buf_pages_shift_left(buf, from, len, shift);
-
- bytes = xdr_stream_remaining(xdr);
- if (length > bytes)
- length = bytes;
- bytes -= length;
-
- xdr->buf->len -= shift;
- xdr_set_page(xdr, offset + length, bytes);
- return length;
-}
-EXPORT_SYMBOL_GPL(xdr_align_data);
-
-unsigned int xdr_expand_hole(struct xdr_stream *xdr, unsigned int offset,
- unsigned int length)
+/**
+ * xdr_set_pagelen - Sets the length of the XDR pages
+ * @xdr: pointer to xdr_stream struct
+ * @len: new length of the XDR page data
+ *
+ * Either grows or shrinks the length of the xdr pages by setting pagelen to
+ * @len bytes. When shrinking, any extra data is moved into buf->tail, whereas
+ * when growing any data beyond the current pointer is moved into the tail.
+ *
+ * Returns True if the operation was successful, and False otherwise.
+ */
+void xdr_set_pagelen(struct xdr_stream *xdr, unsigned int len)
{
struct xdr_buf *buf = xdr->buf;
- unsigned int from, to, shift;
-
- xdr_realign_pages(xdr);
- from = xdr_page_pos(xdr);
- to = xdr_align_size(offset + length);
-
- /* Could the hole be behind us? */
- if (to > from) {
- unsigned int buflen = buf->len - buf->head->iov_len;
- shift = to - from;
- xdr_buf_try_expand(buf, shift);
- xdr_buf_pages_shift_right(buf, from, buflen, shift);
- xdr_set_page(xdr, to, xdr_stream_remaining(xdr));
- } else if (to != from)
- xdr_align_data(xdr, to, 0);
- xdr_buf_pages_zero(buf, offset, length);
+ size_t remaining = xdr_stream_remaining(xdr);
+ size_t base = 0;
- return length;
+ if (len < buf->page_len) {
+ base = buf->page_len - len;
+ xdr_shrink_pagelen(buf, len);
+ } else {
+ xdr_buf_head_shift_right(buf, xdr_stream_pos(xdr),
+ buf->page_len, remaining);
+ if (len > buf->page_len)
+ xdr_buf_try_expand(buf, len - buf->page_len);
+ }
+ xdr_set_tail_base(xdr, base, remaining);
}
-EXPORT_SYMBOL_GPL(xdr_expand_hole);
+EXPORT_SYMBOL_GPL(xdr_set_pagelen);
/**
* xdr_enter_page - decode data from the XDR page
@@ -1574,7 +1597,7 @@ EXPORT_SYMBOL_GPL(xdr_buf_from_iov);
*
* @buf and @subbuf may be pointers to the same struct xdr_buf.
*
- * Returns -1 if base of length are out of bounds.
+ * Returns -1 if base or length are out of bounds.
*/
int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf,
unsigned int base, unsigned int len)
@@ -1672,6 +1695,60 @@ bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
EXPORT_SYMBOL_GPL(xdr_stream_subsegment);
/**
+ * xdr_stream_move_subsegment - Move part of a stream to another position
+ * @xdr: the source xdr_stream
+ * @offset: the source offset of the segment
+ * @target: the target offset of the segment
+ * @length: the number of bytes to move
+ *
+ * Moves @length bytes from @offset to @target in the xdr_stream, overwriting
+ * anything in its space. Returns the number of bytes in the segment.
+ */
+unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset,
+ unsigned int target, unsigned int length)
+{
+ struct xdr_buf buf;
+ unsigned int shift;
+
+ if (offset < target) {
+ shift = target - offset;
+ if (xdr_buf_subsegment(xdr->buf, &buf, offset, shift + length) < 0)
+ return 0;
+ xdr_buf_head_shift_right(&buf, 0, length, shift);
+ } else if (offset > target) {
+ shift = offset - target;
+ if (xdr_buf_subsegment(xdr->buf, &buf, target, shift + length) < 0)
+ return 0;
+ xdr_buf_head_shift_left(&buf, shift, length, shift);
+ }
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_move_subsegment);
+
+/**
+ * xdr_stream_zero - zero out a portion of an xdr_stream
+ * @xdr: an xdr_stream to zero out
+ * @offset: the starting point in the stream
+ * @length: the number of bytes to zero
+ */
+unsigned int xdr_stream_zero(struct xdr_stream *xdr, unsigned int offset,
+ unsigned int length)
+{
+ struct xdr_buf buf;
+
+ if (xdr_buf_subsegment(xdr->buf, &buf, offset, length) < 0)
+ return 0;
+ if (buf.head[0].iov_len)
+ xdr_buf_iov_zero(buf.head, 0, buf.head[0].iov_len);
+ if (buf.page_len > 0)
+ xdr_buf_pages_zero(&buf, 0, buf.page_len);
+ if (buf.tail[0].iov_len)
+ xdr_buf_iov_zero(buf.tail, 0, buf.tail[0].iov_len);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_zero);
+
+/**
* xdr_buf_trim - lop at most "len" bytes off the end of "buf"
* @buf: buf to be trimmed
* @len: number of bytes to reduce "buf" by
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a02de2bddb28..656cec208371 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -69,10 +69,11 @@
/*
* Local functions
*/
-static void xprt_init(struct rpc_xprt *xprt, struct net *net);
+static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
-static void xprt_destroy(struct rpc_xprt *xprt);
-static void xprt_request_init(struct rpc_task *task);
+static void xprt_destroy(struct rpc_xprt *xprt);
+static void xprt_request_init(struct rpc_task *task);
+static int xprt_request_prepare(struct rpc_rqst *req, struct xdr_buf *buf);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
@@ -929,12 +930,7 @@ void xprt_connect(struct rpc_task *task)
if (!xprt_lock_write(xprt, task))
return;
- if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- trace_xprt_disconnect_cleanup(xprt);
- xprt->ops->close(xprt);
- }
-
- if (!xprt_connected(xprt)) {
+ if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
rpc_sleep_on_timeout(&xprt->pending, task, NULL,
xprt_request_timeout(task->tk_rqstp));
@@ -1143,16 +1139,19 @@ xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
* @task: RPC task
*
*/
-void
+int
xprt_request_enqueue_receive(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ int ret;
if (!xprt_request_need_enqueue_receive(task, req))
- return;
+ return 0;
- xprt_request_prepare(task->tk_rqstp);
+ ret = xprt_request_prepare(task->tk_rqstp, &req->rq_rcv_buf);
+ if (ret)
+ return ret;
spin_lock(&xprt->queue_lock);
/* Update the softirq receive buffer */
@@ -1166,6 +1165,7 @@ xprt_request_enqueue_receive(struct rpc_task *task)
/* Turn off autodisconnect */
del_singleshot_timer_sync(&xprt->timer);
+ return 0;
}
/**
@@ -1218,6 +1218,8 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
xprt->stat.recvs++;
+ xdr_free_bvec(&req->rq_rcv_buf);
+ req->rq_private_buf.bvec = NULL;
req->rq_private_buf.len = copied;
/* Ensure all writes are done before we update */
/* req->rq_reply_bytes_recvd */
@@ -1336,8 +1338,14 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
{
struct rpc_rqst *pos, *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ int ret;
if (xprt_request_need_enqueue_transmit(task, req)) {
+ ret = xprt_request_prepare(task->tk_rqstp, &req->rq_snd_buf);
+ if (ret) {
+ task->tk_status = ret;
+ return;
+ }
req->rq_bytes_sent = 0;
spin_lock(&xprt->queue_lock);
/*
@@ -1354,17 +1362,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
INIT_LIST_HEAD(&req->rq_xmit2);
goto out;
}
- } else if (RPC_IS_SWAPPER(task)) {
- list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
- if (pos->rq_cong || pos->rq_bytes_sent)
- continue;
- if (RPC_IS_SWAPPER(pos->rq_task))
- continue;
- /* Note: req is added _before_ pos */
- list_add_tail(&req->rq_xmit, &pos->rq_xmit);
- INIT_LIST_HEAD(&req->rq_xmit2);
- goto out;
- }
} else if (!req->rq_seqno) {
list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
if (pos->rq_task->tk_owner != task->tk_owner)
@@ -1408,6 +1405,7 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
} else
list_del(&req->rq_xmit2);
atomic_long_dec(&req->rq_xprt->xmit_queuelen);
+ xdr_free_bvec(&req->rq_snd_buf);
}
/**
@@ -1444,8 +1442,6 @@ xprt_request_dequeue_xprt(struct rpc_task *task)
test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
xprt_is_pinned_rqst(req)) {
spin_lock(&xprt->queue_lock);
- xprt_request_dequeue_transmit_locked(task);
- xprt_request_dequeue_receive_locked(task);
while (xprt_is_pinned_rqst(req)) {
set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
@@ -1453,24 +1449,30 @@ xprt_request_dequeue_xprt(struct rpc_task *task)
spin_lock(&xprt->queue_lock);
clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
}
+ xprt_request_dequeue_transmit_locked(task);
+ xprt_request_dequeue_receive_locked(task);
spin_unlock(&xprt->queue_lock);
+ xdr_free_bvec(&req->rq_rcv_buf);
}
}
/**
* xprt_request_prepare - prepare an encoded request for transport
* @req: pointer to rpc_rqst
+ * @buf: pointer to send/rcv xdr_buf
*
* Calls into the transport layer to do whatever is needed to prepare
* the request for transmission or receive.
+ * Returns error, or zero.
*/
-void
-xprt_request_prepare(struct rpc_rqst *req)
+static int
+xprt_request_prepare(struct rpc_rqst *req, struct xdr_buf *buf)
{
struct rpc_xprt *xprt = req->rq_xprt;
if (xprt->ops->prepare_request)
- xprt->ops->prepare_request(req);
+ return xprt->ops->prepare_request(req, buf);
+ return 0;
}
/**
@@ -1503,6 +1505,9 @@ bool xprt_prepare_transmit(struct rpc_task *task)
return false;
}
+ if (atomic_read(&xprt->swapper))
+ /* This will be clear in __rpc_execute */
+ current->flags |= PF_MEMALLOC;
return true;
}
@@ -1692,7 +1697,7 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
goto out;
++xprt->num_reqs;
spin_unlock(&xprt->reserve_lock);
- req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+ req = kzalloc(sizeof(*req), rpc_task_gfp_mask());
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
@@ -1783,7 +1788,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt)
{
int id;
- id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL);
+ id = ida_alloc(&rpc_xprt_ids, GFP_KERNEL);
if (id < 0)
return id;
@@ -1793,7 +1798,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt)
static void xprt_free_id(struct rpc_xprt *xprt)
{
- ida_simple_remove(&rpc_xprt_ids, xprt->id);
+ ida_free(&rpc_xprt_ids, xprt->id);
}
struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
@@ -1817,10 +1822,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
goto out_free;
list_add(&req->rq_list, &xprt->free);
}
- if (max_alloc > num_prealloc)
- xprt->max_reqs = max_alloc;
- else
- xprt->max_reqs = num_prealloc;
+ xprt->max_reqs = max_t(unsigned int, max_alloc, num_prealloc);
xprt->min_reqs = num_prealloc;
xprt->num_reqs = num_prealloc;
@@ -1835,7 +1837,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
void xprt_free(struct rpc_xprt *xprt)
{
- put_net(xprt->xprt_net);
+ put_net_track(xprt->xprt_net, &xprt->ns_tracker);
xprt_free_all_slots(xprt);
xprt_free_id(xprt);
rpc_sysfs_xprt_destroy(xprt);
@@ -1863,7 +1865,7 @@ xprt_alloc_xid(struct rpc_xprt *xprt)
static void
xprt_init_xid(struct rpc_xprt *xprt)
{
- xprt->xid = prandom_u32();
+ xprt->xid = get_random_u32();
}
static void
@@ -1967,8 +1969,6 @@ void xprt_release(struct rpc_task *task)
spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
- xdr_free_bvec(&req->rq_rcv_buf);
- xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
if (req->rq_release_snd_buf)
@@ -2027,7 +2027,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
xprt_init_xid(xprt);
- xprt->xprt_net = get_net(net);
+ xprt->xprt_net = get_net_track(net, &xprt->ns_tracker, GFP_KERNEL);
}
/**
@@ -2112,7 +2112,14 @@ static void xprt_destroy(struct rpc_xprt *xprt)
*/
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
+ /*
+ * xprt_schedule_autodisconnect() can run after XPRT_LOCKED
+ * is cleared. We use ->transport_lock to ensure the mod_timer()
+ * can only run *before* del_time_sync(), never after.
+ */
+ spin_lock(&xprt->transport_lock);
del_timer_sync(&xprt->timer);
+ spin_unlock(&xprt->transport_lock);
/*
* Destroy sockets etc from the system workqueue so they can
@@ -2151,3 +2158,35 @@ void xprt_put(struct rpc_xprt *xprt)
kref_put(&xprt->kref, xprt_destroy_kref);
}
EXPORT_SYMBOL_GPL(xprt_put);
+
+void xprt_set_offline_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps)
+{
+ if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive--;
+ spin_unlock(&xps->xps_lock);
+ }
+}
+
+void xprt_set_online_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps)
+{
+ if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive++;
+ spin_unlock(&xps->xps_lock);
+ }
+}
+
+void xprt_delete_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps)
+{
+ if (test_and_set_bit(XPRT_REMOVE, &xprt->state))
+ return;
+
+ xprt_force_disconnect(xprt);
+ if (!test_bit(XPRT_CONNECTED, &xprt->state))
+ return;
+
+ if (!xprt->sending.qlen && !xprt->pending.qlen &&
+ !xprt->backlog.qlen && !atomic_long_read(&xprt->queuelen))
+ rpc_xprt_switch_remove_xprt(xps, xprt, true);
+}
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 1693f81aae37..701250b305db 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -27,6 +27,7 @@ typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin;
static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall;
+static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline;
static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
struct rpc_xprt *xprt)
@@ -61,11 +62,11 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
}
static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
- struct rpc_xprt *xprt)
+ struct rpc_xprt *xprt, bool offline)
{
if (unlikely(xprt == NULL))
return;
- if (!test_bit(XPRT_OFFLINE, &xprt->state))
+ if (!test_bit(XPRT_OFFLINE, &xprt->state) && offline)
xps->xps_nactive--;
xps->xps_nxprts--;
if (xps->xps_nxprts == 0)
@@ -78,14 +79,15 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
* rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch
* @xps: pointer to struct rpc_xprt_switch
* @xprt: pointer to struct rpc_xprt
+ * @offline: indicates if the xprt that's being removed is in an offline state
*
* Removes xprt from the list of struct rpc_xprt in xps.
*/
void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
- struct rpc_xprt *xprt)
+ struct rpc_xprt *xprt, bool offline)
{
spin_lock(&xps->xps_lock);
- xprt_switch_remove_xprt_locked(xps, xprt);
+ xprt_switch_remove_xprt_locked(xps, xprt, offline);
spin_unlock(&xps->xps_lock);
xprt_put(xprt);
}
@@ -101,7 +103,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags)
{
int id;
- id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags);
+ id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags);
if (id < 0)
return id;
@@ -111,7 +113,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags)
static void xprt_switch_free_id(struct rpc_xprt_switch *xps)
{
- ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id);
+ ida_free(&rpc_xprtswitch_ids, xps->xps_id);
}
/**
@@ -154,7 +156,7 @@ static void xprt_switch_free_entries(struct rpc_xprt_switch *xps)
xprt = list_first_entry(&xps->xps_xprt_list,
struct rpc_xprt, xprt_switch);
- xprt_switch_remove_xprt_locked(xps, xprt);
+ xprt_switch_remove_xprt_locked(xps, xprt, true);
spin_unlock(&xps->xps_lock);
xprt_put(xprt);
spin_lock(&xps->xps_lock);
@@ -249,6 +251,18 @@ struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
}
static
+struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head)
+{
+ struct rpc_xprt *pos;
+
+ list_for_each_entry_rcu(pos, head, xprt_switch) {
+ if (!xprt_is_active(pos))
+ return pos;
+ }
+ return NULL;
+}
+
+static
struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
{
struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
@@ -259,8 +273,9 @@ struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
}
static
-struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
- const struct rpc_xprt *cur)
+struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head,
+ const struct rpc_xprt *cur,
+ bool find_active)
{
struct rpc_xprt *pos;
bool found = false;
@@ -268,14 +283,25 @@ struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == pos)
found = true;
- if (found && xprt_is_active(pos))
+ if (found && ((find_active && xprt_is_active(pos)) ||
+ (!find_active && xprt_is_active(pos))))
return pos;
}
return NULL;
}
static
-struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
+struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
+ const struct rpc_xprt *cur)
+{
+ return _xprt_switch_find_current_entry(head, cur, true);
+}
+
+static
+struct rpc_xprt * _xprt_iter_current_entry(struct rpc_xprt_iter *xpi,
+ struct rpc_xprt *first_entry(struct list_head *head),
+ struct rpc_xprt *current_entry(struct list_head *head,
+ const struct rpc_xprt *cur))
{
struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
struct list_head *head;
@@ -284,8 +310,30 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
return NULL;
head = &xps->xps_xprt_list;
if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2)
- return xprt_switch_find_first_entry(head);
- return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
+ return first_entry(head);
+ return current_entry(head, xpi->xpi_cursor);
+}
+
+static
+struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
+{
+ return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry,
+ xprt_switch_find_current_entry);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head,
+ const struct rpc_xprt *cur)
+{
+ return _xprt_switch_find_current_entry(head, cur, false);
+}
+
+static
+struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
+{
+ return _xprt_iter_current_entry(xpi,
+ xprt_switch_find_first_entry_offline,
+ xprt_switch_find_current_entry_offline);
}
bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
@@ -310,7 +358,7 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
- const struct rpc_xprt *cur)
+ const struct rpc_xprt *cur, bool check_active)
{
struct rpc_xprt *pos, *prev = NULL;
bool found = false;
@@ -318,7 +366,12 @@ struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == prev)
found = true;
- if (found && xprt_is_active(pos))
+ /* for request to return active transports return only
+ * active, for request to return offline transports
+ * return only offline
+ */
+ if (found && ((check_active && xprt_is_active(pos)) ||
+ (!check_active && !xprt_is_active(pos))))
return pos;
prev = pos;
}
@@ -355,7 +408,7 @@ struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head
{
struct rpc_xprt *ret;
- ret = xprt_switch_find_next_entry(head, cur);
+ ret = xprt_switch_find_next_entry(head, cur, true);
if (ret != NULL)
return ret;
return xprt_switch_find_first_entry(head);
@@ -397,7 +450,14 @@ static
struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps,
const struct rpc_xprt *cur)
{
- return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur);
+ return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps,
+ const struct rpc_xprt *cur)
+{
+ return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false);
}
static
@@ -407,6 +467,13 @@ struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
xprt_switch_find_next_entry_all);
}
+static
+struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi)
+{
+ return xprt_iter_next_entry_multiple(xpi,
+ xprt_switch_find_next_entry_offline);
+}
+
/*
* xprt_iter_rewind - Resets the xprt iterator
* @xpi: pointer to rpc_xprt_iter
@@ -414,7 +481,6 @@ struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
* Resets xpi to ensure that it points to the first entry in the list
* of transports.
*/
-static
void xprt_iter_rewind(struct rpc_xprt_iter *xpi)
{
rcu_read_lock();
@@ -460,6 +526,12 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
__xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall);
}
+void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi,
+ struct rpc_xprt_switch *xps)
+{
+ __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline);
+}
+
/**
* xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
* @xpi: pointer to rpc_xprt_iter
@@ -574,3 +646,10 @@ const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = {
.xpi_xprt = xprt_iter_current_entry,
.xpi_next = xprt_iter_next_entry_all,
};
+
+static
+const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = {
+ .xpi_rewind = xprt_iter_default_rewind,
+ .xpi_xprt = xprt_iter_current_entry_offline,
+ .xpi_next = xprt_iter_next_entry_offline,
+};
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 17f174d6ea3b..e4d84a13c566 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -13,10 +13,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
#undef RPCRDMA_BACKCHANNEL_DEBUG
/**
@@ -193,7 +189,7 @@ create_req:
return NULL;
size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE);
- req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
+ req = rpcrdma_req_create(r_xprt, size);
if (!req)
return NULL;
if (rpcrdma_req_setup(r_xprt, req)) {
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ff699307e820..ffbf99894970 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -45,10 +45,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
static void frwr_cid_init(struct rpcrdma_ep *ep,
struct rpcrdma_mr *mr)
{
@@ -128,16 +124,16 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
unsigned int depth = ep->re_max_fr_depth;
struct scatterlist *sg;
struct ib_mr *frmr;
- int rc;
+
+ sg = kcalloc_node(depth, sizeof(*sg), XPRTRDMA_GFP_FLAGS,
+ ibdev_to_node(ep->re_id->device));
+ if (!sg)
+ return -ENOMEM;
frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);
if (IS_ERR(frmr))
goto out_mr_err;
- sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS);
- if (!sg)
- goto out_list_err;
-
mr->mr_xprt = r_xprt;
mr->mr_ibmr = frmr;
mr->mr_device = NULL;
@@ -150,13 +146,9 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
return 0;
out_mr_err:
- rc = PTR_ERR(frmr);
- trace_xprtrdma_frwr_alloc(mr, rc);
- return rc;
-
-out_list_err:
- ib_dereg_mr(frmr);
- return -ENOMEM;
+ kfree(sg);
+ trace_xprtrdma_frwr_alloc(mr, PTR_ERR(frmr));
+ return PTR_ERR(frmr);
}
/**
@@ -199,7 +191,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
ep->re_attr.cap.max_recv_sge = 1;
ep->re_mrtype = IB_MR_TYPE_MEM_REG;
- if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ if (attrs->kernel_cap_flags & IBK_SG_GAPS_REG)
ep->re_mrtype = IB_MR_TYPE_SG_GAPS;
/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8035a983c8ce..190a4de239c8 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -54,10 +54,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/* Returns size of largest RPC-over-RDMA header in a Call message
*
* The largest Call header contains a full-size Read list and a
@@ -1125,6 +1121,7 @@ static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct xdr_stream *xdr = &rep->rr_stream;
__be32 *p;
@@ -1148,6 +1145,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
if (*p != cpu_to_be32(RPC_CALL))
return false;
+ /* No bc service. */
+ if (xprt->bc_serv == NULL)
+ return false;
+
/* Now that we are sure this is a backchannel call,
* advance to the RPC header.
*/
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 16897fcb659c..aa2227a7e552 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -119,12 +119,12 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
return -EINVAL;
}
- page = alloc_page(RPCRDMA_DEF_GFP);
+ page = alloc_page(GFP_NOIO | __GFP_NOWARN);
if (!page)
return -ENOMEM;
rqst->rq_buffer = page_address(page);
- rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP);
+ rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, GFP_NOIO | __GFP_NOWARN);
if (!rqst->rq_rbuffer) {
put_page(page);
return -ENOMEM;
@@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
ret = rpcrdma_bc_send_request(rdma, rqst);
if (ret == -ENOTCONN)
- svc_close_xprt(sxprt);
+ svc_xprt_close(sxprt);
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index cf76a6ad127b..5242ad121450 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -831,7 +831,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_err;
if (ret == 0)
goto out_drop;
- rqstp->rq_xprt_hlen = ret;
if (svc_rdma_is_reverse_direction_reply(xprt, ctxt))
goto out_backchannel;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5f0155fdefc7..11cf7c646644 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -478,10 +478,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int write_len;
u64 offset;
- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
- if (!seg)
+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
goto out_overflow;
+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
write_len = min(remaining, seg->rs_length - info->wi_seg_off);
if (!write_len)
goto out_overflow;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 94b20fb47135..199fa012f18a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
- set_bit(RQ_SECURE, &rqstp->rq_flags);
+ __set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 16e5696314a4..10bb2b929c6d 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -60,10 +60,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/*
* tunables
*/
@@ -239,8 +235,11 @@ xprt_rdma_connect_worker(struct work_struct *work)
struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
rx_connect_worker.work);
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ unsigned int pflags = current->flags;
int rc;
+ if (atomic_read(&xprt->swapper))
+ current->flags |= PF_MEMALLOC;
rc = rpcrdma_xprt_connect(r_xprt);
xprt_clear_connecting(xprt);
if (!rc) {
@@ -254,6 +253,7 @@ xprt_rdma_connect_worker(struct work_struct *work)
rpcrdma_xprt_disconnect(r_xprt);
xprt_unlock_connect(xprt, r_xprt);
xprt_wake_pending_tasks(xprt, rc);
+ current_restore_flags(pflags, PF_MEMALLOC);
}
/**
@@ -494,8 +494,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
}
trace_xprtrdma_op_connect(r_xprt, delay);
- queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker,
- delay);
+ queue_delayed_work(system_long_wq, &r_xprt->rx_connect_worker, delay);
}
/**
@@ -521,7 +520,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
return;
out_sleep:
- task->tk_status = -EAGAIN;
+ task->tk_status = -ENOMEM;
xprt_add_backlog(xprt, task);
}
@@ -571,11 +570,7 @@ xprt_rdma_allocate(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- gfp_t flags;
-
- flags = RPCRDMA_DEF_GFP;
- if (RPC_IS_SWAPPER(task))
- flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ gfp_t flags = rpc_task_gfp_mask();
if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
flags))
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3d3673ba9e1e..44b87e4274b4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -63,17 +63,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-/*
- * Globals/Macros
- */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
-/*
- * internal functions
- */
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
@@ -87,8 +76,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
-rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
- gfp_t flags);
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
@@ -274,8 +262,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_connect_status = -ENETUNREACH;
goto wake_connect_worker;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pISpc rejected: %s\n",
- sap, rdma_reject_msg(id, event->status));
ep->re_connect_status = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
ep->re_connect_status = -ENOTCONN;
@@ -291,8 +277,6 @@ disconnected:
break;
}
- dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap,
- ep->re_id->device->name, rdma_event_msg(event->event));
return 0;
}
@@ -388,7 +372,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ep *ep;
int rc;
- ep = kzalloc(sizeof(*ep), GFP_NOFS);
+ ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS);
if (!ep)
return -ENOTCONN;
ep->re_xprt = &r_xprt->rx_xprt;
@@ -419,14 +403,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_attr.qp_type = IB_QPT_RC;
ep->re_attr.port_num = ~0;
- dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
- "iovs: send %d recv %d\n",
- __func__,
- ep->re_attr.cap.max_send_wr,
- ep->re_attr.cap.max_recv_wr,
- ep->re_attr.cap.max_send_sge,
- ep->re_attr.cap.max_recv_sge);
-
ep->re_send_batch = ep->re_max_requests >> 3;
ep->re_send_count = ep->re_send_batch;
init_waitqueue_head(&ep->re_connect_wait);
@@ -436,6 +412,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.send_cq)) {
rc = PTR_ERR(ep->re_attr.send_cq);
+ ep->re_attr.send_cq = NULL;
goto out_destroy;
}
@@ -444,6 +421,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.recv_cq)) {
rc = PTR_ERR(ep->re_attr.recv_cq);
+ ep->re_attr.recv_cq = NULL;
goto out_destroy;
}
ep->re_receive_count = 0;
@@ -482,6 +460,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_pd = ib_alloc_pd(device, 0);
if (IS_ERR(ep->re_pd)) {
rc = PTR_ERR(ep->re_pd);
+ ep->re_pd = NULL;
goto out_destroy;
}
@@ -626,7 +605,7 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
struct rpcrdma_sendctx *sc;
sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge),
- GFP_KERNEL);
+ XPRTRDMA_GFP_FLAGS);
if (!sc)
return NULL;
@@ -649,7 +628,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
* Sends are posted.
*/
i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS;
- buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
+ buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS);
if (!buf->rb_sc_ctxs)
return -ENOMEM;
@@ -760,13 +739,16 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct ib_device *device = ep->re_id->device;
unsigned int count;
+ /* Try to allocate enough to perform one full-sized I/O */
for (count = 0; count < ep->re_max_rdma_segs; count++) {
struct rpcrdma_mr *mr;
int rc;
- mr = kzalloc(sizeof(*mr), GFP_NOFS);
+ mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS,
+ ibdev_to_node(device));
if (!mr)
break;
@@ -811,38 +793,33 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt)
/* If there is no underlying connection, it's no use
* to wake the refresh worker.
*/
- if (ep->re_connect_status == 1) {
- /* The work is scheduled on a WQ_MEM_RECLAIM
- * workqueue in order to prevent MR allocation
- * from recursing into NFS during direct reclaim.
- */
- queue_work(xprtiod_workqueue, &buf->rb_refresh_worker);
- }
+ if (ep->re_connect_status != 1)
+ return;
+ queue_work(system_highpri_wq, &buf->rb_refresh_worker);
}
/**
* rpcrdma_req_create - Allocate an rpcrdma_req object
* @r_xprt: controlling r_xprt
* @size: initial size, in bytes, of send and receive buffers
- * @flags: GFP flags passed to memory allocators
*
* Returns an allocated and fully initialized rpcrdma_req or NULL.
*/
-struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
- gfp_t flags)
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt,
+ size_t size)
{
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
struct rpcrdma_req *req;
- req = kzalloc(sizeof(*req), flags);
+ req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS);
if (req == NULL)
goto out1;
- req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
+ req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE);
if (!req->rl_sendbuf)
goto out2;
- req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
+ req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE);
if (!req->rl_recvbuf)
goto out3;
@@ -878,7 +855,7 @@ int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz;
maxhdrsize *= sizeof(__be32);
rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
- DMA_TO_DEVICE, GFP_KERNEL);
+ DMA_TO_DEVICE);
if (!rb)
goto out;
@@ -949,12 +926,12 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS);
if (rep == NULL)
goto out;
rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
- DMA_FROM_DEVICE, GFP_KERNEL);
+ DMA_FROM_DEVICE);
if (!rep->rr_rdmabuf)
goto out_free;
@@ -1084,8 +1061,8 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) {
struct rpcrdma_req *req;
- req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2,
- GFP_KERNEL);
+ req = rpcrdma_req_create(r_xprt,
+ RPCRDMA_V1_DEF_INLINE_SIZE * 2);
if (!req)
goto out;
list_add(&req->rl_list, &buf->rb_send_bufs);
@@ -1255,15 +1232,14 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
* or Replies they may be registered externally via frwr_map.
*/
static struct rpcrdma_regbuf *
-rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
- gfp_t flags)
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
{
struct rpcrdma_regbuf *rb;
- rb = kmalloc(sizeof(*rb), flags);
+ rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS);
if (!rb)
return NULL;
- rb->rg_data = kmalloc(size, flags);
+ rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS);
if (!rb->rg_data) {
kfree(rb);
return NULL;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c79f92eeda76..5e5ff6784ef5 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -149,7 +149,11 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
return rb->rg_data;
}
-#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
+/* Do not use emergency memory reserves, and fail quickly if memory
+ * cannot be allocated easily. These flags may be used wherever there
+ * is robust logic to handle a failure to allocate.
+ */
+#define XPRTRDMA_GFP_FLAGS (__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
/* To ensure a transport can always make forward progress,
* the number of RDMA segments allowed in header chunk lists
@@ -467,8 +471,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
*/
-struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
- gfp_t flags);
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt,
+ size_t size);
int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void rpcrdma_req_destroy(struct rpcrdma_req *req);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d8ee06a9650a..915b9902f673 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -58,6 +58,7 @@
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
+static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -260,7 +261,7 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
switch (sap->sa_family) {
case AF_LOCAL:
sun = xs_addr_un(xprt);
- strlcpy(buf, sun->sun_path, sizeof(buf));
+ strscpy(buf, sun->sun_path, sizeof(buf));
xprt->address_strings[RPC_DISPLAY_ADDR] =
kstrdup(buf, GFP_KERNEL);
break;
@@ -427,9 +428,9 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
offset += want;
}
- want = xs_alloc_sparse_pages(buf,
- min_t(size_t, count - offset, buf->page_len),
- GFP_KERNEL);
+ want = xs_alloc_sparse_pages(
+ buf, min_t(size_t, count - offset, buf->page_len),
+ GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf),
@@ -763,12 +764,12 @@ xs_stream_start_connect(struct sock_xprt *transport)
/**
* xs_nospace - handle transmit was incomplete
* @req: pointer to RPC request
+ * @transport: pointer to struct sock_xprt
*
*/
-static int xs_nospace(struct rpc_rqst *req)
+static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
{
- struct rpc_xprt *xprt = req->rq_xprt;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk = transport->inet;
int ret = -EAGAIN;
@@ -780,32 +781,50 @@ static int xs_nospace(struct rpc_rqst *req)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
/* wait for more buffer space */
+ set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
xprt_wait_for_buffer_space(xprt);
} else
ret = -ENOTCONN;
spin_unlock(&xprt->transport_lock);
+ return ret;
+}
+
+static int xs_sock_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
- /* Race breaker in case memory is freed before above code is called */
- if (ret == -EAGAIN) {
- struct socket_wq *wq;
+ lock_sock(sk);
+ if (!sock_writeable(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
+ return ret;
+}
- rcu_read_lock();
- wq = rcu_dereference(sk->sk_wq);
- set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
- rcu_read_unlock();
+static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
- sk->sk_write_space(sk);
- }
+ if (vm_wait)
+ return -ENOBUFS;
+ lock_sock(sk);
+ if (!sk_stream_memory_free(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
return ret;
}
-static void
-xs_stream_prepare_request(struct rpc_rqst *req)
+static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
{
- xdr_free_bvec(&req->rq_rcv_buf);
- req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
+ return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
}
/*
@@ -851,27 +870,27 @@ static int xs_local_send_request(struct rpc_rqst *req)
struct msghdr msg = {
.msg_flags = XS_SENDMSG_FLAGS,
};
+ bool vm_wait;
unsigned int sent;
int status;
/* Close the stream if the previous transmission was incomplete */
if (xs_send_request_was_aborted(transport, req)) {
- xs_close(xprt);
+ xprt_force_disconnect(xprt);
return -ENOTCONN;
}
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
+ vm_wait = sk_stream_is_writeable(transport->inet) ? true : false;
+
req->rq_xtime = ktime_get();
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);
- if (status == -EAGAIN && sock_writeable(transport->inet))
- status = -ENOBUFS;
-
if (likely(sent > 0) || status == 0) {
transport->xmit.offset += sent;
req->rq_bytes_sent = transport->xmit.offset;
@@ -881,20 +900,19 @@ static int xs_local_send_request(struct rpc_rqst *req)
return 0;
}
status = -EAGAIN;
+ vm_wait = false;
}
switch (status) {
- case -ENOBUFS:
- break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req, vm_wait);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
fallthrough;
case -EPIPE:
- xs_close(xprt);
+ xprt_force_disconnect(xprt);
status = -ENOTCONN;
}
@@ -935,6 +953,9 @@ static int xs_udp_send_request(struct rpc_rqst *req)
if (!xprt_request_get_cong(xprt, req))
return -EBADSLT;
+ status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
+ if (status < 0)
+ return status;
req->rq_xtime = ktime_get();
status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);
@@ -963,7 +984,7 @@ process_status:
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_sock_nospace(req);
break;
case -ENETUNREACH:
case -ENOBUFS:
@@ -1005,7 +1026,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct msghdr msg = {
.msg_flags = XS_SENDMSG_FLAGS,
};
- bool vm_wait = false;
+ bool vm_wait;
unsigned int sent;
int status;
@@ -1025,12 +1046,17 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
xs_tcp_set_socket_timeouts(xprt, transport->sock);
+ xs_set_srcport(transport, transport->sock);
+
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
req->rq_xtime = ktime_get();
tcp_sock_set_cork(transport->inet, true);
- while (1) {
+
+ vm_wait = sk_stream_is_writeable(transport->inet) ? true : false;
+
+ do {
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
@@ -1051,31 +1077,10 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
WARN_ON_ONCE(sent == 0 && status == 0);
- if (status == -EAGAIN ) {
- /*
- * Return EAGAIN if we're sure we're hitting the
- * socket send buffer limits.
- */
- if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
- break;
- /*
- * Did we hit a memory allocation failure?
- */
- if (sent == 0) {
- status = -ENOBUFS;
- if (vm_wait)
- break;
- /* Retry, knowing now that we're below the
- * socket send buffer limit
- */
- vm_wait = true;
- }
- continue;
- }
- if (status < 0)
- break;
- vm_wait = false;
- }
+ if (sent > 0)
+ vm_wait = false;
+
+ } while (status == 0);
switch (status) {
case -ENOTSOCK:
@@ -1083,7 +1088,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req, vm_wait);
break;
case -ECONNRESET:
case -ECONNREFUSED:
@@ -1124,6 +1129,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
+ clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
}
static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
@@ -1179,6 +1185,16 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (sk == NULL)
return;
+ /*
+ * Make sure we're calling this in a context from which it is safe
+ * to call __fput_sync(). In practice that means rpciod and the
+ * system workqueue.
+ */
+ if (!(current->flags & PF_WQ_WORKER)) {
+ WARN_ON_ONCE(1);
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ return;
+ }
if (atomic_read(&transport->xprt.swapper))
sk_clear_memalloc(sk);
@@ -1202,7 +1218,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
- fput(filp);
+ __fput_sync(filp);
xprt_disconnect_done(xprt);
}
@@ -1331,7 +1347,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
if (sk == NULL)
goto out;
for (;;) {
- skb = skb_recv_udp(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
if (skb == NULL)
break;
xs_udp_data_read_skb(&transport->xprt, sk, skb);
@@ -1354,7 +1370,7 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
}
/**
- * xs_data_ready - "data ready" callback for UDP sockets
+ * xs_data_ready - "data ready" callback for sockets
* @sk: socket with data to read
*
*/
@@ -1362,11 +1378,13 @@ static void xs_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
- dprintk("RPC: xs_data_ready...\n");
xprt = xprt_from_sock(sk);
if (xprt != NULL) {
struct sock_xprt *transport = container_of(xprt,
struct sock_xprt, xprt);
+
+ trace_xs_data_ready(xprt);
+
transport->old_data_ready(sk);
/* Any data means we had a useful conversation, so
* then we don't need to delay the next reconnect
@@ -1395,6 +1413,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
+ * xs_local_state_change - callback to handle AF_LOCAL socket state changes
+ * @sk: socket whose state has changed
+ *
+ */
+static void xs_local_state_change(struct sock *sk)
+{
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+
+ if (!(xprt = xprt_from_sock(sk)))
+ return;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+ if (sk->sk_shutdown & SHUTDOWN_MASK) {
+ clear_bit(XPRT_CONNECTED, &xprt->state);
+ /* Trigger the socket release */
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
+ }
+}
+
+/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
*
@@ -1470,7 +1508,6 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk)
{
- struct socket_wq *wq;
struct sock_xprt *transport;
struct rpc_xprt *xprt;
@@ -1481,15 +1518,10 @@ static void xs_write_space(struct sock *sk)
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
transport = container_of(xprt, struct sock_xprt, xprt);
- rcu_read_lock();
- wq = rcu_dereference(sk->sk_wq);
- if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
- goto out;
-
+ if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state))
+ return;
xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE);
sk->sk_write_pending--;
-out:
- rcu_read_unlock();
}
/**
@@ -1587,7 +1619,7 @@ static int xs_get_random_port(void)
if (max < min)
return -EADDRINUSE;
range = max - min + 1;
- rand = (unsigned short) prandom_u32() % range;
+ rand = prandom_u32_max(range);
return rand + min;
}
@@ -1638,12 +1670,35 @@ static int xs_get_srcport(struct sock_xprt *transport)
return port;
}
-unsigned short get_srcport(struct rpc_xprt *xprt)
+static unsigned short xs_sock_srcport(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
+ unsigned short ret = 0;
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock)
+ ret = xs_sock_getport(sock->sock);
+ mutex_unlock(&sock->recv_mutex);
+ return ret;
+}
+
+static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen)
{
struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
- return xs_sock_getport(sock->sock);
+ union {
+ struct sockaddr sa;
+ struct sockaddr_storage st;
+ } saddr;
+ int ret = -ENOTCONN;
+
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock) {
+ ret = kernel_getsockname(sock->sock, &saddr.sa);
+ if (ret >= 0)
+ ret = snprintf(buf, buflen, "%pISc", &saddr.sa);
+ }
+ mutex_unlock(&sock->recv_mutex);
+ return ret;
}
-EXPORT_SYMBOL(get_srcport);
static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
{
@@ -1825,7 +1880,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sock_set_flag(sk, SOCK_FASYNC);
+ sk->sk_state_change = xs_local_state_change;
sk->sk_error_report = xs_error_report;
xprt_clear_connected(xprt);
@@ -1910,7 +1965,10 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret;
- if (RPC_IS_ASYNC(task)) {
+ if (transport->file)
+ goto force_disconnect;
+
+ if (RPC_IS_ASYNC(task)) {
/*
* We want the AF_LOCAL connect to be resolved in the
* filesystem namespace of the process making the rpc
@@ -1920,20 +1978,25 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
* we'll need to figure out how to pass a namespace to
* connect.
*/
- task->tk_rpc_status = -ENOTCONN;
- rpc_exit(task, -ENOTCONN);
- return;
+ rpc_task_set_rpc_status(task, -ENOTCONN);
+ goto out_wake;
}
ret = xs_local_setup_socket(transport);
if (ret && !RPC_IS_SOFTCONN(task))
msleep_interruptible(15000);
+ return;
+force_disconnect:
+ xprt_force_disconnect(xprt);
+out_wake:
+ xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
/*
- * Note that this should be called with XPRT_LOCKED held (or when we otherwise
- * know that we have exclusive access to the socket), to guard against
- * races with xs_reset_transport.
+ * Note that this should be called with XPRT_LOCKED held, or recv_mutex
+ * held, or when we otherwise know that we have exclusive access to the
+ * socket, to guard against races with xs_reset_transport.
*/
static void xs_set_memalloc(struct rpc_xprt *xprt)
{
@@ -1962,13 +2025,11 @@ xs_enable_swap(struct rpc_xprt *xprt)
{
struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
- if (atomic_inc_return(&xprt->swapper) != 1)
- return 0;
- if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
- return -ERESTARTSYS;
- if (xs->inet)
+ mutex_lock(&xs->recv_mutex);
+ if (atomic_inc_return(&xprt->swapper) == 1 &&
+ xs->inet)
sk_set_memalloc(xs->inet);
- xprt_release_xprt(xprt, NULL);
+ mutex_unlock(&xs->recv_mutex);
return 0;
}
@@ -1984,13 +2045,11 @@ xs_disable_swap(struct rpc_xprt *xprt)
{
struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
- if (!atomic_dec_and_test(&xprt->swapper))
- return;
- if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
- return;
- if (xs->inet)
+ mutex_lock(&xs->recv_mutex);
+ if (atomic_dec_and_test(&xprt->swapper) &&
+ xs->inet)
sk_clear_memalloc(xs->inet);
- xprt_release_xprt(xprt, NULL);
+ mutex_unlock(&xs->recv_mutex);
}
#else
static void xs_set_memalloc(struct rpc_xprt *xprt)
@@ -2023,7 +2082,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sock_set_flag(sk, SOCK_FASYNC);
xprt_set_connected(xprt);
@@ -2047,7 +2105,10 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock;
int status = -EIO;
+ unsigned int pflags = current->flags;
+ if (atomic_read(&xprt->swapper))
+ current->flags |= PF_MEMALLOC;
sock = xs_create_sock(xprt, transport,
xs_addr(xprt)->sa_family, SOCK_DGRAM,
IPPROTO_UDP, false);
@@ -2067,6 +2128,7 @@ out:
xprt_clear_connecting(xprt);
xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
+ current_restore_flags(pflags, PF_MEMALLOC);
}
/**
@@ -2186,7 +2248,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
- sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
/* socket options */
@@ -2226,11 +2287,19 @@ static void xs_tcp_setup_socket(struct work_struct *work)
struct socket *sock = transport->sock;
struct rpc_xprt *xprt = &transport->xprt;
int status;
+ unsigned int pflags = current->flags;
+
+ if (atomic_read(&xprt->swapper))
+ current->flags |= PF_MEMALLOC;
- if (!sock) {
- sock = xs_create_sock(xprt, transport,
- xs_addr(xprt)->sa_family, SOCK_STREAM,
- IPPROTO_TCP, true);
+ if (xprt_connected(xprt))
+ goto out;
+ if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT,
+ &transport->sock_state) ||
+ !sock) {
+ xs_reset_transport(transport);
+ sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, true);
if (IS_ERR(sock)) {
xprt_wake_pending_tasks(xprt, PTR_ERR(sock));
goto out;
@@ -2250,10 +2319,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
sock->sk->sk_state);
switch (status) {
case 0:
- xs_set_srcport(transport, sock);
- fallthrough;
case -EINPROGRESS:
/* SYN_SENT! */
+ set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state);
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
fallthrough;
@@ -2291,6 +2359,7 @@ out:
xprt_clear_connecting(xprt);
out_unlock:
xprt_unlock_connect(xprt, transport);
+ current_restore_flags(pflags, PF_MEMALLOC);
}
/**
@@ -2314,13 +2383,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
- if (transport->sock != NULL && !xprt_connecting(xprt)) {
+ if (transport->sock != NULL) {
dprintk("RPC: xs_connect delayed xprt %p for %lu "
- "seconds\n",
- xprt, xprt->reestablish_timeout / HZ);
-
- /* Start by resetting any existing state */
- xs_reset_transport(transport);
+ "seconds\n", xprt, xprt->reestablish_timeout / HZ);
delay = xprt_reconnect_delay(xprt);
xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
@@ -2482,7 +2547,7 @@ static int bc_malloc(struct rpc_task *task)
return -EINVAL;
}
- page = alloc_page(GFP_KERNEL);
+ page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
if (!page)
return -ENOMEM;
@@ -2520,6 +2585,9 @@ static int bc_sendto(struct rpc_rqst *req)
int err;
req->rq_xtime = ktime_get();
+ err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
+ if (err < 0)
+ return err;
err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
@@ -2616,6 +2684,8 @@ static const struct rpc_xprt_ops xs_udp_ops = {
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
+ .get_srcaddr = xs_sock_srcaddr,
+ .get_srcport = xs_sock_srcport,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
.send_request = xs_udp_send_request,
@@ -2638,6 +2708,8 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
+ .get_srcaddr = xs_sock_srcaddr,
+ .get_srcport = xs_sock_srcport,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
@@ -2796,9 +2868,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
}
xprt_set_bound(xprt);
xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
- ret = ERR_PTR(xs_local_setup_socket(transport));
- if (ret)
- goto out_err;
break;
default:
ret = ERR_PTR(-EAFNOSUPPORT);