diff options
Diffstat (limited to 'net/sunrpc')
41 files changed, 1421 insertions, 809 deletions
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a9f0d17fdb0d..fb75a883503f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -445,7 +445,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Enforce a 60 second garbage collection moratorium * Note that the cred_unused list must be time-ordered. */ - if (!time_in_range(cred->cr_expire, expired, jiffies)) + if (time_in_range(cred->cr_expire, expired, jiffies)) continue; if (!rpcauth_unhash_cred(cred)) continue; @@ -615,6 +615,8 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) }; struct rpc_cred *ret; + if (RPC_IS_ASYNC(task)) + lookupflags |= RPCAUTH_LOOKUP_ASYNC; ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); put_cred(acred.cred); return ret; @@ -631,6 +633,8 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags) if (!acred.principal) return NULL; + if (RPC_IS_ASYNC(task)) + lookupflags |= RPCAUTH_LOOKUP_ASYNC; return auth->au_ops->lookup_cred(auth, &acred, lookupflags); } @@ -654,7 +658,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) }; if (flags & RPC_TASK_ASYNC) - lookupflags |= RPCAUTH_LOOKUP_NEW; + lookupflags |= RPCAUTH_LOOKUP_NEW | RPCAUTH_LOOKUP_ASYNC; if (task->tk_op_cred) /* Task must use exactly this rpc_cred */ new = get_rpccred(task->tk_op_cred); @@ -666,7 +670,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) /* If machine cred couldn't be bound, try a root cred */ if (new) ; - else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS)) + else if (cred == &machine_cred) new = rpcauth_bind_root_cred(task, lookupflags); else if (flags & RPC_TASK_NULLCREDS) new = authnull_ops.lookup_cred(NULL, NULL, 0); @@ -870,7 +874,7 @@ int __init rpcauth_init_module(void) err = rpc_init_authunix(); if (err < 0) goto out1; - err = register_shrinker(&rpc_cred_shrinker); + err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred"); if (err < 0) goto out2; return 0; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5f42aa5fc612..7bb247c51e2f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -72,7 +72,8 @@ struct gss_auth { struct gss_api_mech *mech; enum rpc_gss_svc service; struct rpc_clnt *client; - struct net *net; + struct net *net; + netns_tracker ns_tracker; /* * There are two upcall pipes; dentry[1], named "gssd", is used * for the new text-based upcall; dentry[0] is named after the @@ -145,7 +146,7 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_NOFS); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ @@ -208,7 +209,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(-EFAULT); goto err; } - ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS); + ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_KERNEL); if (ret < 0) { trace_rpcgss_import_ctx(ret); p = ERR_PTR(ret); @@ -510,7 +511,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, int vers; int err = -ENOMEM; - gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); if (gss_msg == NULL) goto err; vers = get_pipe_version(gss_auth->net); @@ -526,7 +527,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, gss_msg->auth = gss_auth; kref_get(&gss_auth->kref); if (service_name) { - gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS); + gss_msg->service_name = kstrdup_const(service_name, GFP_KERNEL); if (!gss_msg->service_name) { err = -ENOMEM; goto err_put_pipe_version; @@ -702,7 +703,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; - buf = kmalloc(mlen, GFP_NOFS); + buf = kmalloc(mlen, GFP_KERNEL); if (!buf) goto out; @@ -1013,7 +1014,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) goto err_free; } gss_auth->client = clnt; - gss_auth->net = get_net(rpc_net_ns(clnt)); + gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker, + GFP_KERNEL); err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) @@ -1068,7 +1070,7 @@ err_destroy_credcache: err_put_mech: gss_mech_put(gss_auth->mech); err_put_net: - put_net(gss_auth->net); + put_net_track(gss_auth->net, &gss_auth->ns_tracker); err_free: kfree(gss_auth->target_name); kfree(gss_auth); @@ -1084,7 +1086,7 @@ gss_free(struct gss_auth *gss_auth) gss_pipe_free(gss_auth->gss_pipe[0]); gss_pipe_free(gss_auth->gss_pipe[1]); gss_mech_put(gss_auth->mech); - put_net(gss_auth->net); + put_net_track(gss_auth->net, &gss_auth->ns_tracker); kfree(gss_auth->target_name); kfree(gss_auth); @@ -1218,7 +1220,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) struct gss_cred *new; /* Make a copy of the cred so that we can reference count it */ - new = kzalloc(sizeof(*gss_cred), GFP_NOFS); + new = kzalloc(sizeof(*gss_cred), GFP_KERNEL); if (new) { struct auth_cred acred = { .cred = gss_cred->gc_base.cr_cred, @@ -1338,10 +1340,11 @@ gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) /* * Lookup RPCSEC_GSS cred for the current process */ -static struct rpc_cred * -gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +static struct rpc_cred *gss_lookup_cred(struct rpc_auth *auth, + struct auth_cred *acred, int flags) { - return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); + return rpcauth_lookup_credcache(auth, acred, flags, + rpc_task_gfp_mask()); } static struct rpc_cred * @@ -1667,7 +1670,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) if (!p) goto validate_failed; - seq = kmalloc(4, GFP_NOFS); + seq = kmalloc(4, GFP_KERNEL); if (!seq) goto validate_failed; *seq = cpu_to_be32(task->tk_rqstp->rq_seqno); @@ -1777,11 +1780,11 @@ alloc_enc_pages(struct rpc_rqst *rqstp) rqstp->rq_enc_pages = kmalloc_array(rqstp->rq_enc_pages_num, sizeof(struct page *), - GFP_NOFS); + GFP_KERNEL); if (!rqstp->rq_enc_pages) goto out; for (i=0; i < rqstp->rq_enc_pages_num; i++) { - rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); + rqstp->rq_enc_pages[i] = alloc_page(GFP_KERNEL); if (rqstp->rq_enc_pages[i] == NULL) goto out_free; } @@ -1985,8 +1988,8 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, if (offset + len > rcv_buf->len) goto unwrap_failed; mic.len = len; - mic.data = kmalloc(len, GFP_NOFS); - if (!mic.data) + mic.data = kmalloc(len, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(mic.data)) goto unwrap_failed; if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len)) goto unwrap_failed; diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h index f6d9631bd9d0..c53b329092d4 100644 --- a/net/sunrpc/auth_gss/auth_gss_internal.h +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); if (len) { - dest->data = kmemdup(p, len, GFP_NOFS); + dest->data = kmemdup(p, len, GFP_KERNEL); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); } else diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index fe97f3106536..4a4082bb22ad 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, if (ret) return ret; - if (!ret) { - *buf_in = buf; - *body_size = toksize; - } + *buf_in = buf; + *body_size = toksize; return ret; } diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 634b6c6e0dcb..3ea58175e159 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -161,7 +161,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL); if (checksumdata == NULL) return GSS_S_FAILURE; @@ -169,7 +169,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(tfm)) goto out_free_cksum; - req = ahash_request_alloc(tfm, GFP_NOFS); + req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) goto out_free_ahash; @@ -257,7 +257,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL); if (!checksumdata) return GSS_S_FAILURE; @@ -265,7 +265,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(tfm)) goto out_free_cksum; - req = ahash_request_alloc(tfm, GFP_NOFS); + req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) goto out_free_ahash; @@ -554,7 +554,7 @@ gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf, WARN_ON(0); return -ENOMEM; } - data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS); + data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_KERNEL); if (!data) return -ENOMEM; diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index fb117817ff5d..3200b971a814 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -49,7 +49,7 @@ krb5_make_seq_num(struct krb5_ctx *kctx, unsigned char *plain; s32 code; - plain = kmalloc(8, GFP_NOFS); + plain = kmalloc(8, GFP_KERNEL); if (!plain) return -ENOMEM; @@ -80,7 +80,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx, dprintk("RPC: krb5_get_seq_num:\n"); - plain = kmalloc(8, GFP_NOFS); + plain = kmalloc(8, GFP_KERNEL); if (!plain) return -ENOMEM; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index e95c009bb869..48337687848c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen) /* initialize to random value */ if (i == 0) { - i = prandom_u32(); - i = (i << 32) | prandom_u32(); + i = get_random_u32(); + i = (i << 32) | get_random_u32(); } switch (conflen) { @@ -409,7 +409,7 @@ static u32 gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages) { - u8 *ptr, *plainhdr; + u8 *ptr; time64_t now; u8 flags = 0x00; __be16 *be16ptr; @@ -426,7 +426,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, return GSS_S_FAILURE; /* construct gss token header */ - ptr = plainhdr = buf->head[0].iov_base + offset; + ptr = buf->head[0].iov_base + offset; *ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff); *ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff); diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 61c276bddaf2..f549e4c05def 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * done without the correct namespace: */ .flags = RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_CONNECTED | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index b87565b64928..bcd74dddbe2d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g * rejecting the server-computed MIC in this somewhat rare case, * do not use splice with the GSS integrity service. */ - clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Did we already verify the signature on the original pass through? */ if (rqstp->rq_deferred) @@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs int pad, remaining_len, offset; u32 rseqno; - clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { @@ -1433,7 +1433,7 @@ static bool use_gss_proxy(struct net *net) static ssize_t write_gssp(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct net *net = PDE_DATA(file_inode(file)); + struct net *net = pde_data(file_inode(file)); char tbuf[20]; unsigned long i; int res; @@ -1461,7 +1461,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf, static ssize_t read_gssp(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct net *net = PDE_DATA(file_inode(file)); + struct net *net = pde_data(file_inode(file)); struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); unsigned long p = *ppos; char tbuf[10]; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index e7df1f782b2e..1e091d3fa607 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -40,11 +40,19 @@ unx_destroy(struct rpc_auth *auth) /* * Lookup AUTH_UNIX creds for current process */ -static struct rpc_cred * -unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +static struct rpc_cred *unx_lookup_cred(struct rpc_auth *auth, + struct auth_cred *acred, int flags) { - struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); - + struct rpc_cred *ret; + + ret = kmalloc(sizeof(*ret), rpc_task_gfp_mask()); + if (!ret) { + if (!(flags & RPCAUTH_LOOKUP_ASYNC)) + return ERR_PTR(-ENOMEM); + ret = mempool_alloc(unix_pool, GFP_NOWAIT); + if (!ret) + return ERR_PTR(-ENOMEM); + } rpcauth_init_cred(ret, acred, auth, &unix_credops); ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; return ret; diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 22a2c235abf1..65a6c6429a53 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** (c) 2007 Network Appliance, Inc. All Rights Reserved. (c) 2009 NetApp. All Rights Reserved. -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -https://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ @@ -64,6 +50,17 @@ static void xprt_free_allocation(struct rpc_rqst *req) kfree(req); } +static void xprt_bc_reinit_xdr_buf(struct xdr_buf *buf) +{ + buf->head[0].iov_len = PAGE_SIZE; + buf->tail[0].iov_len = 0; + buf->pages = NULL; + buf->page_len = 0; + buf->flags = 0; + buf->len = 0; + buf->buflen = PAGE_SIZE; +} + static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) { struct page *page; @@ -75,9 +72,9 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) return 0; } -static -struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) +static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt) { + gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; struct rpc_rqst *req; /* Pre-allocate one backchannel rpc_rqst */ @@ -154,7 +151,7 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) INIT_LIST_HEAD(&tmp_list); for (i = 0; i < min_reqs; i++) { /* Pre-allocate one backchannel rpc_rqst */ - req = xprt_alloc_bc_req(xprt, GFP_KERNEL); + req = xprt_alloc_bc_req(xprt); if (req == NULL) { printk(KERN_ERR "Failed to create bc rpc_rqst\n"); goto out_free; @@ -292,6 +289,9 @@ void xprt_free_bc_rqst(struct rpc_rqst *req) */ spin_lock_bh(&xprt->bc_pa_lock); if (xprt_need_to_requeue(xprt)) { + xprt_bc_reinit_xdr_buf(&req->rq_snd_buf); + xprt_bc_reinit_xdr_buf(&req->rq_rcv_buf); + req->rq_rcv_buf.len = PAGE_SIZE; list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); xprt->bc_alloc_count++; atomic_inc(&xprt->bc_slot_count); @@ -343,7 +343,7 @@ found: break; } else if (req) break; - new = xprt_alloc_bc_req(xprt, GFP_KERNEL); + new = xprt_alloc_bc_req(xprt); } while (new); return req; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 59641803472c..f075a9fb5ccc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -33,7 +33,9 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <trace/events/sunrpc.h> + #include "netns.h" +#include "fail.h" #define RPCDBG_FACILITY RPCDBG_CACHE @@ -675,7 +677,7 @@ static void cache_limit_defers(void) /* Consider removing either the first or the last */ if (cache_defer_cnt > DFR_MAX) { - if (prandom_u32() & 1) + if (prandom_u32_max(2)) discard = list_entry(cache_defer_list.next, struct cache_deferred_req, recent); else @@ -688,16 +690,30 @@ static void cache_limit_defers(void) discard->revisit(discard, 1); } +#if IS_ENABLED(CONFIG_FAIL_SUNRPC) +static inline bool cache_defer_immediately(void) +{ + return !fail_sunrpc.ignore_cache_wait && + should_fail(&fail_sunrpc.attr, 1); +} +#else +static inline bool cache_defer_immediately(void) +{ + return false; +} +#endif + /* Return true if and only if a deferred request is queued. */ static bool cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; - if (req->thread_wait) { + if (!cache_defer_immediately()) { cache_wait_req(req, item); if (!test_bit(CACHE_PENDING, &item->flags)) return false; } + dreq = req->defer(req); if (dreq == NULL) return false; @@ -1536,7 +1552,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, static ssize_t cache_read_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return cache_read(filp, buf, count, ppos, cd); } @@ -1544,14 +1560,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf, static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return cache_write(filp, buf, count, ppos, cd); } static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return cache_poll(filp, wait, cd); } @@ -1560,21 +1576,21 @@ static long cache_ioctl_procfs(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return cache_ioctl(inode, filp, cmd, arg, cd); } static int cache_open_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return cache_open(inode, filp, cd); } static int cache_release_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return cache_release(inode, filp, cd); } @@ -1591,14 +1607,14 @@ static const struct proc_ops cache_channel_proc_ops = { static int content_open_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return content_open(inode, filp, cd); } static int content_release_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return content_release(inode, filp, cd); } @@ -1612,14 +1628,14 @@ static const struct proc_ops content_proc_ops = { static int open_flush_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return open_flush(inode, filp, cd); } static int release_flush_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return release_flush(inode, filp, cd); } @@ -1627,7 +1643,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp) static ssize_t read_flush_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return read_flush(filp, buf, count, ppos, cd); } @@ -1636,7 +1652,7 @@ static ssize_t write_flush_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return write_flush(filp, buf, count, ppos, cd); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a312ea2bc440..993acf38af87 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task, static int rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr); static int rpc_ping(struct rpc_clnt *clnt); +static int rpc_ping_noreply(struct rpc_clnt *clnt); static void rpc_check_timeout(struct rpc_task *task); static void rpc_register_client(struct rpc_clnt *clnt) @@ -344,7 +345,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt) { int clid; - clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL); + clid = ida_alloc(&rpc_clids, GFP_KERNEL); if (clid < 0) return clid; clnt->cl_clid = clid; @@ -353,7 +354,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt) static void rpc_free_clid(struct rpc_clnt *clnt) { - ida_simple_remove(&rpc_clids, clnt->cl_clid); + ida_free(&rpc_clids, clnt->cl_clid); } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, @@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, rpc_shutdown_client(clnt); return ERR_PTR(err); } + } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) { + int err = rpc_ping_noreply(clnt); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } } clnt->cl_softrtry = 1; @@ -644,6 +651,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; new->cl_principal = clnt->cl_principal; + new->cl_max_connect = clnt->cl_max_connect; return new; out_err: @@ -778,7 +786,8 @@ out_revert: EXPORT_SYMBOL_GPL(rpc_switch_client_transport); static -int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi) +int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi, + void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps)) { struct rpc_xprt_switch *xps; @@ -787,11 +796,24 @@ int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi) rcu_read_unlock(); if (xps == NULL) return -EAGAIN; - xprt_iter_init_listall(xpi, xps); + func(xpi, xps); xprt_switch_put(xps); return 0; } +static +int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi) +{ + return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listall); +} + +static +int rpc_clnt_xprt_iter_offline_init(struct rpc_clnt *clnt, + struct rpc_xprt_iter *xpi) +{ + return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listoffline); +} + /** * rpc_clnt_iterate_for_each_xprt - Apply a function to all transports * @clnt: pointer to client @@ -851,6 +873,57 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_killall_tasks); +/** + * rpc_cancel_tasks - try to cancel a set of RPC tasks + * @clnt: Pointer to RPC client + * @error: RPC task error value to set + * @fnmatch: Pointer to selector function + * @data: User data + * + * Uses @fnmatch to define a set of RPC tasks that are to be cancelled. + * The argument @error must be a negative error value. + */ +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data) +{ + struct rpc_task *task; + unsigned long count = 0; + + if (list_empty(&clnt->cl_tasks)) + return 0; + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&clnt->cl_lock); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { + if (!RPC_IS_ACTIVATED(task)) + continue; + if (!fnmatch(task, data)) + continue; + rpc_task_try_cancel(task, error); + count++; + } + spin_unlock(&clnt->cl_lock); + return count; +} +EXPORT_SYMBOL_GPL(rpc_cancel_tasks); + +static int rpc_clnt_disconnect_xprt(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, void *dummy) +{ + if (xprt_connected(xprt)) + xprt_force_disconnect(xprt); + return 0; +} + +void rpc_clnt_disconnect(struct rpc_clnt *clnt) +{ + rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_disconnect_xprt, NULL); +} +EXPORT_SYMBOL_GPL(rpc_clnt_disconnect); + /* * Properly shut down an RPC client, terminating all outstanding * requests. @@ -1065,8 +1138,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (task->tk_xprt) - return; + if (task->tk_xprt) { + if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && + (task->tk_flags & RPC_TASK_MOVEABLE))) + return; + xprt_release(task); + xprt_put(task->tk_xprt); + } if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); else @@ -1085,8 +1163,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (atomic_read(&clnt->cl_swapper)) - task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -1127,6 +1203,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) struct rpc_task *task; task = rpc_new_task(task_setup_data); + if (IS_ERR(task)) + return task; if (!RPC_IS_ASYNC(task)) task->tk_flags |= RPC_TASK_CRED_NOREF; @@ -1227,6 +1305,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) * Create an rpc_task to send the data */ task = rpc_new_task(&task_setup_data); + if (IS_ERR(task)) { + xprt_free_bc_request(req); + return task; + } + xprt_init_bc_request(req, task); task->tk_action = call_bc_encode; @@ -1610,7 +1693,7 @@ static void __rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status) { trace_rpc_call_rpcerror(task, tk_status, rpc_status); - task->tk_rpc_status = rpc_status; + rpc_task_set_rpc_status(task, rpc_status); rpc_exit(task, tk_status); } @@ -1745,6 +1828,9 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; trace_rpc_retry_refresh_status(task); return; + case -ENOMEM: + rpc_delay(task, HZ >> 4); + return; } trace_rpc_refresh_status(task); rpc_call_rpcerror(task, status); @@ -1835,7 +1921,6 @@ rpc_xdr_encode(struct rpc_task *task) req->rq_snd_buf.head[0].iov_len = 0; xdr_init_encode(&xdr, &req->rq_snd_buf, req->rq_snd_buf.head[0].iov_base, req); - xdr_free_bvec(&req->rq_snd_buf); if (rpc_encode_header(task, &xdr)) return; @@ -1855,6 +1940,9 @@ call_encode(struct rpc_task *task) xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ rpc_xdr_encode(task); + /* Add task to reply queue before transmission to avoid races */ + if (task->tk_status == 0 && rpc_reply_expected(task)) + task->tk_status = xprt_request_enqueue_receive(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? */ @@ -1865,7 +1953,7 @@ call_encode(struct rpc_task *task) break; case -EKEYEXPIRED: if (!task->tk_cred_retry) { - rpc_exit(task, task->tk_status); + rpc_call_rpcerror(task, task->tk_status); } else { task->tk_action = call_refresh; task->tk_cred_retry--; @@ -1878,9 +1966,6 @@ call_encode(struct rpc_task *task) return; } - /* Add task to reply queue before transmission to avoid races */ - if (rpc_reply_expected(task)) - xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: task->tk_action = call_transmit; @@ -2116,7 +2201,8 @@ call_connect_status(struct rpc_task *task) xprt_release(task); value = atomic_long_dec_return(&xprt->queuelen); if (value == 0) - rpc_xprt_switch_remove_xprt(xps, saved); + rpc_xprt_switch_remove_xprt(xps, saved, + true); xprt_put(saved); task->tk_xprt = NULL; task->tk_action = call_start; @@ -2197,6 +2283,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. */ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2280,6 +2367,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2362,6 +2450,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; @@ -2393,10 +2486,8 @@ rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - if (RPC_SIGNALLED(task)) { - rpc_call_rpcerror(task, -ERESTARTSYS); + if (RPC_SIGNALLED(task)) return; - } if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; @@ -2622,7 +2713,7 @@ out_unparsable: out_verifier: trace_rpc_bad_verifier(task); - goto out_garbage; + goto out_err; out_msg_denied: error = -EACCES; @@ -2689,6 +2780,10 @@ static const struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; +static const struct rpc_procinfo rpcproc_null_noreply = { + .p_encode = rpcproc_encode_null, +}; + static void rpc_null_call_prepare(struct rpc_task *task, void *data) { @@ -2742,6 +2837,28 @@ static int rpc_ping(struct rpc_clnt *clnt) return status; } +static int rpc_ping_noreply(struct rpc_clnt *clnt) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null_noreply, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = &rpc_null_ops, + .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, + }; + struct rpc_task *task; + int status; + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); + return status; +} + struct rpc_cb_add_xprt_calldata { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; @@ -2793,7 +2910,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, return -EINVAL; } - data = kmalloc(sizeof(*data), GFP_NOFS); + data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->xps = xprt_switch_get(xps); @@ -2805,6 +2922,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC, &rpc_cb_add_xprt_call_ops, data); + if (IS_ERR(task)) + return PTR_ERR(task); + data->xps->xps_nunique_destaddr_xprts++; rpc_put_task(task); success: @@ -2812,6 +2932,30 @@ success: } EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt); +static int rpc_clnt_add_xprt_helper(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + struct rpc_add_xprt_test *data) +{ + struct rpc_task *task; + int status = -EADDRINUSE; + + /* Test the connection */ + task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); + + status = task->tk_status; + rpc_put_task(task); + + if (status < 0) + return status; + + /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */ + data->add_xprt_test(clnt, xprt, data->data); + + return 0; +} + /** * rpc_clnt_setup_test_and_add_xprt() * @@ -2835,8 +2979,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data) { - struct rpc_task *task; - struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data; int status = -EADDRINUSE; xprt = xprt_get(xprt); @@ -2845,31 +2987,19 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr)) goto out_err; - /* Test the connection */ - task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL); - if (IS_ERR(task)) { - status = PTR_ERR(task); - goto out_err; - } - status = task->tk_status; - rpc_put_task(task); - + status = rpc_clnt_add_xprt_helper(clnt, xprt, data); if (status < 0) goto out_err; - /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */ - xtest->add_xprt_test(clnt, xprt, xtest->data); - - xprt_put(xprt); - xprt_switch_put(xps); - - /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ - return 1; + status = 1; out_err: xprt_put(xprt); xprt_switch_put(xps); - pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not added\n", - status, xprt->address_strings[RPC_DISPLAY_ADDR]); + if (status < 0) + pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not " + "added\n", status, + xprt->address_strings[RPC_DISPLAY_ADDR]); + /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ return status; } EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt); @@ -2900,7 +3030,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, unsigned long connect_timeout; unsigned long reconnect_timeout; unsigned char resvport, reuseport; - int ret = 0; + int ret = 0, ident; rcu_read_lock(); xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); @@ -2914,8 +3044,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, reuseport = xprt->reuseport; connect_timeout = xprt->connect_timeout; reconnect_timeout = xprt->max_reconnect_timeout; + ident = xprt->xprt_class->ident; rcu_read_unlock(); + if (!xprtargs->ident) + xprtargs->ident = ident; xprt = xprt_create_transport(xprtargs); if (IS_ERR(xprt)) { ret = PTR_ERR(xprt); @@ -2943,6 +3076,110 @@ out_put_switch: } EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt); +static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + struct rpc_add_xprt_test *data) +{ + struct rpc_xprt_switch *xps; + struct rpc_xprt *main_xprt; + int status = 0; + + xprt_get(xprt); + + rcu_read_lock(); + main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr, + (struct sockaddr *)&main_xprt->addr); + rcu_read_unlock(); + xprt_put(main_xprt); + if (status || !test_bit(XPRT_OFFLINE, &xprt->state)) + goto out; + + status = rpc_clnt_add_xprt_helper(clnt, xprt, data); +out: + xprt_put(xprt); + xprt_switch_put(xps); + return status; +} + +/* rpc_clnt_probe_trunked_xprt -- probe offlined transport for session trunking + * @clnt rpc_clnt structure + * + * For each offlined transport found in the rpc_clnt structure call + * the function rpc_xprt_probe_trunked() which will determine if this + * transport still belongs to the trunking group. + */ +void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *clnt, + struct rpc_add_xprt_test *data) +{ + struct rpc_xprt_iter xpi; + int ret; + + ret = rpc_clnt_xprt_iter_offline_init(clnt, &xpi); + if (ret) + return; + for (;;) { + struct rpc_xprt *xprt = xprt_iter_get_next(&xpi); + + if (!xprt) + break; + ret = rpc_xprt_probe_trunked(clnt, xprt, data); + xprt_put(xprt); + if (ret < 0) + break; + xprt_iter_rewind(&xpi); + } + xprt_iter_destroy(&xpi); +} +EXPORT_SYMBOL_GPL(rpc_clnt_probe_trunked_xprts); + +static int rpc_xprt_offline(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + void *data) +{ + struct rpc_xprt *main_xprt; + struct rpc_xprt_switch *xps; + int err = 0; + + xprt_get(xprt); + + rcu_read_lock(); + main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + err = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr, + (struct sockaddr *)&main_xprt->addr); + rcu_read_unlock(); + xprt_put(main_xprt); + if (err) + goto out; + + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { + err = -EINTR; + goto out; + } + xprt_set_offline_locked(xprt, xps); + + xprt_release_write(xprt, NULL); +out: + xprt_put(xprt); + xprt_switch_put(xps); + return err; +} + +/* rpc_clnt_manage_trunked_xprts -- offline trunked transports + * @clnt rpc_clnt structure + * + * For each active transport found in the rpc_clnt structure call + * the function rpc_xprt_offline() which will identify trunked transports + * and will mark them offline. + */ +void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *clnt) +{ + rpc_clnt_iterate_for_each_xprt(clnt, rpc_xprt_offline, NULL); +} +EXPORT_SYMBOL_GPL(rpc_clnt_manage_trunked_xprts); + struct connect_timeout_data { unsigned long connect_timeout; unsigned long reconnect_timeout; @@ -2985,8 +3222,22 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put); +void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt) +{ + struct rpc_xprt_switch *xps; + + rcu_read_lock(); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + rcu_read_unlock(); + xprt_set_online_locked(xprt, xps); +} + void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { + if (rpc_clnt_xprt_switch_has_addr(clnt, + (const struct sockaddr *)&xprt->addr)) { + return rpc_clnt_xprt_set_online(clnt, xprt); + } rcu_read_lock(); rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch), xprt); @@ -2994,6 +3245,19 @@ void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) } EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt); +void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) +{ + struct rpc_xprt_switch *xps; + + rcu_read_lock(); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + rpc_xprt_switch_remove_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch), + xprt, 0); + xps->xps_nunique_destaddr_xprts--; + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_remove_xprt); + bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap) { @@ -3065,6 +3329,8 @@ rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt, int rpc_clnt_swap_activate(struct rpc_clnt *clnt) { + while (clnt != clnt->cl_parent) + clnt = clnt->cl_parent; if (atomic_inc_return(&clnt->cl_swapper) == 1) return rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_swap_activate_callback, NULL); diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 7dc9cc929bfd..a176d5a0b0ee 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -262,6 +262,9 @@ static void fail_sunrpc_init(void) debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir, &fail_sunrpc.ignore_server_disconnect); + + debugfs_create_bool("ignore-cache-wait", S_IFREG | 0600, dir, + &fail_sunrpc.ignore_cache_wait); } #else static void fail_sunrpc_init(void) diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h index 69dc30cc44b8..4b4b500df428 100644 --- a/net/sunrpc/fail.h +++ b/net/sunrpc/fail.h @@ -14,8 +14,8 @@ struct fail_sunrpc_attr { struct fault_attr attr; bool ignore_client_disconnect; - bool ignore_server_disconnect; + bool ignore_cache_wait; }; extern struct fail_sunrpc_attr fail_sunrpc; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ee5336d73fdd..0b6034fab9ab 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -197,7 +197,7 @@ static struct inode * rpc_alloc_inode(struct super_block *sb) { struct rpc_inode *rpci; - rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); + rpci = alloc_inode_sb(sb, rpc_inode_cachep, GFP_KERNEL); if (!rpci) return NULL; return &rpci->vfs_inode; @@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } @@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_unlink(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 647b323cc1d5..5a8e6d46809a 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -714,7 +714,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); + map = kzalloc(sizeof(struct rpcbind_args), rpc_task_gfp_mask()); if (!map) { status = -ENOMEM; goto bailout_release_client; @@ -730,7 +730,7 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_4: case RPCBVERS_3: map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; - map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); + map->r_addr = rpc_sockaddr2uaddr(sap, rpc_task_gfp_mask()); if (!map->r_addr) { status = -ENOMEM; goto bailout_free_args; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index e2c835482791..be587a308e05 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -57,6 +57,21 @@ struct workqueue_struct *rpciod_workqueue __read_mostly; struct workqueue_struct *xprtiod_workqueue __read_mostly; EXPORT_SYMBOL_GPL(xprtiod_workqueue); +gfp_t rpc_task_gfp_mask(void) +{ + if (current->flags & PF_WQ_WORKER) + return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + return GFP_KERNEL; +} +EXPORT_SYMBOL_GPL(rpc_task_gfp_mask); + +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status) +{ + if (cmpxchg(&task->tk_rpc_status, 0, rpc_status) == 0) + return true; + return false; +} + unsigned long rpc_task_timeout(const struct rpc_task *task) { @@ -186,11 +201,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, /* * Add new request to wait queue. - * - * Swapper tasks always get inserted at the head of the queue. - * This should avoid many nasty memory deadlocks and hopefully - * improve overall performance. - * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task, @@ -199,8 +209,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, INIT_LIST_HEAD(&task->u.tk_wait.timer_list); if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); - else if (RPC_IS_SWAPPER(task)) - list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; @@ -268,7 +276,7 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) { - freezable_schedule_unsafe(); + schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; return 0; @@ -332,14 +340,12 @@ static int rpc_complete_task(struct rpc_task *task) * to enforce taking of the wq->lock and hence avoid races with * rpc_complete_task(). */ -int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action) +int rpc_wait_for_completion_task(struct rpc_task *task) { - if (action == NULL) - action = rpc_wait_bit_killable; return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, - action, TASK_KILLABLE); + rpc_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); } -EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); +EXPORT_SYMBOL_GPL(rpc_wait_for_completion_task); /* * Make an RPC task runnable. @@ -854,12 +860,25 @@ void rpc_signal_task(struct rpc_task *task) if (!RPC_IS_ACTIVATED(task)) return; + if (!rpc_task_set_rpc_status(task, -ERESTARTSYS)) + return; trace_rpc_task_signalled(task, task->tk_action); set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); if (queue) - rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS); + rpc_wake_up_queued_task(queue, task); +} + +void rpc_task_try_cancel(struct rpc_task *task, int error) +{ + struct rpc_wait_queue *queue; + + if (!rpc_task_set_rpc_status(task, error)) + return; + queue = READ_ONCE(task->tk_waitqueue); + if (queue) + rpc_wake_up_queued_task(queue, task); } void rpc_exit(struct rpc_task *task, int status) @@ -876,6 +895,15 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) ops->rpc_release(calldata); } +static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk) +{ + if (!xprt) + return false; + if (!atomic_read(&xprt->swapper)) + return false; + return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk; +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). */ @@ -884,6 +912,7 @@ static void __rpc_execute(struct rpc_task *task) struct rpc_wait_queue *queue; int task_is_async = RPC_IS_ASYNC(task); int status = 0; + unsigned long pflags = current->flags; WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) @@ -896,16 +925,26 @@ static void __rpc_execute(struct rpc_task *task) * Perform the next FSM step or a pending callback. * * tk_action may be NULL if the task has been killed. - * In particular, note that rpc_killall_tasks may - * do this at any time, so beware when dereferencing. */ do_action = task->tk_action; + /* Tasks with an RPC error status should exit */ + if (do_action != rpc_exit_task && + (status = READ_ONCE(task->tk_rpc_status)) != 0) { + task->tk_status = status; + if (do_action != NULL) + do_action = rpc_exit_task; + } + /* Callbacks override all actions */ if (task->tk_callback) { do_action = task->tk_callback; task->tk_callback = NULL; } if (!do_action) break; + if (RPC_IS_SWAPPER(task) || + xprt_needs_memalloc(task->tk_xprt, task)) + current->flags |= PF_MEMALLOC; + trace_rpc_task_run_action(task, do_action); do_action(task); @@ -918,14 +957,6 @@ static void __rpc_execute(struct rpc_task *task) } /* - * Signalled tasks should exit rather than sleep. - */ - if (RPC_SIGNALLED(task)) { - task->tk_rpc_status = -ERESTARTSYS; - rpc_exit(task, -ERESTARTSYS); - } - - /* * The queue->lock protects against races with * rpc_make_runnable(). * @@ -940,16 +971,22 @@ static void __rpc_execute(struct rpc_task *task) spin_unlock(&queue->lock); continue; } + /* Wake up any task that has an exit status */ + if (READ_ONCE(task->tk_rpc_status) != 0) { + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock(&queue->lock); + continue; + } rpc_clear_running(task); spin_unlock(&queue->lock); if (task_is_async) - return; + goto out; /* sync task: sleep here */ trace_rpc_task_sync_sleep(task, task->tk_action); status = out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_QUEUED, rpc_wait_bit_killable, - TASK_KILLABLE); + TASK_KILLABLE|TASK_FREEZABLE); if (status < 0) { /* * When a sync task receives a signal, it exits with @@ -957,16 +994,15 @@ static void __rpc_execute(struct rpc_task *task) * clean up after sleeping on some queue, we don't * break the loop here, but go around once more. */ - trace_rpc_task_signalled(task, task->tk_action); - set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); - task->tk_rpc_status = -ERESTARTSYS; - rpc_exit(task, -ERESTARTSYS); + rpc_signal_task(task); } trace_rpc_task_sync_wake(task, task->tk_action); } /* Release all resources associated with the task */ rpc_release_task(task); +out: + current_restore_flags(pflags, PF_MEMALLOC); } /* @@ -1021,15 +1057,15 @@ int rpc_malloc(struct rpc_task *task) struct rpc_rqst *rqst = task->tk_rqstp; size_t size = rqst->rq_callsize + rqst->rq_rcvsize; struct rpc_buffer *buf; - gfp_t gfp = GFP_NOFS; - - if (RPC_IS_SWAPPER(task)) - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + gfp_t gfp = rpc_task_gfp_mask(); size += sizeof(struct rpc_buffer); - if (size <= RPC_BUFFER_MAXSIZE) - buf = mempool_alloc(rpc_buffer_mempool, gfp); - else + if (size <= RPC_BUFFER_MAXSIZE) { + buf = kmem_cache_alloc(rpc_buffer_slabp, gfp); + /* Reach for the mempool if dynamic allocation fails */ + if (!buf && RPC_IS_ASYNC(task)) + buf = mempool_alloc(rpc_buffer_mempool, GFP_NOWAIT); + } else buf = kmalloc(size, gfp); if (!buf) @@ -1092,10 +1128,14 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta rpc_init_task_statistics(task); } -static struct rpc_task * -rpc_alloc_task(void) +static struct rpc_task *rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); + struct rpc_task *task; + + task = kmem_cache_alloc(rpc_task_slabp, rpc_task_gfp_mask()); + if (task) + return task; + return mempool_alloc(rpc_task_mempool, GFP_NOWAIT); } /* @@ -1108,6 +1148,11 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) if (task == NULL) { task = rpc_alloc_task(); + if (task == NULL) { + rpc_release_calldata(setup_data->callback_ops, + setup_data->callback_data); + return ERR_PTR(-ENOMEM); + } flags = RPC_TASK_DYNAMIC; } diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index d52313af82bc..71ba4cf513bc 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -15,6 +15,7 @@ #include <linux/pagemap.h> #include <linux/udp.h> #include <linux/sunrpc/msg_prot.h> +#include <linux/sunrpc/sched.h> #include <linux/sunrpc/xdr.h> #include <linux/export.h> @@ -220,12 +221,6 @@ static int xprt_send_kvec(struct socket *sock, struct msghdr *msg, static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) { - int err; - - err = xdr_alloc_bvec(xdr, GFP_KERNEL); - if (err < 0) - return err; - iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr), xdr->page_len + xdr->page_base); return xprt_sendmsg(sock, msg, base + xdr->page_base); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index c964b48eaaba..52908f9e6eab 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -66,7 +66,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) { static int rpc_proc_open(struct inode *inode, struct file *file) { - return single_open(file, rpc_proc_show, PDE_DATA(inode)); + return single_open(file, rpc_proc_show, pde_data(inode)); } static const struct proc_ops rpc_proc_ops = { diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 2f59464e6524..d4a362c9e4b3 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** (c) 2008 NetApp. All Rights Reserved. -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -https://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4292278a9552..149171774bc6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -37,18 +37,37 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) - #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL /* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_AUTO = -1, /* choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + +/* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. */ -struct svc_pool_map svc_pool_map = { + +struct svc_pool_map { + int count; /* How many svc_servs use us */ + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +}; + +static struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; -EXPORT_SYMBOL_GPL(svc_pool_map); static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ @@ -219,10 +238,12 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) /* * Add a reference to the global map of cpus to pools (and - * vice versa). Initialise the map if we're the first user. - * Returns the number of pools. + * vice versa) if pools are in use. + * Initialise the map if we're the first user. + * Returns the number of pools. If this is '1', no reference + * was taken. */ -unsigned int +static unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -232,6 +253,7 @@ svc_pool_map_get(void) if (m->count++) { mutex_unlock(&svc_pool_map_mutex); + WARN_ON_ONCE(m->npools <= 1); return m->npools; } @@ -247,30 +269,36 @@ svc_pool_map_get(void) break; } - if (npools < 0) { + if (npools <= 0) { /* default, or memory allocation failure */ npools = 1; m->mode = SVC_POOL_GLOBAL; } m->npools = npools; + if (npools == 1) + /* service is unpooled, so doesn't hold a reference */ + m->count--; + mutex_unlock(&svc_pool_map_mutex); - return m->npools; + return npools; } -EXPORT_SYMBOL_GPL(svc_pool_map_get); /* - * Drop a reference to the global map of cpus to pools. + * Drop a reference to the global map of cpus to pools, if + * pools were in use, i.e. if npools > 1. * When the last reference is dropped, the map data is * freed; this allows the sysadmin to change the pool * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. */ -void -svc_pool_map_put(void) +static void +svc_pool_map_put(int npools) { struct svc_pool_map *m = &svc_pool_map; + if (npools <= 1) + return; mutex_lock(&svc_pool_map_mutex); if (!--m->count) { @@ -283,7 +311,6 @@ svc_pool_map_put(void) mutex_unlock(&svc_pool_map_mutex); } -EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { @@ -329,32 +356,35 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) } } -/* - * Use the mapping mode to choose a pool for a given CPU. - * Used when enqueueing an incoming RPC. Always returns - * a non-NULL pool pointer. +/** + * svc_pool_for_cpu - Select pool to run a thread on this cpu + * @serv: An RPC service + * + * Use the active CPU and the svc_pool_map's mode setting to + * select the svc thread pool to use. Once initialized, the + * svc_pool_map does not change. + * + * Return value: + * A pointer to an svc_pool */ -struct svc_pool * -svc_pool_for_cpu(struct svc_serv *serv, int cpu) +struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv) { struct svc_pool_map *m = &svc_pool_map; + int cpu = raw_smp_processor_id(); unsigned int pidx = 0; - /* - * An uninitialised map happens in a pure client when - * lockd is brought up, so silently treat it the - * same as SVC_POOL_GLOBAL. - */ - if (svc_serv_is_pooled(serv)) { - switch (m->mode) { - case SVC_POOL_PERCPU: - pidx = m->to_pool[cpu]; - break; - case SVC_POOL_PERNODE: - pidx = m->to_pool[cpu_to_node(cpu)]; - break; - } + if (serv->sv_nrpools <= 1) + return serv->sv_pools; + + switch (m->mode) { + case SVC_POOL_PERCPU: + pidx = m->to_pool[cpu]; + break; + case SVC_POOL_PERNODE: + pidx = m->to_pool[cpu_to_node(cpu)]; + break; } + return &serv->sv_pools[pidx % serv->sv_nrpools]; } @@ -424,7 +454,7 @@ __svc_init_bc(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - const struct svc_serv_ops *ops) + int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int vers; @@ -435,13 +465,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return NULL; serv->sv_name = prog->pg_name; serv->sv_program = prog; - serv->sv_nrthreads = 1; + kref_init(&serv->sv_refcnt); serv->sv_stats = prog->pg_stats; if (bufsize > RPCSVC_MAXPAYLOAD) bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); - serv->sv_ops = ops; + serv->sv_threadfn = threadfn; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -487,59 +517,56 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return serv; } -struct svc_serv * -svc_create(struct svc_program *prog, unsigned int bufsize, - const struct svc_serv_ops *ops) +/** + * svc_create - Create an RPC service + * @prog: the RPC program the new service will handle + * @bufsize: maximum message size for @prog + * @threadfn: a function to service RPC requests for @prog + * + * Returns an instantiated struct svc_serv object or NULL. + */ +struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, + int (*threadfn)(void *data)) { - return __svc_create(prog, bufsize, /*npools*/1, ops); + return __svc_create(prog, bufsize, 1, threadfn); } EXPORT_SYMBOL_GPL(svc_create); -struct svc_serv * -svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - const struct svc_serv_ops *ops) +/** + * svc_create_pooled - Create an RPC service with pooled threads + * @prog: the RPC program the new service will handle + * @bufsize: maximum message size for @prog + * @threadfn: a function to service RPC requests for @prog + * + * Returns an instantiated struct svc_serv object or NULL. + */ +struct svc_serv *svc_create_pooled(struct svc_program *prog, + unsigned int bufsize, + int (*threadfn)(void *data)) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, ops); + serv = __svc_create(prog, bufsize, npools, threadfn); if (!serv) goto out_err; return serv; out_err: - svc_pool_map_put(); + svc_pool_map_put(npools); return NULL; } EXPORT_SYMBOL_GPL(svc_create_pooled); -void svc_shutdown_net(struct svc_serv *serv, struct net *net) -{ - svc_close_net(serv, net); - - if (serv->sv_ops->svo_shutdown) - serv->sv_ops->svo_shutdown(serv, net); -} -EXPORT_SYMBOL_GPL(svc_shutdown_net); - /* * Destroy an RPC service. Should be called with appropriate locking to - * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. + * protect sv_permsocks and sv_tempsocks. */ void -svc_destroy(struct svc_serv *serv) +svc_destroy(struct kref *ref) { - dprintk("svc: svc_destroy(%s, %d)\n", - serv->sv_program->pg_name, - serv->sv_nrthreads); - - if (serv->sv_nrthreads) { - if (--(serv->sv_nrthreads) != 0) { - svc_sock_update_bufs(serv); - return; - } - } else - printk("svc_destroy: no threads for serv=%p!\n", serv); + struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt); + dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name); del_timer_sync(&serv->sv_temptimer); /* @@ -551,8 +578,7 @@ svc_destroy(struct svc_serv *serv) cache_clean_deferred(serv); - if (svc_serv_is_pooled(serv)) - svc_pool_map_put(); + svc_pool_map_put(serv->sv_nrpools); kfree(serv->sv_pools); kfree(serv); @@ -638,7 +664,7 @@ out_enomem: } EXPORT_SYMBOL_GPL(svc_rqst_alloc); -struct svc_rqst * +static struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; @@ -647,14 +673,17 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return ERR_PTR(-ENOMEM); - serv->sv_nrthreads++; + svc_get(serv); + spin_lock_bh(&serv->sv_lock); + serv->sv_nrthreads += 1; + spin_unlock_bh(&serv->sv_lock); + spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); spin_unlock_bh(&pool->sp_lock); return rqstp; } -EXPORT_SYMBOL_GPL(svc_prepare_thread); /* * Choose a pool in which to create a new thread, for svc_set_num_threads @@ -728,11 +757,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - __module_get(serv->sv_ops->svo_module); - task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, + task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { - module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); return PTR_ERR(task); } @@ -748,59 +775,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } - -/* destroy old threads */ -static int -svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) -{ - struct task_struct *task; - unsigned int state = serv->sv_nrthreads-1; - - /* destroy old threads */ - do { - task = choose_victim(serv, pool, &state); - if (task == NULL) - break; - send_sig(SIGINT, task, 1); - nrservs++; - } while (nrservs < 0); - - return 0; -} - /* * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Caller must ensure that mutual exclusion between this and * server startup or shutdown. - * - * Destroying threads relies on the service threads filling in - * rqstp->rq_task, which only the nfs ones do. Assumes the serv - * has been created using svc_create_pooled(). - * - * Based on code that used to be in nfsd_svc() but tweaked - * to be pool-aware. */ -int -svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) -{ - if (pool == NULL) { - /* The -1 assumes caller has done a svc_get() */ - nrservs -= (serv->sv_nrthreads-1); - } else { - spin_lock_bh(&pool->sp_lock); - nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); - } - - if (nrservs > 0) - return svc_start_kthreads(serv, pool, nrservs); - if (nrservs < 0) - return svc_signal_kthreads(serv, pool, nrservs); - return 0; -} -EXPORT_SYMBOL_GPL(svc_set_num_threads); /* destroy old threads */ static int @@ -821,11 +802,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) } int -svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { if (pool == NULL) { - /* The -1 assumes caller has done a svc_get() */ - nrservs -= (serv->sv_nrthreads-1); + nrservs -= serv->sv_nrthreads; } else { spin_lock_bh(&pool->sp_lock); nrservs -= pool->sp_nrthreads; @@ -838,7 +818,7 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser return svc_stop_kthreads(serv, pool, nrservs); return 0; } -EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); +EXPORT_SYMBOL_GPL(svc_set_num_threads); /** * svc_rqst_replace_page - Replace one page in rq_pages[] @@ -890,11 +870,14 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); + spin_lock_bh(&serv->sv_lock); + serv->sv_nrthreads -= 1; + spin_unlock_bh(&serv->sv_lock); + svc_sock_update_bufs(serv); + svc_rqst_free(rqstp); - /* Release the server */ - if (serv) - svc_destroy(serv); + svc_put(serv); } EXPORT_SYMBOL_GPL(svc_exit_thread); @@ -1222,7 +1205,7 @@ svc_generic_init_request(struct svc_rqst *rqstp, goto err_bad_proc; /* Initialize storage for argp and resp */ - memset(rqstp->rq_argp, 0, procp->pc_argsize); + memset(rqstp->rq_argp, 0, procp->pc_argzero); memset(rqstp->rq_resp, 0, procp->pc_ressize); /* Bump per-procedure stats counter */ @@ -1261,10 +1244,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off by GSS integrity and privacy services */ - set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ - set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); - clear_bit(RQ_DROPME, &rqstp->rq_flags); + __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + __clear_bit(RQ_DROPME, &rqstp->rq_flags); svc_putu32(resv, rqstp->rq_xid); @@ -1382,7 +1365,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_authorise(rqstp); close_xprt: if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) - svc_close_xprt(rqstp->rq_xprt); + svc_xprt_close(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); return 0; @@ -1451,8 +1434,7 @@ svc_process(struct svc_rqst *rqstp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; - struct svc_serv *serv = rqstp->rq_server; - u32 dir; + __be32 dir; #if IS_ENABLED(CONFIG_FAIL_SUNRPC) if (!fail_sunrpc.ignore_server_disconnect && @@ -1467,7 +1449,7 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_next_page = &rqstp->rq_respages[1]; resv->iov_base = page_address(rqstp->rq_respages[0]); resv->iov_len = 0; - rqstp->rq_res.pages = rqstp->rq_respages + 1; + rqstp->rq_res.pages = rqstp->rq_next_page; rqstp->rq_res.len = 0; rqstp->rq_res.page_base = 0; rqstp->rq_res.page_len = 0; @@ -1475,18 +1457,17 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; - dir = svc_getnl(argv); - if (dir != 0) { - /* direction != CALL */ - svc_printk(rqstp, "bad direction %d, dropping request\n", dir); - serv->sv_stats->rpcbadfmt++; + dir = svc_getu32(argv); + if (dir != rpc_call) + goto out_baddir; + if (!svc_process_common(rqstp, argv, resv)) goto out_drop; - } - - /* Returns 1 for send, 0 for drop */ - if (likely(svc_process_common(rqstp, argv, resv))) - return svc_send(rqstp); + return svc_send(rqstp); +out_baddir: + svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", + be32_to_cpu(dir)); + rqstp->rq_server->sv_stats->rpcbadfmt++; out_drop: svc_drop(rqstp); return 0; @@ -1573,8 +1554,12 @@ out: EXPORT_SYMBOL_GPL(bc_svc_process); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -/* - * Return (transport-specific) limit on the rpc payload. +/** + * svc_max_payload - Return transport-specific limit on the RPC payload + * @rqstp: RPC transaction context + * + * Returns the maximum number of payload bytes the current transport + * allows. */ u32 svc_max_payload(const struct svc_rqst *rqstp) { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 1e99ba1b9d72..2106003645a7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -6,6 +6,7 @@ */ #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/errno.h> #include <linux/freezer.h> #include <linux/kthread.h> @@ -161,7 +162,7 @@ static void svc_xprt_free(struct kref *kref) if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); put_cred(xprt->xpt_cred); - put_net(xprt->xpt_net); + put_net_track(xprt->xpt_net, &xprt->ns_tracker); /* See comment on corresponding get in xs_setup_bc_tcp(): */ if (xprt->xpt_bc_xprt) xprt_put(xprt->xpt_bc_xprt); @@ -197,7 +198,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); - xprt->xpt_net = get_net(net); + xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC); strcpy(xprt->xpt_remotebuf, "uninitialized"); } EXPORT_SYMBOL_GPL(svc_xprt_init); @@ -243,7 +244,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); if (IS_ERR(xprt)) trace_svc_xprt_create_err(serv->sv_program->pg_name, - xcl->xcl_name, sap, xprt); + xcl->xcl_name, sap, len, xprt); return xprt; } @@ -264,15 +265,13 @@ void svc_xprt_received(struct svc_xprt *xprt) return; } - trace_svc_xprt_received(xprt); - /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_enqueue_xprt with: + * 'put', so we need a reference to call svc_xprt_enqueue with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); + svc_xprt_enqueue(xprt); svc_xprt_put(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_received); @@ -286,7 +285,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) svc_xprt_received(new); } -static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, +static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) @@ -322,21 +321,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, return -EPROTONOSUPPORT; } -int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, +/** + * svc_xprt_create - Add a new listener to @serv + * @serv: target RPC service + * @xprt_name: transport class name + * @net: network namespace + * @family: network address family + * @port: listener port + * @flags: SVC_SOCK flags + * @cred: credential to bind to this transport + * + * Return values: + * %0: New listener added successfully + * %-EPROTONOSUPPORT: Requested transport type not supported + */ +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) { int err; - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); + err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); + err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); } return err; } -EXPORT_SYMBOL_GPL(svc_create_xprt); +EXPORT_SYMBOL_GPL(svc_xprt_create); /* * Copy the local and remote xprt addresses to the rqstp structure @@ -412,6 +425,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) smp_rmb(); xpt_flags = READ_ONCE(xprt->xpt_flags); + if (xpt_flags & BIT(XPT_BUSY)) + return false; if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) return true; if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { @@ -424,11 +439,15 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) return false; } -void svc_xprt_do_enqueue(struct svc_xprt *xprt) +/** + * svc_xprt_enqueue - Queue a transport on an idle nfsd thread + * @xprt: transport with data pending + * + */ +void svc_xprt_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; - int cpu; if (!svc_xprt_ready(xprt)) return; @@ -441,8 +460,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) return; - cpu = get_cpu(); - pool = svc_pool_for_cpu(xprt->xpt_server, cpu); + pool = svc_pool_for_cpu(xprt->xpt_server); atomic_long_inc(&pool->sp_stats.packets); @@ -465,21 +483,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) rqstp = NULL; out_unlock: rcu_read_unlock(); - put_cpu(); - trace_svc_xprt_do_enqueue(xprt, rqstp); -} -EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); - -/* - * Queue up a transport with data pending. If there are idle nfsd - * processes, wake 'em up. - * - */ -void svc_xprt_enqueue(struct svc_xprt *xprt) -{ - if (test_bit(XPT_BUSY, &xprt->xpt_flags)) - return; - xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); + trace_svc_xprt_enqueue(xprt, rqstp); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); @@ -687,8 +691,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) set_current_state(TASK_RUNNING); return -EINTR; } - trace_svc_alloc_arg_err(pages); - schedule_timeout(msecs_to_jiffies(500)); + trace_svc_alloc_arg_err(pages, ret); + memalloc_retry_wait(GFP_KERNEL); } rqstp->rq_page_end = &rqstp->rq_pages[pages]; rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */ @@ -842,8 +846,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } else svc_xprt_received(xprt); + out: - trace_svc_handle_xprt(xprt, len); return len; } @@ -1061,7 +1065,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt) svc_xprt_put(xprt); } -void svc_close_xprt(struct svc_xprt *xprt) +/** + * svc_xprt_close - Close a client connection + * @xprt: transport to disconnect + * + */ +void svc_xprt_close(struct svc_xprt *xprt) { trace_svc_xprt_close(xprt); set_bit(XPT_CLOSE, &xprt->xpt_flags); @@ -1076,7 +1085,7 @@ void svc_close_xprt(struct svc_xprt *xprt) */ svc_delete_xprt(xprt); } -EXPORT_SYMBOL_GPL(svc_close_xprt); +EXPORT_SYMBOL_GPL(svc_xprt_close); static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { @@ -1128,7 +1137,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) } } -/* +/** + * svc_xprt_destroy_all - Destroy transports associated with @serv + * @serv: RPC service to be shut down + * @net: target network namespace + * * Server threads may still be running (especially in the case where the * service is still running in other network namespaces). * @@ -1140,7 +1153,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) * threads, we may need to wait a little while and then check again to * see if they're done. */ -void svc_close_net(struct svc_serv *serv, struct net *net) +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) { int delay = 0; @@ -1151,6 +1164,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net) msleep(delay++); } } +EXPORT_SYMBOL_GPL(svc_xprt_destroy_all); /* * Handle defer and revisit of requests @@ -1213,7 +1227,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) dr->addrlen = rqstp->rq_addrlen; dr->daddr = rqstp->rq_daddr; dr->argslen = rqstp->rq_arg.len >> 2; - dr->xprt_hlen = rqstp->rq_xprt_hlen; + dr->xprt_ctxt = rqstp->rq_xprt_ctxt; + rqstp->rq_xprt_ctxt = NULL; /* back up head to the start of the buffer and copy */ skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; @@ -1223,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - set_bit(RQ_DROPME, &rqstp->rq_flags); + __set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; @@ -1239,21 +1254,21 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) trace_svc_defer_recv(dr); /* setup iov_base past transport header */ - rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); + rqstp->rq_arg.head[0].iov_base = dr->args; /* The iov_len does not include the transport header bytes */ - rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen; + rqstp->rq_arg.head[0].iov_len = dr->argslen << 2; rqstp->rq_arg.page_len = 0; /* The rq_arg.len includes the transport header bytes */ - rqstp->rq_arg.len = dr->argslen<<2; + rqstp->rq_arg.len = dr->argslen << 2; rqstp->rq_prot = dr->prot; memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); rqstp->rq_addrlen = dr->addrlen; /* Save off transport header len in case we get deferred again */ - rqstp->rq_xprt_hlen = dr->xprt_hlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_xprt_ctxt = dr->xprt_ctxt; svc_xprt_received(rqstp->rq_xprt); - return (dr->argslen<<2) - dr->xprt_hlen; + return dr->argslen << 2; } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 5a8b8e03fdd4..e72ba2f13f6c 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -31,10 +31,12 @@ */ extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; +extern struct auth_ops svcauth_tls; static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = { [RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null, [RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix, + [RPC_AUTH_TLS] = (struct auth_ops __force __rcu *)&svcauth_tls, }; static struct auth_ops * diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d7ed7d49115a..b1efc34db6ed 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -37,6 +37,7 @@ struct unix_domain { extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; +extern struct auth_ops svcauth_tls; static void svcauth_unix_domain_release_rcu(struct rcu_head *head) { @@ -789,6 +790,65 @@ struct auth_ops svcauth_null = { static int +svcauth_tls_accept(struct svc_rqst *rqstp) +{ + struct svc_cred *cred = &rqstp->rq_cred; + struct kvec *argv = rqstp->rq_arg.head; + struct kvec *resv = rqstp->rq_res.head; + + if (argv->iov_len < XDR_UNIT * 3) + return SVC_GARBAGE; + + /* Call's cred length */ + if (svc_getu32(argv) != xdr_zero) { + rqstp->rq_auth_stat = rpc_autherr_badcred; + return SVC_DENIED; + } + + /* Call's verifier flavor and its length */ + if (svc_getu32(argv) != rpc_auth_null || + svc_getu32(argv) != xdr_zero) { + rqstp->rq_auth_stat = rpc_autherr_badverf; + return SVC_DENIED; + } + + /* AUTH_TLS is not valid on non-NULL procedures */ + if (rqstp->rq_proc != 0) { + rqstp->rq_auth_stat = rpc_autherr_badcred; + return SVC_DENIED; + } + + /* Mapping to nobody uid/gid is required */ + cred->cr_uid = INVALID_UID; + cred->cr_gid = INVALID_GID; + cred->cr_group_info = groups_alloc(0); + if (cred->cr_group_info == NULL) + return SVC_CLOSE; /* kmalloc failure - client must retry */ + + /* Reply's verifier */ + svc_putnl(resv, RPC_AUTH_NULL); + if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { + svc_putnl(resv, 8); + memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8); + resv->iov_len += 8; + } else + svc_putnl(resv, 0); + + rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS; + return SVC_OK; +} + +struct auth_ops svcauth_tls = { + .name = "tls", + .owner = THIS_MODULE, + .flavour = RPC_AUTH_TLS, + .accept = svcauth_tls_accept, + .release = svcauth_null_release, + .set_client = svcauth_unix_set_client, +}; + + +static int svcauth_unix_accept(struct svc_rqst *rqstp) { struct kvec *argv = &rqstp->rq_arg.head[0]; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 478f857cdaed..2fc98fea59b4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -117,15 +117,6 @@ static void svc_reclassify_socket(struct socket *sock) */ static void svc_tcp_release_rqst(struct svc_rqst *rqstp) { - struct sk_buff *skb = rqstp->rq_xprt_ctxt; - - if (skb) { - struct svc_sock *svsk = - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - - rqstp->rq_xprt_ctxt = NULL; - skb_free_datagram_locked(svsk->sk_sk, skb); - } } /** @@ -259,8 +250,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, ssize_t len; size_t t; - rqstp->rq_xprt_hlen = 0; - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) { @@ -309,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) static void svc_sock_secure_port(struct svc_rqst *rqstp) { if (svc_port_is_privileged(svc_addr(rqstp))) - set_bit(RQ_SECURE, &rqstp->rq_flags); + __set_bit(RQ_SECURE, &rqstp->rq_flags); else - clear_bit(RQ_SECURE, &rqstp->rq_flags); + __clear_bit(RQ_SECURE, &rqstp->rq_flags); } /* @@ -464,7 +453,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err < 0) goto out_recv_err; - skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err); if (!skb) goto out_recv_err; @@ -579,15 +568,18 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; + err = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (err < 0) + goto out_unlock; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); - xdr_free_bvec(xdr); } + xdr_free_bvec(xdr); trace_svcsock_udp_send(xprt, err); - +out_unlock: mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; @@ -1016,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags)) - set_bit(RQ_LOCAL, &rqstp->rq_flags); + __set_bit(RQ_LOCAL, &rqstp->rq_flags); else - clear_bit(RQ_LOCAL, &rqstp->rq_flags); + __clear_bit(RQ_LOCAL, &rqstp->rq_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; @@ -1096,7 +1088,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, int ret; *sentp = 0; - xdr_alloc_bvec(xdr, GFP_KERNEL); + ret = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (ret < 0) + return ret; ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 2766dd21935b..c1f559892ae8 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -93,11 +93,14 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); ssize_t ret; - if (!xprt) - return 0; + if (!xprt) { + ret = sprintf(buf, "<closed>\n"); + goto out; + } ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); xprt_put(xprt); - return ret + 1; +out: + return ret; } static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, @@ -105,38 +108,45 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); - struct sockaddr_storage saddr; - struct sock_xprt *sock; - ssize_t ret = -1; + size_t buflen = PAGE_SIZE; + ssize_t ret; if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; - } - - sock = container_of(xprt, struct sock_xprt, xprt); - if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) - goto out; - - ret = sprintf(buf, "%pISc\n", &saddr); -out: + ret = sprintf(buf, "<closed>\n"); + } else if (xprt->ops->get_srcaddr) { + ret = xprt->ops->get_srcaddr(xprt, buf, buflen); + if (ret > 0) { + if (ret < buflen - 1) { + buf[ret] = '\n'; + ret++; + buf[ret] = '\0'; + } + } else + ret = sprintf(buf, "<closed>\n"); + } else + ret = sprintf(buf, "<not a socket>\n"); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) + struct kobj_attribute *attr, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + unsigned short srcport = 0; + size_t buflen = PAGE_SIZE; ssize_t ret; if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; + ret = sprintf(buf, "<closed>\n"); + goto out; } - ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" + if (xprt->ops->get_srcport) + srcport = xprt->ops->get_srcport(xprt); + + ret = snprintf(buf, buflen, + "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" @@ -144,14 +154,12 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, xprt->pending.qlen, - xprt->backlog.qlen, xprt->main, - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - get_srcport(xprt) : 0, + xprt->backlog.qlen, xprt->main, srcport, atomic_long_read(&xprt->queuelen), - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - xprt->address_strings[RPC_DISPLAY_PORT] : "0"); + xprt->address_strings[RPC_DISPLAY_PORT]); +out: xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, @@ -163,10 +171,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, int locked, connected, connecting, close_wait, bound, binding, closing, congested, cwnd_wait, write_space, offline, remove; - if (!xprt) - return 0; - - if (!xprt->state) { + if (!(xprt && xprt->state)) { ret = sprintf(buf, "state=CLOSED\n"); } else { locked = test_bit(XPRT_LOCKED, &xprt->state); @@ -198,7 +203,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, } xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, @@ -217,7 +222,7 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, xprt_switch->xps_nunique_destaddr_xprts, atomic_long_read(&xprt_switch->xps_queuelen)); xprt_switch_put(xprt_switch); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, @@ -286,8 +291,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, int offline = 0, online = 0, remove = 0; struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj); - if (!xprt) - return 0; + if (!xprt || !xps) { + count = 0; + goto out_put; + } if (!strncmp(buf, "offline", 7)) offline = 1; @@ -295,8 +302,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, online = 1; else if (!strncmp(buf, "remove", 6)) remove = 1; - else - return -EINVAL; + else { + count = -EINVAL; + goto out_put; + } if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { count = -EINTR; @@ -307,29 +316,14 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, goto release_tasks; } if (offline) { - set_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive--; - spin_unlock(&xps->xps_lock); + xprt_set_offline_locked(xprt, xps); } else if (online) { - clear_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive++; - spin_unlock(&xps->xps_lock); + xprt_set_online_locked(xprt, xps); } else if (remove) { - if (test_bit(XPRT_OFFLINE, &xprt->state)) { - set_bit(XPRT_REMOVE, &xprt->state); - xprt_force_disconnect(xprt); - if (test_bit(XPRT_CONNECTED, &xprt->state)) { - if (!xprt->sending.qlen && - !xprt->pending.qlen && - !xprt->backlog.qlen && - !atomic_long_read(&xprt->queuelen)) - rpc_xprt_switch_remove_xprt(xps, xprt); - } - } else { + if (test_bit(XPRT_OFFLINE, &xprt->state)) + xprt_delete_locked(xprt, xps); + else count = -EINVAL; - } } release_tasks: @@ -422,6 +416,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_change_state.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); @@ -430,6 +425,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); static struct kobj_type rpc_sysfs_client_type = { .release = rpc_sysfs_client_release, @@ -439,14 +435,14 @@ static struct kobj_type rpc_sysfs_client_type = { static struct kobj_type rpc_sysfs_xprt_switch_type = { .release = rpc_sysfs_xprt_switch_release, - .default_attrs = rpc_sysfs_xprt_switch_attrs, + .default_groups = rpc_sysfs_xprt_switch_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_switch_namespace, }; static struct kobj_type rpc_sysfs_xprt_type = { .release = rpc_sysfs_xprt_release, - .default_attrs = rpc_sysfs_xprt_attrs, + .default_groups = rpc_sysfs_xprt_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_namespace, }; @@ -522,13 +518,16 @@ void rpc_sysfs_client_setup(struct rpc_clnt *clnt, struct net *net) { struct rpc_sysfs_client *rpc_client; + struct rpc_sysfs_xprt_switch *xswitch = + (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; + + if (!xswitch) + return; rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj, net, clnt->cl_clid); if (rpc_client) { char name[] = "switch"; - struct rpc_sysfs_xprt_switch *xswitch = - (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; int ret; clnt->cl_sysfs = rpc_client; @@ -562,6 +561,8 @@ void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch, rpc_xprt_switch->xprt_switch = xprt_switch; rpc_xprt_switch->xprt = xprt; kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_ADD); + } else { + xprt_switch->xps_sysfs = NULL; } } @@ -573,6 +574,9 @@ void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch, struct rpc_sysfs_xprt_switch *switch_obj = (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; + if (!switch_obj) + return; + rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, gfp_flags); if (rpc_xprt) { xprt->xprt_sysfs = rpc_xprt; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index df194cc07035..336a7c7833e4 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -775,6 +775,34 @@ static void xdr_buf_pages_shift_left(const struct xdr_buf *buf, xdr_buf_tail_copy_left(buf, 0, len - buf->page_len, shift); } +static void xdr_buf_head_shift_left(const struct xdr_buf *buf, + unsigned int base, unsigned int len, + unsigned int shift) +{ + const struct kvec *head = buf->head; + unsigned int bytes; + + if (!shift || !len) + return; + + if (shift > base) { + bytes = (shift - base); + if (bytes >= len) + return; + base += bytes; + len -= bytes; + } + + if (base < head->iov_len) { + bytes = min_t(unsigned int, len, head->iov_len - base); + memmove(head->iov_base + (base - shift), + head->iov_base + base, bytes); + base += bytes; + len -= bytes; + } + xdr_buf_pages_shift_left(buf, base - head->iov_len, len, shift); +} + /** * xdr_shrink_bufhead * @buf: xdr_buf @@ -919,7 +947,29 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, EXPORT_SYMBOL_GPL(xdr_init_encode); /** - * xdr_commit_encode - Ensure all data is written to buffer + * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages + * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer into which to encode data + * @pages: list of pages to decode into + * @rqst: pointer to controlling rpc_rqst, for debugging + * + */ +void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst) +{ + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->page_ptr = pages; + xdr->iov = NULL; + xdr->p = page_address(*pages); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); + xdr->rqst = rqst; +} +EXPORT_SYMBOL_GPL(xdr_init_encode_pages); + +/** + * __xdr_commit_encode - Ensure all data is written to buffer * @xdr: pointer to xdr_stream * * We handle encoding across page boundaries by giving the caller a @@ -931,26 +981,29 @@ EXPORT_SYMBOL_GPL(xdr_init_encode); * required at the end of encoding, or any other time when the xdr_buf * data might be read. */ -inline void xdr_commit_encode(struct xdr_stream *xdr) +void __xdr_commit_encode(struct xdr_stream *xdr) { - int shift = xdr->scratch.iov_len; + size_t shift = xdr->scratch.iov_len; void *page; - if (shift == 0) - return; page = page_address(*xdr->page_ptr); memcpy(xdr->scratch.iov_base, page, shift); memmove(page, page + shift, (void *)xdr->p - page); xdr_reset_scratch_buffer(xdr); } -EXPORT_SYMBOL_GPL(xdr_commit_encode); +EXPORT_SYMBOL_GPL(__xdr_commit_encode); -static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, - size_t nbytes) +/* + * The buffer space to be reserved crosses the boundary between + * xdr->buf->head and xdr->buf->pages, or between two pages + * in xdr->buf->pages. + */ +static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, + size_t nbytes) { - __be32 *p; int space_left; int frag1bytes, frag2bytes; + void *p; if (nbytes > PAGE_SIZE) goto out_overflow; /* Bigger buffers require special handling */ @@ -964,6 +1017,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, xdr->buf->page_len += frag1bytes; xdr->page_ptr++; xdr->iov = NULL; + /* * If the last encode didn't end exactly on a page boundary, the * next one will straddle boundaries. Encode into the next @@ -972,14 +1026,19 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, * space at the end of the previous buffer: */ xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes); - p = page_address(*xdr->page_ptr); + /* - * Note this is where the next encode will start after we've - * shifted this one back: + * xdr->p is where the next encode will start after + * xdr_commit_encode() has shifted this one back: */ - xdr->p = (void *)p + frag2bytes; + p = page_address(*xdr->page_ptr); + xdr->p = p + frag2bytes; space_left = xdr->buf->buflen - xdr->buf->len; - xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); + if (space_left - frag1bytes >= PAGE_SIZE) + xdr->end = p + PAGE_SIZE; + else + xdr->end = p + space_left - frag1bytes; + xdr->buf->page_len += frag2bytes; xdr->buf->len += nbytes; return p; @@ -1463,71 +1522,35 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_read_pages); -unsigned int xdr_align_data(struct xdr_stream *xdr, unsigned int offset, - unsigned int length) -{ - struct xdr_buf *buf = xdr->buf; - unsigned int from, bytes, len; - unsigned int shift; - - xdr_realign_pages(xdr); - from = xdr_page_pos(xdr); - - if (from >= buf->page_len + buf->tail->iov_len) - return 0; - if (from + buf->head->iov_len >= buf->len) - return 0; - - len = buf->len - buf->head->iov_len; - - /* We only shift data left! */ - if (WARN_ONCE(from < offset, "SUNRPC: misaligned data src=%u dst=%u\n", - from, offset)) - return 0; - if (WARN_ONCE(offset > buf->page_len, - "SUNRPC: buffer overflow. offset=%u, page_len=%u\n", - offset, buf->page_len)) - return 0; - - /* Move page data to the left */ - shift = from - offset; - xdr_buf_pages_shift_left(buf, from, len, shift); - - bytes = xdr_stream_remaining(xdr); - if (length > bytes) - length = bytes; - bytes -= length; - - xdr->buf->len -= shift; - xdr_set_page(xdr, offset + length, bytes); - return length; -} -EXPORT_SYMBOL_GPL(xdr_align_data); - -unsigned int xdr_expand_hole(struct xdr_stream *xdr, unsigned int offset, - unsigned int length) +/** + * xdr_set_pagelen - Sets the length of the XDR pages + * @xdr: pointer to xdr_stream struct + * @len: new length of the XDR page data + * + * Either grows or shrinks the length of the xdr pages by setting pagelen to + * @len bytes. When shrinking, any extra data is moved into buf->tail, whereas + * when growing any data beyond the current pointer is moved into the tail. + * + * Returns True if the operation was successful, and False otherwise. + */ +void xdr_set_pagelen(struct xdr_stream *xdr, unsigned int len) { struct xdr_buf *buf = xdr->buf; - unsigned int from, to, shift; - - xdr_realign_pages(xdr); - from = xdr_page_pos(xdr); - to = xdr_align_size(offset + length); - - /* Could the hole be behind us? */ - if (to > from) { - unsigned int buflen = buf->len - buf->head->iov_len; - shift = to - from; - xdr_buf_try_expand(buf, shift); - xdr_buf_pages_shift_right(buf, from, buflen, shift); - xdr_set_page(xdr, to, xdr_stream_remaining(xdr)); - } else if (to != from) - xdr_align_data(xdr, to, 0); - xdr_buf_pages_zero(buf, offset, length); + size_t remaining = xdr_stream_remaining(xdr); + size_t base = 0; - return length; + if (len < buf->page_len) { + base = buf->page_len - len; + xdr_shrink_pagelen(buf, len); + } else { + xdr_buf_head_shift_right(buf, xdr_stream_pos(xdr), + buf->page_len, remaining); + if (len > buf->page_len) + xdr_buf_try_expand(buf, len - buf->page_len); + } + xdr_set_tail_base(xdr, base, remaining); } -EXPORT_SYMBOL_GPL(xdr_expand_hole); +EXPORT_SYMBOL_GPL(xdr_set_pagelen); /** * xdr_enter_page - decode data from the XDR page @@ -1574,7 +1597,7 @@ EXPORT_SYMBOL_GPL(xdr_buf_from_iov); * * @buf and @subbuf may be pointers to the same struct xdr_buf. * - * Returns -1 if base of length are out of bounds. + * Returns -1 if base or length are out of bounds. */ int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf, unsigned int base, unsigned int len) @@ -1672,6 +1695,60 @@ bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, EXPORT_SYMBOL_GPL(xdr_stream_subsegment); /** + * xdr_stream_move_subsegment - Move part of a stream to another position + * @xdr: the source xdr_stream + * @offset: the source offset of the segment + * @target: the target offset of the segment + * @length: the number of bytes to move + * + * Moves @length bytes from @offset to @target in the xdr_stream, overwriting + * anything in its space. Returns the number of bytes in the segment. + */ +unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset, + unsigned int target, unsigned int length) +{ + struct xdr_buf buf; + unsigned int shift; + + if (offset < target) { + shift = target - offset; + if (xdr_buf_subsegment(xdr->buf, &buf, offset, shift + length) < 0) + return 0; + xdr_buf_head_shift_right(&buf, 0, length, shift); + } else if (offset > target) { + shift = offset - target; + if (xdr_buf_subsegment(xdr->buf, &buf, target, shift + length) < 0) + return 0; + xdr_buf_head_shift_left(&buf, shift, length, shift); + } + return length; +} +EXPORT_SYMBOL_GPL(xdr_stream_move_subsegment); + +/** + * xdr_stream_zero - zero out a portion of an xdr_stream + * @xdr: an xdr_stream to zero out + * @offset: the starting point in the stream + * @length: the number of bytes to zero + */ +unsigned int xdr_stream_zero(struct xdr_stream *xdr, unsigned int offset, + unsigned int length) +{ + struct xdr_buf buf; + + if (xdr_buf_subsegment(xdr->buf, &buf, offset, length) < 0) + return 0; + if (buf.head[0].iov_len) + xdr_buf_iov_zero(buf.head, 0, buf.head[0].iov_len); + if (buf.page_len > 0) + xdr_buf_pages_zero(&buf, 0, buf.page_len); + if (buf.tail[0].iov_len) + xdr_buf_iov_zero(buf.tail, 0, buf.tail[0].iov_len); + return length; +} +EXPORT_SYMBOL_GPL(xdr_stream_zero); + +/** * xdr_buf_trim - lop at most "len" bytes off the end of "buf" * @buf: buf to be trimmed * @len: number of bytes to reduce "buf" by diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a02de2bddb28..656cec208371 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -69,10 +69,11 @@ /* * Local functions */ -static void xprt_init(struct rpc_xprt *xprt, struct net *net); +static void xprt_init(struct rpc_xprt *xprt, struct net *net); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); -static void xprt_destroy(struct rpc_xprt *xprt); -static void xprt_request_init(struct rpc_task *task); +static void xprt_destroy(struct rpc_xprt *xprt); +static void xprt_request_init(struct rpc_task *task); +static int xprt_request_prepare(struct rpc_rqst *req, struct xdr_buf *buf); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -929,12 +930,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - trace_xprt_disconnect_cleanup(xprt); - xprt->ops->close(xprt); - } - - if (!xprt_connected(xprt)) { + if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, xprt_request_timeout(task->tk_rqstp)); @@ -1143,16 +1139,19 @@ xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req) * @task: RPC task * */ -void +int xprt_request_enqueue_receive(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + int ret; if (!xprt_request_need_enqueue_receive(task, req)) - return; + return 0; - xprt_request_prepare(task->tk_rqstp); + ret = xprt_request_prepare(task->tk_rqstp, &req->rq_rcv_buf); + if (ret) + return ret; spin_lock(&xprt->queue_lock); /* Update the softirq receive buffer */ @@ -1166,6 +1165,7 @@ xprt_request_enqueue_receive(struct rpc_task *task) /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); + return 0; } /** @@ -1218,6 +1218,8 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) xprt->stat.recvs++; + xdr_free_bvec(&req->rq_rcv_buf); + req->rq_private_buf.bvec = NULL; req->rq_private_buf.len = copied; /* Ensure all writes are done before we update */ /* req->rq_reply_bytes_recvd */ @@ -1336,8 +1338,14 @@ xprt_request_enqueue_transmit(struct rpc_task *task) { struct rpc_rqst *pos, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + int ret; if (xprt_request_need_enqueue_transmit(task, req)) { + ret = xprt_request_prepare(task->tk_rqstp, &req->rq_snd_buf); + if (ret) { + task->tk_status = ret; + return; + } req->rq_bytes_sent = 0; spin_lock(&xprt->queue_lock); /* @@ -1354,17 +1362,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) INIT_LIST_HEAD(&req->rq_xmit2); goto out; } - } else if (RPC_IS_SWAPPER(task)) { - list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { - if (pos->rq_cong || pos->rq_bytes_sent) - continue; - if (RPC_IS_SWAPPER(pos->rq_task)) - continue; - /* Note: req is added _before_ pos */ - list_add_tail(&req->rq_xmit, &pos->rq_xmit); - INIT_LIST_HEAD(&req->rq_xmit2); - goto out; - } } else if (!req->rq_seqno) { list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { if (pos->rq_task->tk_owner != task->tk_owner) @@ -1408,6 +1405,7 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task) } else list_del(&req->rq_xmit2); atomic_long_dec(&req->rq_xprt->xmit_queuelen); + xdr_free_bvec(&req->rq_snd_buf); } /** @@ -1444,8 +1442,6 @@ xprt_request_dequeue_xprt(struct rpc_task *task) test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) || xprt_is_pinned_rqst(req)) { spin_lock(&xprt->queue_lock); - xprt_request_dequeue_transmit_locked(task); - xprt_request_dequeue_receive_locked(task); while (xprt_is_pinned_rqst(req)) { set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -1453,24 +1449,30 @@ xprt_request_dequeue_xprt(struct rpc_task *task) spin_lock(&xprt->queue_lock); clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate); } + xprt_request_dequeue_transmit_locked(task); + xprt_request_dequeue_receive_locked(task); spin_unlock(&xprt->queue_lock); + xdr_free_bvec(&req->rq_rcv_buf); } } /** * xprt_request_prepare - prepare an encoded request for transport * @req: pointer to rpc_rqst + * @buf: pointer to send/rcv xdr_buf * * Calls into the transport layer to do whatever is needed to prepare * the request for transmission or receive. + * Returns error, or zero. */ -void -xprt_request_prepare(struct rpc_rqst *req) +static int +xprt_request_prepare(struct rpc_rqst *req, struct xdr_buf *buf) { struct rpc_xprt *xprt = req->rq_xprt; if (xprt->ops->prepare_request) - xprt->ops->prepare_request(req); + return xprt->ops->prepare_request(req, buf); + return 0; } /** @@ -1503,6 +1505,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) return false; } + if (atomic_read(&xprt->swapper)) + /* This will be clear in __rpc_execute */ + current->flags |= PF_MEMALLOC; return true; } @@ -1692,7 +1697,7 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt) goto out; ++xprt->num_reqs; spin_unlock(&xprt->reserve_lock); - req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS); + req = kzalloc(sizeof(*req), rpc_task_gfp_mask()); spin_lock(&xprt->reserve_lock); if (req != NULL) goto out; @@ -1783,7 +1788,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt) { int id; - id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL); + id = ida_alloc(&rpc_xprt_ids, GFP_KERNEL); if (id < 0) return id; @@ -1793,7 +1798,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt) static void xprt_free_id(struct rpc_xprt *xprt) { - ida_simple_remove(&rpc_xprt_ids, xprt->id); + ida_free(&rpc_xprt_ids, xprt->id); } struct rpc_xprt *xprt_alloc(struct net *net, size_t size, @@ -1817,10 +1822,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size, goto out_free; list_add(&req->rq_list, &xprt->free); } - if (max_alloc > num_prealloc) - xprt->max_reqs = max_alloc; - else - xprt->max_reqs = num_prealloc; + xprt->max_reqs = max_t(unsigned int, max_alloc, num_prealloc); xprt->min_reqs = num_prealloc; xprt->num_reqs = num_prealloc; @@ -1835,7 +1837,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc); void xprt_free(struct rpc_xprt *xprt) { - put_net(xprt->xprt_net); + put_net_track(xprt->xprt_net, &xprt->ns_tracker); xprt_free_all_slots(xprt); xprt_free_id(xprt); rpc_sysfs_xprt_destroy(xprt); @@ -1863,7 +1865,7 @@ xprt_alloc_xid(struct rpc_xprt *xprt) static void xprt_init_xid(struct rpc_xprt *xprt) { - xprt->xid = prandom_u32(); + xprt->xid = get_random_u32(); } static void @@ -1967,8 +1969,6 @@ void xprt_release(struct rpc_task *task) spin_unlock(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(task); - xdr_free_bvec(&req->rq_rcv_buf); - xdr_free_bvec(&req->rq_snd_buf); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); if (req->rq_release_snd_buf) @@ -2027,7 +2027,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) xprt_init_xid(xprt); - xprt->xprt_net = get_net(net); + xprt->xprt_net = get_net_track(net, &xprt->ns_tracker, GFP_KERNEL); } /** @@ -2112,7 +2112,14 @@ static void xprt_destroy(struct rpc_xprt *xprt) */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + /* + * xprt_schedule_autodisconnect() can run after XPRT_LOCKED + * is cleared. We use ->transport_lock to ensure the mod_timer() + * can only run *before* del_time_sync(), never after. + */ + spin_lock(&xprt->transport_lock); del_timer_sync(&xprt->timer); + spin_unlock(&xprt->transport_lock); /* * Destroy sockets etc from the system workqueue so they can @@ -2151,3 +2158,35 @@ void xprt_put(struct rpc_xprt *xprt) kref_put(&xprt->kref, xprt_destroy_kref); } EXPORT_SYMBOL_GPL(xprt_put); + +void xprt_set_offline_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps) +{ + if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive--; + spin_unlock(&xps->xps_lock); + } +} + +void xprt_set_online_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps) +{ + if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive++; + spin_unlock(&xps->xps_lock); + } +} + +void xprt_delete_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps) +{ + if (test_and_set_bit(XPRT_REMOVE, &xprt->state)) + return; + + xprt_force_disconnect(xprt); + if (!test_bit(XPRT_CONNECTED, &xprt->state)) + return; + + if (!xprt->sending.qlen && !xprt->pending.qlen && + !xprt->backlog.qlen && !atomic_long_read(&xprt->queuelen)) + rpc_xprt_switch_remove_xprt(xps, xprt, true); +} diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 1693f81aae37..701250b305db 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -27,6 +27,7 @@ typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin; static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall; +static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline; static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt) @@ -61,11 +62,11 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, } static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, - struct rpc_xprt *xprt) + struct rpc_xprt *xprt, bool offline) { if (unlikely(xprt == NULL)) return; - if (!test_bit(XPRT_OFFLINE, &xprt->state)) + if (!test_bit(XPRT_OFFLINE, &xprt->state) && offline) xps->xps_nactive--; xps->xps_nxprts--; if (xps->xps_nxprts == 0) @@ -78,14 +79,15 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt + * @offline: indicates if the xprt that's being removed is in an offline state * * Removes xprt from the list of struct rpc_xprt in xps. */ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, - struct rpc_xprt *xprt) + struct rpc_xprt *xprt, bool offline) { spin_lock(&xps->xps_lock); - xprt_switch_remove_xprt_locked(xps, xprt); + xprt_switch_remove_xprt_locked(xps, xprt, offline); spin_unlock(&xps->xps_lock); xprt_put(xprt); } @@ -101,7 +103,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) { int id; - id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags); + id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags); if (id < 0) return id; @@ -111,7 +113,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) static void xprt_switch_free_id(struct rpc_xprt_switch *xps) { - ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id); + ida_free(&rpc_xprtswitch_ids, xps->xps_id); } /** @@ -154,7 +156,7 @@ static void xprt_switch_free_entries(struct rpc_xprt_switch *xps) xprt = list_first_entry(&xps->xps_xprt_list, struct rpc_xprt, xprt_switch); - xprt_switch_remove_xprt_locked(xps, xprt); + xprt_switch_remove_xprt_locked(xps, xprt, true); spin_unlock(&xps->xps_lock); xprt_put(xprt); spin_lock(&xps->xps_lock); @@ -249,6 +251,18 @@ struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) } static +struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head) +{ + struct rpc_xprt *pos; + + list_for_each_entry_rcu(pos, head, xprt_switch) { + if (!xprt_is_active(pos)) + return pos; + } + return NULL; +} + +static struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); @@ -259,8 +273,9 @@ struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) } static -struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, - const struct rpc_xprt *cur) +struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head, + const struct rpc_xprt *cur, + bool find_active) { struct rpc_xprt *pos; bool found = false; @@ -268,14 +283,25 @@ struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, list_for_each_entry_rcu(pos, head, xprt_switch) { if (cur == pos) found = true; - if (found && xprt_is_active(pos)) + if (found && ((find_active && xprt_is_active(pos)) || + (!find_active && xprt_is_active(pos)))) return pos; } return NULL; } static -struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) +struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, + const struct rpc_xprt *cur) +{ + return _xprt_switch_find_current_entry(head, cur, true); +} + +static +struct rpc_xprt * _xprt_iter_current_entry(struct rpc_xprt_iter *xpi, + struct rpc_xprt *first_entry(struct list_head *head), + struct rpc_xprt *current_entry(struct list_head *head, + const struct rpc_xprt *cur)) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); struct list_head *head; @@ -284,8 +310,30 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) return NULL; head = &xps->xps_xprt_list; if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2) - return xprt_switch_find_first_entry(head); - return xprt_switch_find_current_entry(head, xpi->xpi_cursor); + return first_entry(head); + return current_entry(head, xpi->xpi_cursor); +} + +static +struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) +{ + return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry, + xprt_switch_find_current_entry); +} + +static +struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head, + const struct rpc_xprt *cur) +{ + return _xprt_switch_find_current_entry(head, cur, false); +} + +static +struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) +{ + return _xprt_iter_current_entry(xpi, + xprt_switch_find_first_entry_offline, + xprt_switch_find_current_entry_offline); } bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, @@ -310,7 +358,7 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, - const struct rpc_xprt *cur) + const struct rpc_xprt *cur, bool check_active) { struct rpc_xprt *pos, *prev = NULL; bool found = false; @@ -318,7 +366,12 @@ struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, list_for_each_entry_rcu(pos, head, xprt_switch) { if (cur == prev) found = true; - if (found && xprt_is_active(pos)) + /* for request to return active transports return only + * active, for request to return offline transports + * return only offline + */ + if (found && ((check_active && xprt_is_active(pos)) || + (!check_active && !xprt_is_active(pos)))) return pos; prev = pos; } @@ -355,7 +408,7 @@ struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head { struct rpc_xprt *ret; - ret = xprt_switch_find_next_entry(head, cur); + ret = xprt_switch_find_next_entry(head, cur, true); if (ret != NULL) return ret; return xprt_switch_find_first_entry(head); @@ -397,7 +450,14 @@ static struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur) { - return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur); + return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true); +} + +static +struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps, + const struct rpc_xprt *cur) +{ + return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false); } static @@ -407,6 +467,13 @@ struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) xprt_switch_find_next_entry_all); } +static +struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi) +{ + return xprt_iter_next_entry_multiple(xpi, + xprt_switch_find_next_entry_offline); +} + /* * xprt_iter_rewind - Resets the xprt iterator * @xpi: pointer to rpc_xprt_iter @@ -414,7 +481,6 @@ struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) * Resets xpi to ensure that it points to the first entry in the list * of transports. */ -static void xprt_iter_rewind(struct rpc_xprt_iter *xpi) { rcu_read_lock(); @@ -460,6 +526,12 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall); } +void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps) +{ + __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline); +} + /** * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch * @xpi: pointer to rpc_xprt_iter @@ -574,3 +646,10 @@ const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = { .xpi_xprt = xprt_iter_current_entry, .xpi_next = xprt_iter_next_entry_all, }; + +static +const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = { + .xpi_rewind = xprt_iter_default_rewind, + .xpi_xprt = xprt_iter_current_entry_offline, + .xpi_next = xprt_iter_next_entry_offline, +}; diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 17f174d6ea3b..e4d84a13c566 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -13,10 +13,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - #undef RPCRDMA_BACKCHANNEL_DEBUG /** @@ -193,7 +189,7 @@ create_req: return NULL; size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE); - req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); + req = rpcrdma_req_create(r_xprt, size); if (!req) return NULL; if (rpcrdma_req_setup(r_xprt, req)) { diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ff699307e820..ffbf99894970 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -45,10 +45,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - static void frwr_cid_init(struct rpcrdma_ep *ep, struct rpcrdma_mr *mr) { @@ -128,16 +124,16 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) unsigned int depth = ep->re_max_fr_depth; struct scatterlist *sg; struct ib_mr *frmr; - int rc; + + sg = kcalloc_node(depth, sizeof(*sg), XPRTRDMA_GFP_FLAGS, + ibdev_to_node(ep->re_id->device)); + if (!sg) + return -ENOMEM; frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth); if (IS_ERR(frmr)) goto out_mr_err; - sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS); - if (!sg) - goto out_list_err; - mr->mr_xprt = r_xprt; mr->mr_ibmr = frmr; mr->mr_device = NULL; @@ -150,13 +146,9 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) return 0; out_mr_err: - rc = PTR_ERR(frmr); - trace_xprtrdma_frwr_alloc(mr, rc); - return rc; - -out_list_err: - ib_dereg_mr(frmr); - return -ENOMEM; + kfree(sg); + trace_xprtrdma_frwr_alloc(mr, PTR_ERR(frmr)); + return PTR_ERR(frmr); } /** @@ -199,7 +191,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device) ep->re_attr.cap.max_recv_sge = 1; ep->re_mrtype = IB_MR_TYPE_MEM_REG; - if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (attrs->kernel_cap_flags & IBK_SG_GAPS_REG) ep->re_mrtype = IB_MR_TYPE_SG_GAPS; /* Quirk: Some devices advertise a large max_fast_reg_page_list_len diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8035a983c8ce..190a4de239c8 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -54,10 +54,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* Returns size of largest RPC-over-RDMA header in a Call message * * The largest Call header contains a full-size Read list and a @@ -1125,6 +1121,7 @@ static bool rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) #if defined(CONFIG_SUNRPC_BACKCHANNEL) { + struct rpc_xprt *xprt = &r_xprt->rx_xprt; struct xdr_stream *xdr = &rep->rr_stream; __be32 *p; @@ -1148,6 +1145,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) if (*p != cpu_to_be32(RPC_CALL)) return false; + /* No bc service. */ + if (xprt->bc_serv == NULL) + return false; + /* Now that we are sure this is a backchannel call, * advance to the RPC header. */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 16897fcb659c..aa2227a7e552 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -119,12 +119,12 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } - page = alloc_page(RPCRDMA_DEF_GFP); + page = alloc_page(GFP_NOIO | __GFP_NOWARN); if (!page) return -ENOMEM; rqst->rq_buffer = page_address(page); - rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP); + rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, GFP_NOIO | __GFP_NOWARN); if (!rqst->rq_rbuffer) { put_page(page); return -ENOMEM; @@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) ret = rpcrdma_bc_send_request(rdma, rqst); if (ret == -ENOTCONN) - svc_close_xprt(sxprt); + svc_xprt_close(sxprt); return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cf76a6ad127b..5242ad121450 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -831,7 +831,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_err; if (ret == 0) goto out_drop; - rqstp->rq_xprt_hlen = ret; if (svc_rdma_is_reverse_direction_reply(xprt, ctxt)) goto out_backchannel; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 5f0155fdefc7..11cf7c646644 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -478,10 +478,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, unsigned int write_len; u64 offset; - seg = &info->wi_chunk->ch_segments[info->wi_seg_no]; - if (!seg) + if (info->wi_seg_no >= info->wi_chunk->ch_segcount) goto out_overflow; + seg = &info->wi_chunk->ch_segments[info->wi_seg_no]; write_len = min(remaining, seg->rs_length - info->wi_seg_off); if (!write_len) goto out_overflow; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 94b20fb47135..199fa012f18a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) static void svc_rdma_secure_port(struct svc_rqst *rqstp) { - set_bit(RQ_SECURE, &rqstp->rq_flags); + __set_bit(RQ_SECURE, &rqstp->rq_flags); } static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 16e5696314a4..10bb2b929c6d 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -60,10 +60,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* * tunables */ @@ -239,8 +235,11 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, rx_connect_worker.work); struct rpc_xprt *xprt = &r_xprt->rx_xprt; + unsigned int pflags = current->flags; int rc; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); if (!rc) { @@ -254,6 +253,7 @@ xprt_rdma_connect_worker(struct work_struct *work) rpcrdma_xprt_disconnect(r_xprt); xprt_unlock_connect(xprt, r_xprt); xprt_wake_pending_tasks(xprt, rc); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -494,8 +494,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } trace_xprtrdma_op_connect(r_xprt, delay); - queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, - delay); + queue_delayed_work(system_long_wq, &r_xprt->rx_connect_worker, delay); } /** @@ -521,7 +520,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - task->tk_status = -EAGAIN; + task->tk_status = -ENOMEM; xprt_add_backlog(xprt, task); } @@ -571,11 +570,7 @@ xprt_rdma_allocate(struct rpc_task *task) struct rpc_rqst *rqst = task->tk_rqstp; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - gfp_t flags; - - flags = RPCRDMA_DEF_GFP; - if (RPC_IS_SWAPPER(task)) - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + gfp_t flags = rpc_task_gfp_mask(); if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3d3673ba9e1e..44b87e4274b4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -63,17 +63,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -/* - * Globals/Macros - */ - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -/* - * internal functions - */ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, @@ -87,8 +76,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_ep_get(struct rpcrdma_ep *ep); static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * -rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, - gfp_t flags); +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction); static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); @@ -274,8 +262,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_connect_status = -ENETUNREACH; goto wake_connect_worker; case RDMA_CM_EVENT_REJECTED: - dprintk("rpcrdma: connection to %pISpc rejected: %s\n", - sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) ep->re_connect_status = -ENOTCONN; @@ -291,8 +277,6 @@ disconnected: break; } - dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, - ep->re_id->device->name, rdma_event_msg(event->event)); return 0; } @@ -388,7 +372,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ep *ep; int rc; - ep = kzalloc(sizeof(*ep), GFP_NOFS); + ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS); if (!ep) return -ENOTCONN; ep->re_xprt = &r_xprt->rx_xprt; @@ -419,14 +403,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_attr.qp_type = IB_QPT_RC; ep->re_attr.port_num = ~0; - dprintk("RPC: %s: requested max: dtos: send %d recv %d; " - "iovs: send %d recv %d\n", - __func__, - ep->re_attr.cap.max_send_wr, - ep->re_attr.cap.max_recv_wr, - ep->re_attr.cap.max_send_sge, - ep->re_attr.cap.max_recv_sge); - ep->re_send_batch = ep->re_max_requests >> 3; ep->re_send_count = ep->re_send_batch; init_waitqueue_head(&ep->re_connect_wait); @@ -436,6 +412,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.send_cq)) { rc = PTR_ERR(ep->re_attr.send_cq); + ep->re_attr.send_cq = NULL; goto out_destroy; } @@ -444,6 +421,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.recv_cq)) { rc = PTR_ERR(ep->re_attr.recv_cq); + ep->re_attr.recv_cq = NULL; goto out_destroy; } ep->re_receive_count = 0; @@ -482,6 +460,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_pd = ib_alloc_pd(device, 0); if (IS_ERR(ep->re_pd)) { rc = PTR_ERR(ep->re_pd); + ep->re_pd = NULL; goto out_destroy; } @@ -626,7 +605,7 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) struct rpcrdma_sendctx *sc; sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), - GFP_KERNEL); + XPRTRDMA_GFP_FLAGS); if (!sc) return NULL; @@ -649,7 +628,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * Sends are posted. */ i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; - buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); + buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS); if (!buf->rb_sc_ctxs) return -ENOMEM; @@ -760,13 +739,16 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct ib_device *device = ep->re_id->device; unsigned int count; + /* Try to allocate enough to perform one full-sized I/O */ for (count = 0; count < ep->re_max_rdma_segs; count++) { struct rpcrdma_mr *mr; int rc; - mr = kzalloc(sizeof(*mr), GFP_NOFS); + mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS, + ibdev_to_node(device)); if (!mr) break; @@ -811,38 +793,33 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) /* If there is no underlying connection, it's no use * to wake the refresh worker. */ - if (ep->re_connect_status == 1) { - /* The work is scheduled on a WQ_MEM_RECLAIM - * workqueue in order to prevent MR allocation - * from recursing into NFS during direct reclaim. - */ - queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); - } + if (ep->re_connect_status != 1) + return; + queue_work(system_highpri_wq, &buf->rb_refresh_worker); } /** * rpcrdma_req_create - Allocate an rpcrdma_req object * @r_xprt: controlling r_xprt * @size: initial size, in bytes, of send and receive buffers - * @flags: GFP flags passed to memory allocators * * Returns an allocated and fully initialized rpcrdma_req or NULL. */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, - gfp_t flags) +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, + size_t size) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_req *req; - req = kzalloc(sizeof(*req), flags); + req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS); if (req == NULL) goto out1; - req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); + req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE); if (!req->rl_sendbuf) goto out2; - req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); + req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE); if (!req->rl_recvbuf) goto out3; @@ -878,7 +855,7 @@ int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; maxhdrsize *= sizeof(__be32); rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), - DMA_TO_DEVICE, GFP_KERNEL); + DMA_TO_DEVICE); if (!rb) goto out; @@ -949,12 +926,12 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; - rep = kzalloc(sizeof(*rep), GFP_KERNEL); + rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS); if (rep == NULL) goto out; rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, - DMA_FROM_DEVICE, GFP_KERNEL); + DMA_FROM_DEVICE); if (!rep->rr_rdmabuf) goto out_free; @@ -1084,8 +1061,8 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { struct rpcrdma_req *req; - req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, - GFP_KERNEL); + req = rpcrdma_req_create(r_xprt, + RPCRDMA_V1_DEF_INLINE_SIZE * 2); if (!req) goto out; list_add(&req->rl_list, &buf->rb_send_bufs); @@ -1255,15 +1232,14 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) * or Replies they may be registered externally via frwr_map. */ static struct rpcrdma_regbuf * -rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, - gfp_t flags) +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) { struct rpcrdma_regbuf *rb; - rb = kmalloc(sizeof(*rb), flags); + rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS); if (!rb) return NULL; - rb->rg_data = kmalloc(size, flags); + rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS); if (!rb->rg_data) { kfree(rb); return NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c79f92eeda76..5e5ff6784ef5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -149,7 +149,11 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb) return rb->rg_data; } -#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) +/* Do not use emergency memory reserves, and fail quickly if memory + * cannot be allocated easily. These flags may be used wherever there + * is robust logic to handle a failure to allocate. + */ +#define XPRTRDMA_GFP_FLAGS (__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) /* To ensure a transport can always make forward progress, * the number of RDMA segments allowed in header chunk lists @@ -467,8 +471,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp); /* * Buffer calls - xprtrdma/verbs.c */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, - gfp_t flags); +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, + size_t size); int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d8ee06a9650a..915b9902f673 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -58,6 +58,7 @@ #include "sunrpc.h" static void xs_close(struct rpc_xprt *xprt); +static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock); static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock); @@ -260,7 +261,7 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_LOCAL: sun = xs_addr_un(xprt); - strlcpy(buf, sun->sun_path, sizeof(buf)); + strscpy(buf, sun->sun_path, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); break; @@ -427,9 +428,9 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, offset += want; } - want = xs_alloc_sparse_pages(buf, - min_t(size_t, count - offset, buf->page_len), - GFP_KERNEL); + want = xs_alloc_sparse_pages( + buf, min_t(size_t, count - offset, buf->page_len), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); if (seek < want) { ret = xs_read_bvec(sock, msg, flags, buf->bvec, xdr_buf_pagecount(buf), @@ -763,12 +764,12 @@ xs_stream_start_connect(struct sock_xprt *transport) /** * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request + * @transport: pointer to struct sock_xprt * */ -static int xs_nospace(struct rpc_rqst *req) +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) { - struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_xprt *xprt = &transport->xprt; struct sock *sk = transport->inet; int ret = -EAGAIN; @@ -780,32 +781,50 @@ static int xs_nospace(struct rpc_rqst *req) /* Don't race with disconnect */ if (xprt_connected(xprt)) { /* wait for more buffer space */ + set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; spin_unlock(&xprt->transport_lock); + return ret; +} + +static int xs_sock_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - /* Race breaker in case memory is freed before above code is called */ - if (ret == -EAGAIN) { - struct socket_wq *wq; + lock_sock(sk); + if (!sock_writeable(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); + return ret; +} - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); +static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - sk->sk_write_space(sk); - } + if (vm_wait) + return -ENOBUFS; + lock_sock(sk); + if (!sk_stream_memory_free(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); return ret; } -static void -xs_stream_prepare_request(struct rpc_rqst *req) +static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf) { - xdr_free_bvec(&req->rq_rcv_buf); - req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL); + return xdr_alloc_bvec(buf, rpc_task_gfp_mask()); } /* @@ -851,27 +870,27 @@ static int xs_local_send_request(struct rpc_rqst *req) struct msghdr msg = { .msg_flags = XS_SENDMSG_FLAGS, }; + bool vm_wait; unsigned int sent; int status; /* Close the stream if the previous transmission was incomplete */ if (xs_send_request_was_aborted(transport, req)) { - xs_close(xprt); + xprt_force_disconnect(xprt); return -ENOTCONN; } xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); + vm_wait = sk_stream_is_writeable(transport->inet) ? true : false; + req->rq_xtime = ktime_get(); status = xprt_sock_sendmsg(transport->sock, &msg, xdr, transport->xmit.offset, rm, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); - if (status == -EAGAIN && sock_writeable(transport->inet)) - status = -ENOBUFS; - if (likely(sent > 0) || status == 0) { transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; @@ -881,20 +900,19 @@ static int xs_local_send_request(struct rpc_rqst *req) return 0; } status = -EAGAIN; + vm_wait = false; } switch (status) { - case -ENOBUFS: - break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req, vm_wait); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); fallthrough; case -EPIPE: - xs_close(xprt); + xprt_force_disconnect(xprt); status = -ENOTCONN; } @@ -935,6 +953,9 @@ static int xs_udp_send_request(struct rpc_rqst *req) if (!xprt_request_get_cong(xprt, req)) return -EBADSLT; + status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (status < 0) + return status; req->rq_xtime = ktime_get(); status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent); @@ -963,7 +984,7 @@ process_status: /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_sock_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -1005,7 +1026,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct msghdr msg = { .msg_flags = XS_SENDMSG_FLAGS, }; - bool vm_wait = false; + bool vm_wait; unsigned int sent; int status; @@ -1025,12 +1046,17 @@ static int xs_tcp_send_request(struct rpc_rqst *req) if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) xs_tcp_set_socket_timeouts(xprt, transport->sock); + xs_set_srcport(transport, transport->sock); + /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ req->rq_xtime = ktime_get(); tcp_sock_set_cork(transport->inet, true); - while (1) { + + vm_wait = sk_stream_is_writeable(transport->inet) ? true : false; + + do { status = xprt_sock_sendmsg(transport->sock, &msg, xdr, transport->xmit.offset, rm, &sent); @@ -1051,31 +1077,10 @@ static int xs_tcp_send_request(struct rpc_rqst *req) WARN_ON_ONCE(sent == 0 && status == 0); - if (status == -EAGAIN ) { - /* - * Return EAGAIN if we're sure we're hitting the - * socket send buffer limits. - */ - if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) - break; - /* - * Did we hit a memory allocation failure? - */ - if (sent == 0) { - status = -ENOBUFS; - if (vm_wait) - break; - /* Retry, knowing now that we're below the - * socket send buffer limit - */ - vm_wait = true; - } - continue; - } - if (status < 0) - break; - vm_wait = false; - } + if (sent > 0) + vm_wait = false; + + } while (status == 0); switch (status) { case -ENOTSOCK: @@ -1083,7 +1088,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req, vm_wait); break; case -ECONNRESET: case -ECONNREFUSED: @@ -1124,6 +1129,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); + clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); } static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) @@ -1179,6 +1185,16 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + /* + * Make sure we're calling this in a context from which it is safe + * to call __fput_sync(). In practice that means rpciod and the + * system workqueue. + */ + if (!(current->flags & PF_WQ_WORKER)) { + WARN_ON_ONCE(1); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + return; + } if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); @@ -1202,7 +1218,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - fput(filp); + __fput_sync(filp); xprt_disconnect_done(xprt); } @@ -1331,7 +1347,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_udp(sk, 0, 1, &err); + skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); if (skb == NULL) break; xs_udp_data_read_skb(&transport->xprt, sk, skb); @@ -1354,7 +1370,7 @@ static void xs_udp_data_receive_workfn(struct work_struct *work) } /** - * xs_data_ready - "data ready" callback for UDP sockets + * xs_data_ready - "data ready" callback for sockets * @sk: socket with data to read * */ @@ -1362,11 +1378,13 @@ static void xs_data_ready(struct sock *sk) { struct rpc_xprt *xprt; - dprintk("RPC: xs_data_ready...\n"); xprt = xprt_from_sock(sk); if (xprt != NULL) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + trace_xs_data_ready(xprt); + transport->old_data_ready(sk); /* Any data means we had a useful conversation, so * then we don't need to delay the next reconnect @@ -1395,6 +1413,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) #endif /* CONFIG_SUNRPC_BACKCHANNEL */ /** + * xs_local_state_change - callback to handle AF_LOCAL socket state changes + * @sk: socket whose state has changed + * + */ +static void xs_local_state_change(struct sock *sk) +{ + struct rpc_xprt *xprt; + struct sock_xprt *transport; + + if (!(xprt = xprt_from_sock(sk))) + return; + transport = container_of(xprt, struct sock_xprt, xprt); + if (sk->sk_shutdown & SHUTDOWN_MASK) { + clear_bit(XPRT_CONNECTED, &xprt->state); + /* Trigger the socket release */ + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); + } +} + +/** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed * @@ -1470,7 +1508,6 @@ static void xs_tcp_state_change(struct sock *sk) static void xs_write_space(struct sock *sk) { - struct socket_wq *wq; struct sock_xprt *transport; struct rpc_xprt *xprt; @@ -1481,15 +1518,10 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; transport = container_of(xprt, struct sock_xprt, xprt); - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) - goto out; - + if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state)) + return; xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); sk->sk_write_pending--; -out: - rcu_read_unlock(); } /** @@ -1587,7 +1619,7 @@ static int xs_get_random_port(void) if (max < min) return -EADDRINUSE; range = max - min + 1; - rand = (unsigned short) prandom_u32() % range; + rand = prandom_u32_max(range); return rand + min; } @@ -1638,12 +1670,35 @@ static int xs_get_srcport(struct sock_xprt *transport) return port; } -unsigned short get_srcport(struct rpc_xprt *xprt) +static unsigned short xs_sock_srcport(struct rpc_xprt *xprt) +{ + struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); + unsigned short ret = 0; + mutex_lock(&sock->recv_mutex); + if (sock->sock) + ret = xs_sock_getport(sock->sock); + mutex_unlock(&sock->recv_mutex); + return ret; +} + +static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); - return xs_sock_getport(sock->sock); + union { + struct sockaddr sa; + struct sockaddr_storage st; + } saddr; + int ret = -ENOTCONN; + + mutex_lock(&sock->recv_mutex); + if (sock->sock) { + ret = kernel_getsockname(sock->sock, &saddr.sa); + if (ret >= 0) + ret = snprintf(buf, buflen, "%pISc", &saddr.sa); + } + mutex_unlock(&sock->recv_mutex); + return ret; } -EXPORT_SYMBOL(get_srcport); static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { @@ -1825,7 +1880,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; - sock_set_flag(sk, SOCK_FASYNC); + sk->sk_state_change = xs_local_state_change; sk->sk_error_report = xs_error_report; xprt_clear_connected(xprt); @@ -1910,7 +1965,10 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; - if (RPC_IS_ASYNC(task)) { + if (transport->file) + goto force_disconnect; + + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the * filesystem namespace of the process making the rpc @@ -1920,20 +1978,25 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) * we'll need to figure out how to pass a namespace to * connect. */ - task->tk_rpc_status = -ENOTCONN; - rpc_exit(task, -ENOTCONN); - return; + rpc_task_set_rpc_status(task, -ENOTCONN); + goto out_wake; } ret = xs_local_setup_socket(transport); if (ret && !RPC_IS_SOFTCONN(task)) msleep_interruptible(15000); + return; +force_disconnect: + xprt_force_disconnect(xprt); +out_wake: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, -ENOTCONN); } #if IS_ENABLED(CONFIG_SUNRPC_SWAP) /* - * Note that this should be called with XPRT_LOCKED held (or when we otherwise - * know that we have exclusive access to the socket), to guard against - * races with xs_reset_transport. + * Note that this should be called with XPRT_LOCKED held, or recv_mutex + * held, or when we otherwise know that we have exclusive access to the + * socket, to guard against races with xs_reset_transport. */ static void xs_set_memalloc(struct rpc_xprt *xprt) { @@ -1962,13 +2025,11 @@ xs_enable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (atomic_inc_return(&xprt->swapper) != 1) - return 0; - if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) - return -ERESTARTSYS; - if (xs->inet) + mutex_lock(&xs->recv_mutex); + if (atomic_inc_return(&xprt->swapper) == 1 && + xs->inet) sk_set_memalloc(xs->inet); - xprt_release_xprt(xprt, NULL); + mutex_unlock(&xs->recv_mutex); return 0; } @@ -1984,13 +2045,11 @@ xs_disable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (!atomic_dec_and_test(&xprt->swapper)) - return; - if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) - return; - if (xs->inet) + mutex_lock(&xs->recv_mutex); + if (atomic_dec_and_test(&xprt->swapper) && + xs->inet) sk_clear_memalloc(xs->inet); - xprt_release_xprt(xprt, NULL); + mutex_unlock(&xs->recv_mutex); } #else static void xs_set_memalloc(struct rpc_xprt *xprt) @@ -2023,7 +2082,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; - sock_set_flag(sk, SOCK_FASYNC); xprt_set_connected(xprt); @@ -2047,7 +2105,10 @@ static void xs_udp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; + unsigned int pflags = current->flags; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP, false); @@ -2067,6 +2128,7 @@ out: xprt_clear_connecting(xprt); xprt_unlock_connect(xprt, transport); xprt_wake_pending_tasks(xprt, status); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -2186,7 +2248,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; /* socket options */ @@ -2226,11 +2287,19 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; int status; + unsigned int pflags = current->flags; + + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; - if (!sock) { - sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, - IPPROTO_TCP, true); + if (xprt_connected(xprt)) + goto out; + if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, + &transport->sock_state) || + !sock) { + xs_reset_transport(transport); + sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, + SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); goto out; @@ -2250,10 +2319,9 @@ static void xs_tcp_setup_socket(struct work_struct *work) sock->sk->sk_state); switch (status) { case 0: - xs_set_srcport(transport, sock); - fallthrough; case -EINPROGRESS: /* SYN_SENT! */ + set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; fallthrough; @@ -2291,6 +2359,7 @@ out: xprt_clear_connecting(xprt); out_unlock: xprt_unlock_connect(xprt, transport); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -2314,13 +2383,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL && !xprt_connecting(xprt)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " - "seconds\n", - xprt, xprt->reestablish_timeout / HZ); - - /* Start by resetting any existing state */ - xs_reset_transport(transport); + "seconds\n", xprt, xprt->reestablish_timeout / HZ); delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); @@ -2482,7 +2547,7 @@ static int bc_malloc(struct rpc_task *task) return -EINVAL; } - page = alloc_page(GFP_KERNEL); + page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); if (!page) return -ENOMEM; @@ -2520,6 +2585,9 @@ static int bc_sendto(struct rpc_rqst *req) int err; req->rq_xtime = ktime_get(); + err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); + if (err < 0) + return err; err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) @@ -2616,6 +2684,8 @@ static const struct rpc_xprt_ops xs_udp_ops = { .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_udp_send_request, @@ -2638,6 +2708,8 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, @@ -2796,9 +2868,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) } xprt_set_bound(xprt); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); - ret = ERR_PTR(xs_local_setup_socket(transport)); - if (ret) - goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); |